From a5385803db96d4cbaf293a223e5d097bf7ffc0d4 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH] nv2a: Add Vulkan renderer --- .clang-format | 4 +- .gitmodules | 13 +- config_spec.yml | 6 + configure | 2 +- debian/control | 3 + hw/xbox/nv2a/debug.h | 98 +- hw/xbox/nv2a/gl/meson.build | 6 - hw/xbox/nv2a/meson.build | 12 +- hw/xbox/nv2a/nv2a.c | 28 +- hw/xbox/nv2a/nv2a.h | 2 +- hw/xbox/nv2a/nv2a_int.h | 374 +- hw/xbox/nv2a/nv2a_regs.h | 11 + hw/xbox/nv2a/pfifo.c | 64 +- hw/xbox/nv2a/pgraph.c | 7775 ----------------- hw/xbox/nv2a/pgraph/debug_renderdoc.c | 84 + hw/xbox/nv2a/pgraph/gl/blit.c | 174 + hw/xbox/nv2a/pgraph/gl/constants.h | 322 + hw/xbox/nv2a/{ => pgraph/gl}/debug.c | 63 +- hw/xbox/nv2a/pgraph/gl/debug.h | 60 + hw/xbox/nv2a/pgraph/gl/display.c | 407 + hw/xbox/nv2a/pgraph/gl/draw.c | 528 ++ hw/xbox/nv2a/pgraph/gl/meson.build | 12 + hw/xbox/nv2a/pgraph/gl/renderer.c | 201 + hw/xbox/nv2a/pgraph/gl/renderer.h | 283 + hw/xbox/nv2a/pgraph/gl/reports.c | 111 + hw/xbox/nv2a/pgraph/gl/shaders.c | 1102 +++ hw/xbox/nv2a/pgraph/gl/surface.c | 1400 +++ hw/xbox/nv2a/pgraph/gl/texture.c | 819 ++ hw/xbox/nv2a/pgraph/gl/vertex.c | 283 + hw/xbox/nv2a/pgraph/glsl/common.c | 58 + hw/xbox/nv2a/pgraph/glsl/common.h | 38 + hw/xbox/nv2a/pgraph/glsl/geom.c | 228 + hw/xbox/nv2a/pgraph/glsl/geom.h | 34 + hw/xbox/nv2a/pgraph/glsl/meson.build | 8 + hw/xbox/nv2a/{ => pgraph/glsl}/psh.c | 174 +- hw/xbox/nv2a/pgraph/glsl/psh.h | 41 + hw/xbox/nv2a/pgraph/glsl/vsh-ff.c | 497 ++ hw/xbox/nv2a/pgraph/glsl/vsh-ff.h | 31 + .../nv2a/{vsh.c => pgraph/glsl/vsh-prog.c} | 34 +- hw/xbox/nv2a/pgraph/glsl/vsh-prog.h | 35 + hw/xbox/nv2a/pgraph/glsl/vsh.c | 274 + hw/xbox/nv2a/pgraph/glsl/vsh.h | 33 + hw/xbox/nv2a/pgraph/meson.build | 19 + .../{pgraph_methods.h => pgraph/methods.h} | 0 hw/xbox/nv2a/pgraph/null/meson.build | 3 + hw/xbox/nv2a/pgraph/null/renderer.c | 146 + hw/xbox/nv2a/pgraph/pgraph.c | 2874 ++++++ hw/xbox/nv2a/pgraph/pgraph.h | 383 + 
hw/xbox/nv2a/pgraph/profile.c | 74 + hw/xbox/nv2a/{ => pgraph}/psh.h | 8 +- hw/xbox/nv2a/pgraph/rdi.c | 60 + hw/xbox/nv2a/{ => pgraph}/s3tc.c | 78 +- hw/xbox/nv2a/{ => pgraph}/s3tc.h | 27 +- hw/xbox/nv2a/pgraph/shaders.c | 295 + hw/xbox/nv2a/{ => pgraph}/shaders.h | 69 +- hw/xbox/nv2a/pgraph/surface.h | 35 + hw/xbox/nv2a/{ => pgraph}/swizzle.c | 0 hw/xbox/nv2a/{ => pgraph}/swizzle.h | 6 +- hw/xbox/nv2a/pgraph/texture.c | 405 + hw/xbox/nv2a/pgraph/texture.h | 67 + .../thirdparty/gloffscreen/common.c} | 0 .../thirdparty/gloffscreen}/gloffscreen.h | 0 .../thirdparty/gloffscreen/sdl.c} | 2 +- .../nv2a/{ => pgraph}/thirdparty/meson.build | 6 + .../nv2a/{ => pgraph}/thirdparty/nv2a_vsh_cpu | 0 hw/xbox/nv2a/pgraph/util.h | 86 + hw/xbox/nv2a/pgraph/vertex.c | 131 + hw/xbox/nv2a/pgraph/vk/blit.c | 177 + hw/xbox/nv2a/pgraph/vk/buffer.c | 206 + hw/xbox/nv2a/pgraph/vk/command.c | 119 + hw/xbox/nv2a/pgraph/vk/constants.h | 418 + hw/xbox/nv2a/pgraph/vk/debug.c | 59 + hw/xbox/nv2a/pgraph/vk/debug.h | 61 + hw/xbox/nv2a/pgraph/vk/display.c | 896 ++ hw/xbox/nv2a/pgraph/vk/draw.c | 1916 ++++ hw/xbox/nv2a/pgraph/vk/glsl.c | 380 + hw/xbox/nv2a/pgraph/vk/glsl.h | 205 + hw/xbox/nv2a/pgraph/vk/image.c | 209 + hw/xbox/nv2a/pgraph/vk/instance.c | 662 ++ hw/xbox/nv2a/pgraph/vk/meson.build | 24 + hw/xbox/nv2a/pgraph/vk/renderer.c | 266 + hw/xbox/nv2a/pgraph/vk/renderer.h | 526 ++ hw/xbox/nv2a/pgraph/vk/reports.c | 134 + hw/xbox/nv2a/pgraph/vk/shaders.c | 797 ++ hw/xbox/nv2a/pgraph/vk/surface-compute.c | 473 + hw/xbox/nv2a/pgraph/vk/surface.c | 1485 ++++ hw/xbox/nv2a/pgraph/vk/texture.c | 1456 +++ hw/xbox/nv2a/pgraph/vk/vertex.c | 312 + hw/xbox/nv2a/{ => pgraph}/vsh.h | 9 +- hw/xbox/nv2a/shaders.c | 1599 ---- hw/xbox/nv2a/shaders_common.h | 125 - {hw/xbox/nv2a => include/qemu}/lru.h | 58 +- include/qemu/mstring.h | 82 + licenses/SPIRV-Reflect.license.txt | 201 + licenses/VulkanMemoryAllocator.license.txt | 19 + licenses/volk.license.txt | 19 + meson.build | 30 + scripts/archive-source.sh | 6 
+- scripts/gen-license.py | 31 +- thirdparty/SPIRV-Reflect | 1 + thirdparty/VulkanMemoryAllocator | 1 + thirdparty/meson.build | 12 + thirdparty/renderdoc_app.h | 86 +- thirdparty/vma.cc | 2 + thirdparty/volk | 1 + ui/meson.build | 4 - ui/xemu.c | 4 +- ui/xui/main-menu.cc | 10 +- ui/xui/main.cc | 2 +- ui/xui/menubar.cc | 6 +- util/meson.build | 1 + util/mstring.c | 49 + xemu-version.c | 3 + xemu-version.h | 3 + 114 files changed, 23349 insertions(+), 10302 deletions(-) delete mode 100644 hw/xbox/nv2a/gl/meson.build delete mode 100644 hw/xbox/nv2a/pgraph.c create mode 100644 hw/xbox/nv2a/pgraph/debug_renderdoc.c create mode 100644 hw/xbox/nv2a/pgraph/gl/blit.c create mode 100644 hw/xbox/nv2a/pgraph/gl/constants.h rename hw/xbox/nv2a/{ => pgraph/gl}/debug.c (77%) create mode 100644 hw/xbox/nv2a/pgraph/gl/debug.h create mode 100644 hw/xbox/nv2a/pgraph/gl/display.c create mode 100644 hw/xbox/nv2a/pgraph/gl/draw.c create mode 100644 hw/xbox/nv2a/pgraph/gl/meson.build create mode 100644 hw/xbox/nv2a/pgraph/gl/renderer.c create mode 100644 hw/xbox/nv2a/pgraph/gl/renderer.h create mode 100644 hw/xbox/nv2a/pgraph/gl/reports.c create mode 100644 hw/xbox/nv2a/pgraph/gl/shaders.c create mode 100644 hw/xbox/nv2a/pgraph/gl/surface.c create mode 100644 hw/xbox/nv2a/pgraph/gl/texture.c create mode 100644 hw/xbox/nv2a/pgraph/gl/vertex.c create mode 100644 hw/xbox/nv2a/pgraph/glsl/common.c create mode 100644 hw/xbox/nv2a/pgraph/glsl/common.h create mode 100644 hw/xbox/nv2a/pgraph/glsl/geom.c create mode 100644 hw/xbox/nv2a/pgraph/glsl/geom.h create mode 100644 hw/xbox/nv2a/pgraph/glsl/meson.build rename hw/xbox/nv2a/{ => pgraph/glsl}/psh.c (90%) create mode 100644 hw/xbox/nv2a/pgraph/glsl/psh.h create mode 100644 hw/xbox/nv2a/pgraph/glsl/vsh-ff.c create mode 100644 hw/xbox/nv2a/pgraph/glsl/vsh-ff.h rename hw/xbox/nv2a/{vsh.c => pgraph/glsl/vsh-prog.c} (97%) create mode 100644 hw/xbox/nv2a/pgraph/glsl/vsh-prog.h create mode 100644 hw/xbox/nv2a/pgraph/glsl/vsh.c create mode 100644 
hw/xbox/nv2a/pgraph/glsl/vsh.h create mode 100644 hw/xbox/nv2a/pgraph/meson.build rename hw/xbox/nv2a/{pgraph_methods.h => pgraph/methods.h} (100%) create mode 100644 hw/xbox/nv2a/pgraph/null/meson.build create mode 100644 hw/xbox/nv2a/pgraph/null/renderer.c create mode 100644 hw/xbox/nv2a/pgraph/pgraph.c create mode 100644 hw/xbox/nv2a/pgraph/pgraph.h create mode 100644 hw/xbox/nv2a/pgraph/profile.c rename hw/xbox/nv2a/{ => pgraph}/psh.h (96%) create mode 100644 hw/xbox/nv2a/pgraph/rdi.c rename hw/xbox/nv2a/{ => pgraph}/s3tc.c (71%) rename hw/xbox/nv2a/{ => pgraph}/s3tc.h (63%) create mode 100644 hw/xbox/nv2a/pgraph/shaders.c rename hw/xbox/nv2a/{ => pgraph}/shaders.h (56%) create mode 100644 hw/xbox/nv2a/pgraph/surface.h rename hw/xbox/nv2a/{ => pgraph}/swizzle.c (100%) rename hw/xbox/nv2a/{ => pgraph}/swizzle.h (94%) create mode 100644 hw/xbox/nv2a/pgraph/texture.c create mode 100644 hw/xbox/nv2a/pgraph/texture.h rename hw/xbox/nv2a/{gl/gloffscreen_common.c => pgraph/thirdparty/gloffscreen/common.c} (100%) rename hw/xbox/nv2a/{gl => pgraph/thirdparty/gloffscreen}/gloffscreen.h (100%) rename hw/xbox/nv2a/{gl/gloffscreen_sdl.c => pgraph/thirdparty/gloffscreen/sdl.c} (98%) rename hw/xbox/nv2a/{ => pgraph}/thirdparty/meson.build (62%) rename hw/xbox/nv2a/{ => pgraph}/thirdparty/nv2a_vsh_cpu (100%) create mode 100644 hw/xbox/nv2a/pgraph/util.h create mode 100644 hw/xbox/nv2a/pgraph/vertex.c create mode 100644 hw/xbox/nv2a/pgraph/vk/blit.c create mode 100644 hw/xbox/nv2a/pgraph/vk/buffer.c create mode 100644 hw/xbox/nv2a/pgraph/vk/command.c create mode 100644 hw/xbox/nv2a/pgraph/vk/constants.h create mode 100644 hw/xbox/nv2a/pgraph/vk/debug.c create mode 100644 hw/xbox/nv2a/pgraph/vk/debug.h create mode 100644 hw/xbox/nv2a/pgraph/vk/display.c create mode 100644 hw/xbox/nv2a/pgraph/vk/draw.c create mode 100644 hw/xbox/nv2a/pgraph/vk/glsl.c create mode 100644 hw/xbox/nv2a/pgraph/vk/glsl.h create mode 100644 hw/xbox/nv2a/pgraph/vk/image.c create mode 100644 
hw/xbox/nv2a/pgraph/vk/instance.c create mode 100644 hw/xbox/nv2a/pgraph/vk/meson.build create mode 100644 hw/xbox/nv2a/pgraph/vk/renderer.c create mode 100644 hw/xbox/nv2a/pgraph/vk/renderer.h create mode 100644 hw/xbox/nv2a/pgraph/vk/reports.c create mode 100644 hw/xbox/nv2a/pgraph/vk/shaders.c create mode 100644 hw/xbox/nv2a/pgraph/vk/surface-compute.c create mode 100644 hw/xbox/nv2a/pgraph/vk/surface.c create mode 100644 hw/xbox/nv2a/pgraph/vk/texture.c create mode 100644 hw/xbox/nv2a/pgraph/vk/vertex.c rename hw/xbox/nv2a/{ => pgraph}/vsh.h (92%) delete mode 100644 hw/xbox/nv2a/shaders.c delete mode 100644 hw/xbox/nv2a/shaders_common.h rename {hw/xbox/nv2a => include/qemu}/lru.h (87%) create mode 100644 include/qemu/mstring.h create mode 100644 licenses/SPIRV-Reflect.license.txt create mode 100644 licenses/VulkanMemoryAllocator.license.txt create mode 100644 licenses/volk.license.txt create mode 160000 thirdparty/SPIRV-Reflect create mode 160000 thirdparty/VulkanMemoryAllocator create mode 100644 thirdparty/meson.build create mode 100644 thirdparty/vma.cc create mode 160000 thirdparty/volk create mode 100644 util/mstring.c diff --git a/.clang-format b/.clang-format index 8750a94dc8..3779a03403 100644 --- a/.clang-format +++ b/.clang-format @@ -71,8 +71,8 @@ IndentWidth: 4 AccessModifierOffset: -4 IndentWrappedFunctionNames: false KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? -MacroBlockEnd: '.*_END$' +#MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? 
+#MacroBlockEnd: '.*_END$' MaxEmptyLinesToKeep: 2 #PenaltyBreakBeforeFirstCallParameter: 19 #PenaltyBreakComment: 300 diff --git a/.gitmodules b/.gitmodules index 4118661130..420d7d9cd2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -82,9 +82,18 @@ [submodule "tomlplusplus"] path = tomlplusplus url = https://github.com/marzer/tomlplusplus -[submodule "hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu"] - path = hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu +[submodule "hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu"] + path = hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu url = https://github.com/abaire/nv2a_vsh_cpu.git [submodule "ui/thirdparty/httplib"] path = ui/thirdparty/httplib url = https://github.com/yhirose/cpp-httplib +[submodule "hw/xbox/nv2a/pgraph/vk/thirdparty/VulkanMemoryAllocator"] + path = thirdparty/VulkanMemoryAllocator + url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator +[submodule "thirdparty/volk"] + path = thirdparty/volk + url = https://github.com/zeux/volk +[submodule "thirdparty/SPIRV-Reflect"] + path = thirdparty/SPIRV-Reflect + url = https://github.com/KhronosGroup/SPIRV-Reflect diff --git a/config_spec.yml b/config_spec.yml index b858606e68..f2c3736a8f 100644 --- a/config_spec.yml +++ b/config_spec.yml @@ -130,6 +130,12 @@ input: default: 18 # w display: + renderer: + type: enum + values: ["NULL", OPENGL, VULKAN] + default: OPENGL + vulkan: + validation_layers: bool quality: surface_scale: type: integer diff --git a/configure b/configure index 11471698b6..880f30c4bd 100755 --- a/configure +++ b/configure @@ -237,7 +237,7 @@ else git_submodules_action="ignore" fi -git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu" +git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu thirdparty/volk 
thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" git="git" # Don't accept a target_list environment variable. diff --git a/debian/control b/debian/control index 91ed61433f..30603057ea 100644 --- a/debian/control +++ b/debian/control @@ -16,6 +16,9 @@ Build-Depends: debhelper (>= 11), libssl-dev, libpcap-dev, libslirp-dev, + glslang-dev, + libvulkan-dev, + Standards-Version: 3.9.8 Homepage: https://xemu.app XS-Debian-Vcs-Browser: https://github.com/mborgerson/xemu diff --git a/hw/xbox/nv2a/debug.h b/hw/xbox/nv2a/debug.h index 0c2c3d5f76..8a7fcc1449 100644 --- a/hw/xbox/nv2a/debug.h +++ b/hw/xbox/nv2a/debug.h @@ -1,8 +1,9 @@ /* - * QEMU Geforce NV2A debug helpers + * QEMU Geforce NV2A profiling and debug helpers * - * Copyright (c) 2015 Jannik Vogel * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2023 Matt Borgerson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -18,8 +19,8 @@ * License along with this library; if not, see . */ -#ifndef HW_NV2A_DEBUG_H -#define HW_NV2A_DEBUG_H +#ifndef HW_XBOX_NV2A_DEBUG_H +#define HW_XBOX_NV2A_DEBUG_H #include @@ -36,54 +37,6 @@ # define NV2A_DPRINTF(format, ...) do { } while (0) #endif -// #define DEBUG_NV2A_GL -#ifdef DEBUG_NV2A_GL - -#include -#include "gl/gloffscreen.h" -#include "config-host.h" - -void gl_debug_initialize(void); -void gl_debug_message(bool cc, const char *fmt, ...); -void gl_debug_group_begin(const char *fmt, ...); -void gl_debug_group_end(void); -void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...); -void gl_debug_frame_terminator(void); - -# define NV2A_GL_DPRINTF(cc, format, ...) \ - gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__) -# define NV2A_GL_DGROUP_BEGIN(format, ...) \ - gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__) -# define NV2A_GL_DGROUP_END() \ - gl_debug_group_end() -# define NV2A_GL_DLABEL(target, name, format, ...) 
\ - gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__) -#define NV2A_GL_DFRAME_TERMINATOR() \ - gl_debug_frame_terminator() - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef CONFIG_RENDERDOC -bool nv2a_dbg_renderdoc_available(void); -void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames); -#endif - -#ifdef __cplusplus -} -#endif - -#else -# define NV2A_GL_DPRINTF(cc, format, ...) do { \ - if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \ - } while (0) -# define NV2A_GL_DGROUP_BEGIN(format, ...) do { } while (0) -# define NV2A_GL_DGROUP_END() do { } while (0) -# define NV2A_GL_DLABEL(target, name, format, ...) do { } while (0) -# define NV2A_GL_DFRAME_TERMINATOR() do { } while (0) -#endif - /* Debug prints to identify when unimplemented or unconfirmed features * are being exercised. These cases likely result in graphical problems of * varying degree, but should otherwise not crash the system. Enable this @@ -111,6 +64,22 @@ void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames); #endif #define NV2A_PROF_COUNTERS_XMAC \ + _X(NV2A_PROF_FINISH_VERTEX_BUFFER_DIRTY) \ + _X(NV2A_PROF_FINISH_SURFACE_CREATE) \ + _X(NV2A_PROF_FINISH_SURFACE_DOWN) \ + _X(NV2A_PROF_FINISH_NEED_BUFFER_SPACE) \ + _X(NV2A_PROF_FINISH_FRAMEBUFFER_DIRTY) \ + _X(NV2A_PROF_FINISH_PRESENTING) \ + _X(NV2A_PROF_FINISH_FLIP_STALL) \ + _X(NV2A_PROF_FINISH_FLUSH) \ + _X(NV2A_PROF_CLEAR) \ + _X(NV2A_PROF_QUEUE_SUBMIT) \ + _X(NV2A_PROF_QUEUE_SUBMIT_AUX) \ + _X(NV2A_PROF_PIPELINE_NOTDIRTY) \ + _X(NV2A_PROF_PIPELINE_GEN) \ + _X(NV2A_PROF_PIPELINE_BIND) \ + _X(NV2A_PROF_PIPELINE_MERGE) \ + _X(NV2A_PROF_PIPELINE_RENDERPASSES) \ _X(NV2A_PROF_BEGIN_ENDS) \ _X(NV2A_PROF_DRAW_ARRAYS) \ _X(NV2A_PROF_INLINE_BUFFERS) \ @@ -120,18 +89,26 @@ void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames); _X(NV2A_PROF_SHADER_GEN) \ _X(NV2A_PROF_SHADER_BIND) \ _X(NV2A_PROF_SHADER_BIND_NOTDIRTY) \ + _X(NV2A_PROF_SHADER_UBO_DIRTY) \ + _X(NV2A_PROF_SHADER_UBO_NOTDIRTY) \ _X(NV2A_PROF_ATTR_BIND) 
\ _X(NV2A_PROF_TEX_UPLOAD) \ - _X(NV2A_PROF_TEX_BIND) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_1) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_2) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_3) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_4) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY) \ + _X(NV2A_PROF_SURF_SWIZZLE) \ + _X(NV2A_PROF_SURF_CREATE) \ _X(NV2A_PROF_SURF_DOWNLOAD) \ _X(NV2A_PROF_SURF_UPLOAD) \ _X(NV2A_PROF_SURF_TO_TEX) \ _X(NV2A_PROF_SURF_TO_TEX_FALLBACK) \ + _X(NV2A_PROF_QUEUE_SUBMIT_1) \ + _X(NV2A_PROF_QUEUE_SUBMIT_2) \ + _X(NV2A_PROF_QUEUE_SUBMIT_3) \ + _X(NV2A_PROF_QUEUE_SUBMIT_4) \ + _X(NV2A_PROF_QUEUE_SUBMIT_5) \ enum NV2A_PROF_COUNTERS_ENUM { #define _X(x) x, @@ -161,6 +138,21 @@ extern NV2AStats g_nv2a_stats; const char *nv2a_profile_get_counter_name(unsigned int cnt); int nv2a_profile_get_counter_value(unsigned int cnt); +void nv2a_profile_increment(void); +void nv2a_profile_flip_stall(void); + +static inline void nv2a_profile_inc_counter(enum NV2A_PROF_COUNTERS_ENUM cnt) +{ + g_nv2a_stats.frame_working.counters[cnt] += 1; +} + +#ifdef CONFIG_RENDERDOC +void nv2a_dbg_renderdoc_init(void); +void *nv2a_dbg_renderdoc_get_api(void); +bool nv2a_dbg_renderdoc_available(void); +void nv2a_dbg_renderdoc_capture_frames(int num_frames); +extern int renderdoc_capture_frames; +#endif #ifdef __cplusplus } diff --git a/hw/xbox/nv2a/gl/meson.build b/hw/xbox/nv2a/gl/meson.build deleted file mode 100644 index 973a9aa8c1..0000000000 --- a/hw/xbox/nv2a/gl/meson.build +++ /dev/null @@ -1,6 +0,0 @@ -softmmu_ss.add([sdl, files( - 'gloffscreen_common.c', - 'gloffscreen_sdl.c', - )]) - -# gloffscreen_sdl.o-cflags := $(SDL_CFLAGS) diff --git a/hw/xbox/nv2a/meson.build b/hw/xbox/nv2a/meson.build index d3b159a3bc..29eff86e27 100644 --- a/hw/xbox/nv2a/meson.build +++ b/hw/xbox/nv2a/meson.build @@ -1,27 +1,17 @@ specific_ss.add(files( 'nv2a.c', - 'debug.c', 'pbus.c', 'pcrtc.c', 'pfb.c', 'pfifo.c', - 'pgraph.c', 'pmc.c', 'pramdac.c', 'prmcio.c', 'prmdio.c', 'prmvio.c', - 'psh.c', 'ptimer.c', 'pvideo.c', - 
'shaders.c', 'stubs.c', 'user.c', - 'vsh.c', - 'swizzle.c', - 's3tc.c', )) -subdir('gl') - -subdir('thirdparty') -specific_ss.add(nv2a_vsh_cpu) +subdir('pgraph') diff --git a/hw/xbox/nv2a/nv2a.c b/hw/xbox/nv2a/nv2a.c index e068f76dc9..7b16113115 100644 --- a/hw/xbox/nv2a/nv2a.c +++ b/hw/xbox/nv2a/nv2a.c @@ -172,6 +172,16 @@ static void nv2a_get_offsets(VGACommonState *s, *pline_compare = line_compare; } +const uint8_t *nv2a_get_dac_palette(void) +{ + return g_nv2a->puserdac.palette; +} + +int nv2a_get_screen_off(void) +{ + return g_nv2a->vga.sr[VGA_SEQ_CLOCK_MODE] & VGA_SR01_SCREEN_OFF; +} + static void nv2a_vga_gfx_update(void *opaque) { VGACommonState *vga = opaque; @@ -277,7 +287,7 @@ static void nv2a_reset(NV2AState *d) } memset(d->pfifo.regs, 0, sizeof(d->pfifo.regs)); - memset(d->pgraph.regs, 0, sizeof(d->pgraph.regs)); + memset(d->pgraph.regs_, 0, sizeof(d->pgraph.regs_)); memset(d->pvideo.regs, 0, sizeof(d->pvideo.regs)); d->pcrtc.start = 0; @@ -365,11 +375,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state) if (state == RUN_STATE_SAVE_VM) { nv2a_lock_fifo(d); qatomic_set(&d->pfifo.halt, true); - qatomic_set(&d->pgraph.download_dirty_surfaces_pending, true); - qemu_event_reset(&d->pgraph.dirty_surfaces_download_complete); + d->pgraph.renderer->ops.pre_savevm_trigger(d); nv2a_unlock_fifo(d); qemu_mutex_unlock_iothread(); - qemu_event_wait(&d->pgraph.dirty_surfaces_download_complete); + d->pgraph.renderer->ops.pre_savevm_wait(d); qemu_mutex_lock_iothread(); nv2a_lock_fifo(d); } else if (state == RUN_STATE_RESTORE_VM) { @@ -382,11 +391,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state) nv2a_unlock_fifo(d); } else if (state == RUN_STATE_SHUTDOWN) { nv2a_lock_fifo(d); - qatomic_set(&d->pgraph.shader_cache_writeback_pending, true); - qemu_event_reset(&d->pgraph.shader_cache_writeback_complete); + d->pgraph.renderer->ops.pre_shutdown_trigger(d); nv2a_unlock_fifo(d); qemu_mutex_unlock_iothread(); - 
qemu_event_wait(&d->pgraph.shader_cache_writeback_complete); + d->pgraph.renderer->ops.pre_shutdown_wait(d); qemu_mutex_lock_iothread(); } } @@ -515,9 +523,9 @@ static const VMStateDescription vmstate_nv2a = { VMSTATE_UINT32(pgraph.inline_buffer_length, NV2AState), // fixme VMSTATE_UINT32(pgraph.draw_arrays_length, NV2AState), VMSTATE_UINT32(pgraph.draw_arrays_max_count, NV2AState), - VMSTATE_INT32_ARRAY(pgraph.gl_draw_arrays_start, NV2AState, 1250), - VMSTATE_INT32_ARRAY(pgraph.gl_draw_arrays_count, NV2AState, 1250), - VMSTATE_UINT32_ARRAY(pgraph.regs, NV2AState, 0x2000), + VMSTATE_INT32_ARRAY(pgraph.draw_arrays_start, NV2AState, 1250), + VMSTATE_INT32_ARRAY(pgraph.draw_arrays_count, NV2AState, 1250), + VMSTATE_UINT32_ARRAY(pgraph.regs_, NV2AState, 0x2000), VMSTATE_UINT32(pmc.pending_interrupts, NV2AState), VMSTATE_UINT32(pmc.enabled_interrupts, NV2AState), VMSTATE_UINT32(pfifo.pending_interrupts, NV2AState), diff --git a/hw/xbox/nv2a/nv2a.h b/hw/xbox/nv2a/nv2a.h index 35b63749e4..a5c4468deb 100644 --- a/hw/xbox/nv2a/nv2a.h +++ b/hw/xbox/nv2a/nv2a.h @@ -22,7 +22,7 @@ #define HW_NV2A_H void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram); -void nv2a_gl_context_init(void); +void nv2a_context_init(void); int nv2a_get_framebuffer_surface(void); void nv2a_set_surface_scale_factor(unsigned int scale); unsigned int nv2a_get_surface_scale_factor(void); diff --git a/hw/xbox/nv2a/nv2a_int.h b/hw/xbox/nv2a/nv2a_int.h index 31ab6d89ca..9b0189ebc8 100644 --- a/hw/xbox/nv2a/nv2a_int.h +++ b/hw/xbox/nv2a/nv2a_int.h @@ -44,25 +44,12 @@ #include "cpu.h" #include "trace.h" -#include "swizzle.h" -#include "lru.h" -#include "gl/gloffscreen.h" #include "nv2a.h" +#include "pgraph/pgraph.h" #include "debug.h" -#include "shaders.h" #include "nv2a_regs.h" -#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask)) - -#define SET_MASK(v, mask, val) \ - ({ \ - const unsigned int __val = (val); \ - const unsigned int __mask = (mask); \ - (v) &= ~(__mask); \ - (v) |= ((__val) << 
ctz32(__mask)) & (__mask); \ - }) - #define NV2A_DEVICE(obj) OBJECT_CHECK(NV2AState, (obj), "nv2a") enum FIFOEngine { @@ -78,347 +65,6 @@ typedef struct DMAObject { hwaddr limit; } DMAObject; -typedef struct VertexAttribute { - bool dma_select; - hwaddr offset; - - /* inline arrays are packed in order? - * Need to pass the offset to converted attributes */ - unsigned int inline_array_offset; - - float inline_value[4]; - - unsigned int format; - unsigned int size; /* size of the data type */ - unsigned int count; /* number of components */ - uint32_t stride; - - bool needs_conversion; - - float *inline_buffer; - bool inline_buffer_populated; - - GLint gl_count; - GLenum gl_type; - GLboolean gl_normalize; - - GLuint gl_inline_buffer; -} VertexAttribute; - -typedef struct SurfaceFormatInfo { - unsigned int bytes_per_pixel; - GLint gl_internal_format; - GLenum gl_format; - GLenum gl_type; - GLenum gl_attachment; -} SurfaceFormatInfo; - -typedef struct Surface { - bool draw_dirty; - bool buffer_dirty; - bool write_enabled_cache; - unsigned int pitch; - - hwaddr offset; -} Surface; - -typedef struct SurfaceShape { - unsigned int z_format; - unsigned int color_format; - unsigned int zeta_format; - unsigned int log_width, log_height; - unsigned int clip_x, clip_y; - unsigned int clip_width, clip_height; - unsigned int anti_aliasing; -} SurfaceShape; - -typedef struct SurfaceBinding { - QTAILQ_ENTRY(SurfaceBinding) entry; - MemAccessCallback *access_cb; - - hwaddr vram_addr; - - SurfaceFormatInfo fmt; - SurfaceShape shape; - uintptr_t dma_addr; - uintptr_t dma_len; - bool color; - bool swizzle; - - unsigned int width; - unsigned int height; - unsigned int pitch; - size_t size; - - GLuint gl_buffer; - - bool cleared; - int frame_time; - int draw_time; - bool draw_dirty; - bool download_pending; - bool upload_pending; -} SurfaceBinding; - -typedef struct TextureShape { - bool cubemap; - unsigned int dimensionality; - unsigned int color_format; - unsigned int levels; - 
unsigned int width, height, depth; - bool border; - - unsigned int min_mipmap_level, max_mipmap_level; - unsigned int pitch; -} TextureShape; - -typedef struct TextureBinding { - GLenum gl_target; - GLuint gl_texture; - unsigned int refcnt; - int draw_time; - uint64_t data_hash; - unsigned int scale; - unsigned int min_filter; - unsigned int mag_filter; - unsigned int addru; - unsigned int addrv; - unsigned int addrp; - uint32_t border_color; - bool border_color_set; -} TextureBinding; - -typedef struct TextureKey { - TextureShape state; - hwaddr texture_vram_offset; - hwaddr texture_length; - hwaddr palette_vram_offset; - hwaddr palette_length; -} TextureKey; - -typedef struct TextureLruNode { - LruNode node; - TextureKey key; - TextureBinding *binding; - bool possibly_dirty; -} TextureLruNode; - -typedef struct VertexKey { - size_t count; - GLuint gl_type; - GLboolean gl_normalize; - size_t stride; - hwaddr addr; -} VertexKey; - -typedef struct VertexLruNode { - LruNode node; - VertexKey key; - GLuint gl_buffer; - bool initialized; -} VertexLruNode; - -typedef struct KelvinState { - hwaddr object_instance; -} KelvinState; - -typedef struct ContextSurfaces2DState { - hwaddr object_instance; - hwaddr dma_image_source; - hwaddr dma_image_dest; - unsigned int color_format; - unsigned int source_pitch, dest_pitch; - hwaddr source_offset, dest_offset; -} ContextSurfaces2DState; - -typedef struct ImageBlitState { - hwaddr object_instance; - hwaddr context_surfaces; - unsigned int operation; - unsigned int in_x, in_y; - unsigned int out_x, out_y; - unsigned int width, height; -} ImageBlitState; - -typedef struct BetaState { - hwaddr object_instance; - uint32_t beta; -} BetaState; - -typedef struct QueryReport { - QSIMPLEQ_ENTRY(QueryReport) entry; - bool clear; - uint32_t parameter; - unsigned int query_count; - GLuint *queries; -} QueryReport; - -typedef struct PGRAPHState { - QemuMutex lock; - - uint32_t pending_interrupts; - uint32_t enabled_interrupts; - - int 
frame_time; - int draw_time; - - struct s2t_rndr { - GLuint fbo, vao, vbo, prog; - GLuint tex_loc, surface_size_loc; - } s2t_rndr; - - struct disp_rndr { - GLuint fbo, vao, vbo, prog; - GLuint display_size_loc; - GLuint line_offset_loc; - GLuint tex_loc; - GLuint pvideo_tex; - GLint pvideo_enable_loc; - GLint pvideo_tex_loc; - GLint pvideo_in_pos_loc; - GLint pvideo_pos_loc; - GLint pvideo_scale_loc; - GLint pvideo_color_key_enable_loc; - GLint pvideo_color_key_loc; - GLint palette_loc[256]; - } disp_rndr; - - /* subchannels state we're not sure the location of... */ - ContextSurfaces2DState context_surfaces_2d; - ImageBlitState image_blit; - KelvinState kelvin; - BetaState beta; - - hwaddr dma_color, dma_zeta; - Surface surface_color, surface_zeta; - unsigned int surface_type; - SurfaceShape surface_shape; - SurfaceShape last_surface_shape; - QTAILQ_HEAD(, SurfaceBinding) surfaces; - SurfaceBinding *color_binding, *zeta_binding; - struct { - int clip_x; - int clip_width; - int clip_y; - int clip_height; - int width; - int height; - } surface_binding_dim; // FIXME: Refactor - - hwaddr dma_a, dma_b; - Lru texture_cache; - TextureLruNode *texture_cache_entries; - bool texture_dirty[NV2A_MAX_TEXTURES]; - TextureBinding *texture_binding[NV2A_MAX_TEXTURES]; - - Lru shader_cache; - ShaderLruNode *shader_cache_entries; - ShaderBinding *shader_binding; - QemuMutex shader_cache_lock; - QemuThread shader_disk_thread; - - bool texture_matrix_enable[NV2A_MAX_TEXTURES]; - - GLuint gl_framebuffer; - - GLuint gl_display_buffer; - GLint gl_display_buffer_internal_format; - GLsizei gl_display_buffer_width; - GLsizei gl_display_buffer_height; - GLenum gl_display_buffer_format; - GLenum gl_display_buffer_type; - - hwaddr dma_state; - hwaddr dma_notifies; - hwaddr dma_semaphore; - - hwaddr dma_report; - hwaddr report_offset; - bool zpass_pixel_count_enable; - unsigned int zpass_pixel_count_result; - unsigned int gl_zpass_pixel_count_query_count; - GLuint *gl_zpass_pixel_count_queries; 
- QSIMPLEQ_HEAD(, QueryReport) report_queue; - - hwaddr dma_vertex_a, dma_vertex_b; - - uint32_t primitive_mode; - - bool enable_vertex_program_write; - - uint32_t vertex_state_shader_v0[4]; - uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE]; - bool program_data_dirty; - - uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; - bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS]; - - /* lighting constant arrays */ - uint32_t ltctxa[NV2A_LTCTXA_COUNT][4]; - bool ltctxa_dirty[NV2A_LTCTXA_COUNT]; - uint32_t ltctxb[NV2A_LTCTXB_COUNT][4]; - bool ltctxb_dirty[NV2A_LTCTXB_COUNT]; - uint32_t ltc1[NV2A_LTC1_COUNT][4]; - bool ltc1_dirty[NV2A_LTC1_COUNT]; - - float material_alpha; - - // should figure out where these are in lighting context - float light_infinite_half_vector[NV2A_MAX_LIGHTS][3]; - float light_infinite_direction[NV2A_MAX_LIGHTS][3]; - float light_local_position[NV2A_MAX_LIGHTS][3]; - float light_local_attenuation[NV2A_MAX_LIGHTS][3]; - - float point_params[8]; - - VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES]; - uint16_t compressed_attrs; - - Lru element_cache; - VertexLruNode *element_cache_entries; - - unsigned int inline_array_length; - uint32_t inline_array[NV2A_MAX_BATCH_LENGTH]; - GLuint gl_inline_array_buffer; - - unsigned int inline_elements_length; - uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH]; - - unsigned int inline_buffer_length; - - unsigned int draw_arrays_length; - unsigned int draw_arrays_min_start; - unsigned int draw_arrays_max_count; - /* FIXME: Unknown size, possibly endless, 1250 will do for now */ - /* Keep in sync with size used in nv2a.c */ - GLint gl_draw_arrays_start[1250]; - GLsizei gl_draw_arrays_count[1250]; - bool draw_arrays_prevent_connect; - - GLuint gl_memory_buffer; - GLuint gl_vertex_array; - - uint32_t regs[0x2000]; - - bool clearing; - bool waiting_for_nop; - bool waiting_for_flip; - bool waiting_for_context_switch; - bool downloads_pending; - bool 
download_dirty_surfaces_pending; - bool flush_pending; - bool gl_sync_pending; - bool shader_cache_writeback_pending; - QemuEvent downloads_complete; - QemuEvent dirty_surfaces_download_complete; - QemuEvent flush_complete; - QemuEvent gl_sync_complete; - QemuEvent shader_cache_writeback_complete; - - unsigned int surface_scale_factor; - uint8_t *scale_buf; -} PGRAPHState; - typedef struct NV2AState { /*< private >*/ PCIDevice parent_obj; @@ -512,9 +158,6 @@ typedef struct NV2ABlockInfo { } NV2ABlockInfo; extern const NV2ABlockInfo blocktable[NV_NUM_BLOCKS]; -extern GloContext *g_nv2a_context_render; -extern GloContext *g_nv2a_context_display; - void nv2a_update_irq(NV2AState *d); static inline @@ -566,20 +209,5 @@ DEFINE_PROTO(user) DMAObject nv_dma_load(NV2AState *d, hwaddr dma_obj_address); void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len); -void pgraph_init(NV2AState *d); -void pgraph_destroy(PGRAPHState *pg); -void pgraph_context_switch(NV2AState *d, unsigned int channel_id); -int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method, - uint32_t parameter, uint32_t *parameters, - size_t num_words_available, size_t max_lookahead_words, - bool inc); -void pgraph_gl_sync(NV2AState *d); -void pgraph_process_pending_reports(NV2AState *d); -void pgraph_process_pending_downloads(NV2AState *d); -void pgraph_download_dirty_surfaces(NV2AState *d); -void pgraph_flush(NV2AState *d); - -void *pfifo_thread(void *arg); -void pfifo_kick(NV2AState *d); #endif diff --git a/hw/xbox/nv2a/nv2a_regs.h b/hw/xbox/nv2a/nv2a_regs.h index 108db8f716..78a9091eb5 100644 --- a/hw/xbox/nv2a/nv2a_regs.h +++ b/hw/xbox/nv2a/nv2a_regs.h @@ -21,6 +21,17 @@ #ifndef HW_NV2A_REGS_H #define HW_NV2A_REGS_H + +#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask)) + +#define SET_MASK(v, mask, val) \ + ({ \ + const unsigned int __val = (val); \ + const unsigned int __mask = (mask); \ + (v) &= ~(__mask); \ + (v) |= ((__val) << ctz32(__mask)) & (__mask); \ + }) + 
#define NV_NUM_BLOCKS 21 #define NV_PMC 0 /* card master control */ #define NV_PBUS 1 /* bus control */ diff --git a/hw/xbox/nv2a/pfifo.c b/hw/xbox/nv2a/pfifo.c index 77dd175098..295cbbf27b 100644 --- a/hw/xbox/nv2a/pfifo.c +++ b/hw/xbox/nv2a/pfifo.c @@ -95,23 +95,25 @@ void pfifo_kick(NV2AState *d) qemu_cond_broadcast(&d->pfifo.fifo_cond); } -static bool pgraph_can_fifo_access(NV2AState *d) { - return qatomic_read(&d->pgraph.regs[NV_PGRAPH_FIFO]) & NV_PGRAPH_FIFO_ACCESS; +static bool can_fifo_access(NV2AState *d) { + return qatomic_read(&d->pgraph.regs_[NV_PGRAPH_FIFO]) & + NV_PGRAPH_FIFO_ACCESS; } /* If NV097_FLIP_STALL was executed, check if the flip has completed. * This will usually happen in the VSYNC interrupt handler. */ -static bool pgraph_is_flip_stall_complete(NV2AState *d) +static bool is_flip_stall_complete(NV2AState *d) { PGRAPHState *pg = &d->pgraph; - NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n", - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D), - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D), - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D)); + uint32_t s = pgraph_reg_r(pg, NV_PGRAPH_SURFACE); + + NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n", + GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D), + GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D), + GET_MASK(s, NV_PGRAPH_SURFACE_MODULO_3D)); - uint32_t s = pg->regs[NV_PGRAPH_SURFACE]; if (GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D) != GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D)) { return true; @@ -126,7 +128,7 @@ static bool pfifo_stall_for_flip(NV2AState *d) if (qatomic_read(&d->pgraph.waiting_for_flip)) { qemu_mutex_lock(&d->pgraph.lock); - if (!pgraph_is_flip_stall_complete(d)) { + if (!is_flip_stall_complete(d)) { should_stall = true; } else { d->pgraph.waiting_for_flip = false; @@ -141,7 +143,7 @@ static bool pfifo_puller_should_stall(NV2AState *d) { return pfifo_stall_for_flip(d) || qatomic_read(&d->pgraph.waiting_for_nop) || 
qatomic_read(&d->pgraph.waiting_for_context_switch) || - !pgraph_can_fifo_access(d); + !can_fifo_access(d); } static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry, @@ -187,7 +189,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry, qemu_mutex_lock(&d->pgraph.lock); // Switch contexts if necessary - if (pgraph_can_fifo_access(d)) { + if (can_fifo_access(d)) { pgraph_context_switch(d, entry.channel_id); if (!d->pgraph.waiting_for_context_switch) { num_proc = @@ -221,7 +223,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry, qemu_mutex_unlock(&d->pfifo.lock); qemu_mutex_lock(&d->pgraph.lock); - if (pgraph_can_fifo_access(d)) { + if (can_fifo_access(d)) { num_proc = pgraph_method(d, subchannel, method, parameter, parameters, num_words_available, max_lookahead_words, inc); @@ -242,7 +244,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry, static bool pfifo_pusher_should_stall(NV2AState *d) { - return !pgraph_can_fifo_access(d) || + return !can_fifo_access(d) || qatomic_read(&d->pgraph.waiting_for_nop); } @@ -447,39 +449,11 @@ static void pfifo_run_pusher(NV2AState *d) } } -static void process_requests(NV2AState *d) -{ - if (qatomic_read(&d->pgraph.downloads_pending) || - qatomic_read(&d->pgraph.download_dirty_surfaces_pending) || - qatomic_read(&d->pgraph.gl_sync_pending) || - qatomic_read(&d->pgraph.flush_pending) || - qatomic_read(&d->pgraph.shader_cache_writeback_pending)) { - qemu_mutex_unlock(&d->pfifo.lock); - qemu_mutex_lock(&d->pgraph.lock); - if (qatomic_read(&d->pgraph.downloads_pending)) { - pgraph_process_pending_downloads(d); - } - if (qatomic_read(&d->pgraph.download_dirty_surfaces_pending)) { - pgraph_download_dirty_surfaces(d); - } - if (qatomic_read(&d->pgraph.gl_sync_pending)) { - pgraph_gl_sync(d); - } - if (qatomic_read(&d->pgraph.flush_pending)) { - pgraph_flush(d); - } - if (qatomic_read(&d->pgraph.shader_cache_writeback_pending)) { - 
shader_write_cache_reload_list(&d->pgraph); - } - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock(&d->pfifo.lock); - } -} - void *pfifo_thread(void *arg) { NV2AState *d = (NV2AState *)arg; - glo_set_current(g_nv2a_context_render); + + pgraph_init_thread(d); rcu_register_thread(); @@ -487,13 +461,13 @@ void *pfifo_thread(void *arg) while (true) { d->pfifo.fifo_kick = false; - process_requests(d); + d->pgraph.renderer->ops.process_pending(d); if (!d->pfifo.halt) { pfifo_run_pusher(d); } - pgraph_process_pending_reports(d); + d->pgraph.renderer->ops.process_pending_reports(d); if (!d->pfifo.fifo_kick) { qemu_cond_broadcast(&d->pfifo.fifo_idle_cond); diff --git a/hw/xbox/nv2a/pgraph.c b/hw/xbox/nv2a/pgraph.c deleted file mode 100644 index 335c73cc0f..0000000000 --- a/hw/xbox/nv2a/pgraph.c +++ /dev/null @@ -1,7775 +0,0 @@ -/* - * QEMU Geforce NV2A implementation - * - * Copyright (c) 2012 espes - * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2018-2021 Matt Borgerson - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "nv2a_int.h" - -#include - -#include "nv2a_vsh_emulator.h" -#include "s3tc.h" -#include "ui/xemu-settings.h" -#include "qemu/fast-hash.h" - -const float f16_max = 511.9375f; -const float f24_max = 1.0E30; - -static NV2AState *g_nv2a; -GloContext *g_nv2a_context_render; -GloContext *g_nv2a_context_display; - -NV2AStats g_nv2a_stats; - -static void nv2a_profile_increment(void) -{ - int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME); - const int64_t fps_update_interval = 250000; - g_nv2a_stats.last_flip_time = now; - - static int64_t frame_count = 0; - frame_count++; - - static int64_t ts = 0; - int64_t delta = now - ts; - if (delta >= fps_update_interval) { - g_nv2a_stats.increment_fps = frame_count * 1000000 / delta; - ts = now; - frame_count = 0; - } -} - -static void nv2a_profile_flip_stall(void) -{ - glFinish(); - - int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME); - int64_t render_time = (now-g_nv2a_stats.last_flip_time)/1000; - - g_nv2a_stats.frame_working.mspf = render_time; - g_nv2a_stats.frame_history[g_nv2a_stats.frame_ptr] = - g_nv2a_stats.frame_working; - g_nv2a_stats.frame_ptr = - (g_nv2a_stats.frame_ptr + 1) % NV2A_PROF_NUM_FRAMES; - g_nv2a_stats.frame_count++; - memset(&g_nv2a_stats.frame_working, 0, sizeof(g_nv2a_stats.frame_working)); -} - -static void nv2a_profile_inc_counter(enum NV2A_PROF_COUNTERS_ENUM cnt) -{ - g_nv2a_stats.frame_working.counters[cnt] += 1; -} - -const char *nv2a_profile_get_counter_name(unsigned int cnt) -{ - const char *default_names[NV2A_PROF__COUNT] = { - #define _X(x) stringify(x), - NV2A_PROF_COUNTERS_XMAC - #undef _X - }; - - assert(cnt < NV2A_PROF__COUNT); - return default_names[cnt] + 10; /* 'NV2A_PROF_' */ -} - -int nv2a_profile_get_counter_value(unsigned int cnt) -{ - assert(cnt < NV2A_PROF__COUNT); - unsigned int idx = (g_nv2a_stats.frame_ptr + NV2A_PROF_NUM_FRAMES - 1) % - NV2A_PROF_NUM_FRAMES; - return g_nv2a_stats.frame_history[idx].counters[cnt]; -} - -static const GLenum 
pgraph_texture_min_filter_map[] = { - 0, - GL_NEAREST, - GL_LINEAR, - GL_NEAREST_MIPMAP_NEAREST, - GL_LINEAR_MIPMAP_NEAREST, - GL_NEAREST_MIPMAP_LINEAR, - GL_LINEAR_MIPMAP_LINEAR, - GL_LINEAR, -}; - -static const GLenum pgraph_texture_mag_filter_map[] = { - 0, - GL_NEAREST, - GL_LINEAR, - 0, - GL_LINEAR /* TODO: Convolution filter... */ -}; - -static const GLenum pgraph_texture_addr_map[] = { - 0, - GL_REPEAT, - GL_MIRRORED_REPEAT, - GL_CLAMP_TO_EDGE, - GL_CLAMP_TO_BORDER, - GL_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */ -}; - -static const GLenum pgraph_blend_factor_map[] = { - GL_ZERO, - GL_ONE, - GL_SRC_COLOR, - GL_ONE_MINUS_SRC_COLOR, - GL_SRC_ALPHA, - GL_ONE_MINUS_SRC_ALPHA, - GL_DST_ALPHA, - GL_ONE_MINUS_DST_ALPHA, - GL_DST_COLOR, - GL_ONE_MINUS_DST_COLOR, - GL_SRC_ALPHA_SATURATE, - 0, - GL_CONSTANT_COLOR, - GL_ONE_MINUS_CONSTANT_COLOR, - GL_CONSTANT_ALPHA, - GL_ONE_MINUS_CONSTANT_ALPHA, -}; - -static const GLenum pgraph_blend_equation_map[] = { - GL_FUNC_SUBTRACT, - GL_FUNC_REVERSE_SUBTRACT, - GL_FUNC_ADD, - GL_MIN, - GL_MAX, - GL_FUNC_REVERSE_SUBTRACT, - GL_FUNC_ADD, -}; - -/* FIXME -static const GLenum pgraph_blend_logicop_map[] = { - GL_CLEAR, - GL_AND, - GL_AND_REVERSE, - GL_COPY, - GL_AND_INVERTED, - GL_NOOP, - GL_XOR, - GL_OR, - GL_NOR, - GL_EQUIV, - GL_INVERT, - GL_OR_REVERSE, - GL_COPY_INVERTED, - GL_OR_INVERTED, - GL_NAND, - GL_SET, -}; -*/ - -static const GLenum pgraph_cull_face_map[] = { - 0, - GL_FRONT, - GL_BACK, - GL_FRONT_AND_BACK -}; - -static const GLenum pgraph_depth_func_map[] = { - GL_NEVER, - GL_LESS, - GL_EQUAL, - GL_LEQUAL, - GL_GREATER, - GL_NOTEQUAL, - GL_GEQUAL, - GL_ALWAYS, -}; - -static const GLenum pgraph_stencil_func_map[] = { - GL_NEVER, - GL_LESS, - GL_EQUAL, - GL_LEQUAL, - GL_GREATER, - GL_NOTEQUAL, - GL_GEQUAL, - GL_ALWAYS, -}; - -static const GLenum pgraph_stencil_op_map[] = { - 0, - GL_KEEP, - GL_ZERO, - GL_REPLACE, - GL_INCR, - GL_DECR, - GL_INVERT, - GL_INCR_WRAP, - GL_DECR_WRAP, -}; - -typedef struct ColorFormatInfo { - 
unsigned int bytes_per_pixel; - bool linear; - GLint gl_internal_format; - GLenum gl_format; - GLenum gl_type; - GLenum gl_swizzle_mask[4]; - bool depth; -} ColorFormatInfo; - -static const ColorFormatInfo kelvin_color_format_map[66] = { - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = - {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_ONE}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = - {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_RED}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = - {2, false, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = - {2, false, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = - {2, false, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = - {2, false, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = - {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = - {4, false, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - - /* paletted texture */ - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = - {1, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = - {4, false, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, 0, GL_RGBA}, - [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = - {4, false, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, 0, GL_RGBA}, - [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = - {4, false, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, 0, GL_RGBA}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = - {2, true, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = - {2, true, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = - {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - 
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = - {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_ONE}}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = - {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_RED, GL_GREEN, GL_RED, GL_GREEN}}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = - {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_ONE, GL_ONE, GL_ONE, GL_RED}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = - {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_GREEN}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = - {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_RED}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = - {2, true, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = - {2, true, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = - {4, true, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = - {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_ONE, GL_ONE, GL_ONE, GL_RED}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = - {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_GREEN}}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = - {2, false, GL_RGB8_SNORM, GL_RGB, GL_BYTE}, /* FIXME: This might be signed */ - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = - {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_RED, GL_GREEN, GL_RED, GL_GREEN}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = - {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_GREEN, GL_RED, GL_RED, GL_GREEN}}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = - {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = - {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, - - /* Additional information is passed to the pixel shader via the swizzle: - * RED: The 
depth value. - * GREEN: 0 for 16-bit, 1 for 24 bit - * BLUE: 0 for fixed, 1 for float - */ - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = - {2, false, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, - {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = - {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, - {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = - /* FIXME: Uses fixed-point format to match surface format hack below. */ - {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, - {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = - {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, - {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = - {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, - {GL_RED, GL_ZERO, GL_ONE, GL_ZERO}, true}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = - {2, true, GL_R16, GL_RED, GL_UNSIGNED_SHORT, - {GL_RED, GL_RED, GL_RED, GL_ONE}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = - {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = - {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = - {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = - {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = - {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = - {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8} -}; - -static const SurfaceFormatInfo kelvin_surface_color_format_map[] = { - [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] 
= - {2, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, GL_COLOR_ATTACHMENT0}, - [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = - {2, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, GL_COLOR_ATTACHMENT0}, - [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = - {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0}, - [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = - {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0}, - - // FIXME: Map channel color - [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = - {1, GL_R8, GL_RED, GL_UNSIGNED_BYTE, GL_COLOR_ATTACHMENT0}, - [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = - {2, GL_RG8, GL_RG, GL_UNSIGNED_SHORT, GL_COLOR_ATTACHMENT0}, -}; - -static const SurfaceFormatInfo kelvin_surface_zeta_float_format_map[] = { - [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = - {2, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, GL_DEPTH_ATTACHMENT}, - [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = - /* FIXME: GL does not support packing floating-point Z24S8 OOTB, so for - * now just emulate this with fixed-point Z24S8. Possible compat - * improvement with custom conversion. 
- */ - {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT}, -}; - -static const SurfaceFormatInfo kelvin_surface_zeta_fixed_format_map[] = { - [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = - {2, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, GL_DEPTH_ATTACHMENT}, - [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = - {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT}, -}; - -static GLfloat supportedAliasedLineWidthRange[2] = { 0.0f, 0.0f }; -static GLfloat supportedSmoothLineWidthRange[2] = { 0.0f, 0.0f }; - -// static void pgraph_set_context_user(NV2AState *d, uint32_t val); -static void pgraph_gl_fence(void); -static GLuint pgraph_compile_shader(const char *vs_src, const char *fs_src); -static void pgraph_init_render_to_texture(NV2AState *d); -static void pgraph_init_display_renderer(NV2AState *d); -static void pgraph_method_log(unsigned int subchannel, unsigned int graphics_class, unsigned int method, uint32_t parameter); -static void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr); -static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg); -static void pgraph_shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, bool binding_changed, bool vertex_program, bool fixed_function); -static void pgraph_bind_shaders(PGRAPHState *pg); -static bool pgraph_framebuffer_dirty(PGRAPHState *pg); -static bool pgraph_color_write_enabled(PGRAPHState *pg); -static bool pgraph_zeta_write_enabled(PGRAPHState *pg); -static void pgraph_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta); -static void pgraph_wait_for_surface_download(SurfaceBinding *e); -static void pgraph_surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr, hwaddr len, bool write); -static SurfaceBinding *pgraph_surface_put(NV2AState *d, hwaddr addr, SurfaceBinding *e); -static SurfaceBinding *pgraph_surface_get(NV2AState *d, hwaddr addr); -static SurfaceBinding 
*pgraph_surface_get_within(NV2AState *d, hwaddr addr); -static void pgraph_unbind_surface(NV2AState *d, bool color); -static void pgraph_surface_invalidate(NV2AState *d, SurfaceBinding *e); -static void pgraph_surface_evict_old(NV2AState *d); -static void pgraph_download_surface_data_if_dirty(NV2AState *d, SurfaceBinding *surface); -static void pgraph_download_surface_data(NV2AState *d, SurfaceBinding *surface, bool force); -static void pgraph_download_surface_data_to_buffer(NV2AState *d, - SurfaceBinding *surface, - bool swizzle, bool flip, - bool downscale, - uint8_t *pixels); -static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force); -static bool pgraph_check_surface_compatibility(SurfaceBinding *s1, SurfaceBinding *s2, bool strict); -static bool pgraph_check_surface_to_texture_compatibility(const SurfaceBinding *surface, const TextureShape *shape); -static void pgraph_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, TextureShape *texture_shape, int texture_unit); -static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color); -static void pgraph_update_surface(NV2AState *d, bool upload, bool color_write, bool zeta_write); -static void pgraph_bind_textures(NV2AState *d); -static void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, unsigned int *width, unsigned int *height); -static void pgraph_apply_scaling_factor(PGRAPHState *pg, unsigned int *width, unsigned int *height); -static void pgraph_get_surface_dimensions(PGRAPHState *pg, unsigned int *width, unsigned int *height); -static void pgraph_update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size, bool quick); -static void pgraph_bind_vertex_attributes(NV2AState *d, unsigned int min_element, unsigned int max_element, bool inline_data, unsigned int inline_stride, unsigned int provoking_element); -static unsigned int pgraph_bind_inline_array(NV2AState *d); -static bool pgraph_is_texture_stage_active(PGRAPHState 
*pg, unsigned int stage); - -static float convert_f16_to_float(uint16_t f16); -static float convert_f24_to_float(uint32_t f24); -static uint8_t cliptobyte(int x); -static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, uint8_t *r, uint8_t *g, uint8_t* b); -static void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix, uint8_t *r, uint8_t *g, uint8_t* b); -static uint8_t* convert_texture_data(const TextureShape s, const uint8_t *data, const uint8_t *palette_data, unsigned int width, unsigned int height, unsigned int depth, unsigned int row_pitch, unsigned int slice_pitch); -static void upload_gl_texture(GLenum gl_target, const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data); -static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data); -static void texture_binding_destroy(gpointer data); -static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key); -static void texture_cache_entry_post_evict(Lru *lru, LruNode *node); -static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key); - -static void vertex_cache_entry_init(Lru *lru, LruNode *node, void *key) -{ - VertexLruNode *vnode = container_of(node, VertexLruNode, node); - memcpy(&vnode->key, key, sizeof(struct VertexKey)); - vnode->initialized = false; -} - -static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key) -{ - VertexLruNode *vnode = container_of(node, VertexLruNode, node); - return memcmp(&vnode->key, key, sizeof(VertexKey)); -} - -static void pgraph_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size); -static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size); -static unsigned int kelvin_map_stencil_op(uint32_t parameter); -static unsigned int kelvin_map_polygon_mode(uint32_t parameter); -static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel); -static void 
pgraph_reload_surface_scale_factor(NV2AState *d); - -static uint32_t pgraph_rdi_read(PGRAPHState *pg, - unsigned int select, unsigned int address) -{ - uint32_t r = 0; - switch(select) { - case RDI_INDEX_VTX_CONSTANTS0: - case RDI_INDEX_VTX_CONSTANTS1: - assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); - r = pg->vsh_constants[address / 4][3 - address % 4]; - break; - default: - fprintf(stderr, "nv2a: unknown rdi read select 0x%x address 0x%x\n", - select, address); - assert(false); - break; - } - return r; -} - -static void pgraph_rdi_write(PGRAPHState *pg, - unsigned int select, unsigned int address, - uint32_t val) -{ - switch(select) { - case RDI_INDEX_VTX_CONSTANTS0: - case RDI_INDEX_VTX_CONSTANTS1: - assert(false); /* Untested */ - assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); - pg->vsh_constants_dirty[address / 4] |= - (val != pg->vsh_constants[address / 4][3 - address % 4]); - pg->vsh_constants[address / 4][3 - address % 4] = val; - break; - default: - NV2A_DPRINTF("unknown rdi write select 0x%x, address 0x%x, val 0x%08x\n", - select, address, val); - break; - } -} - -uint64_t pgraph_read(void *opaque, hwaddr addr, unsigned int size) -{ - NV2AState *d = (NV2AState *)opaque; - PGRAPHState *pg = &d->pgraph; - - qemu_mutex_lock(&pg->lock); - - uint64_t r = 0; - switch (addr) { - case NV_PGRAPH_INTR: - r = pg->pending_interrupts; - break; - case NV_PGRAPH_INTR_EN: - r = pg->enabled_interrupts; - break; - case NV_PGRAPH_RDI_DATA: { - unsigned int select = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_SELECT); - unsigned int address = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_ADDRESS); - - r = pgraph_rdi_read(pg, select, address); - - /* FIXME: Overflow into select? 
*/ - assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, - NV_PGRAPH_RDI_INDEX_ADDRESS)); - SET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); - break; - } - default: - r = pg->regs[addr]; - break; - } - - qemu_mutex_unlock(&pg->lock); - - nv2a_reg_log_read(NV_PGRAPH, addr, size, r); - return r; -} - -void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) -{ - NV2AState *d = (NV2AState *)opaque; - PGRAPHState *pg = &d->pgraph; - - nv2a_reg_log_write(NV_PGRAPH, addr, size, val); - - qemu_mutex_lock(&d->pfifo.lock); // FIXME: Factor out fifo lock here - qemu_mutex_lock(&pg->lock); - - switch (addr) { - case NV_PGRAPH_INTR: - pg->pending_interrupts &= ~val; - - if (!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)) { - pg->waiting_for_nop = false; - } - if (!(pg->pending_interrupts & NV_PGRAPH_INTR_CONTEXT_SWITCH)) { - pg->waiting_for_context_switch = false; - } - pfifo_kick(d); - break; - case NV_PGRAPH_INTR_EN: - pg->enabled_interrupts = val; - break; - case NV_PGRAPH_INCREMENT: - if (val & NV_PGRAPH_INCREMENT_READ_3D) { - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_READ_3D, - (GET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_READ_3D)+1) - % GET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_MODULO_3D) ); - nv2a_profile_increment(); - pfifo_kick(d); - } - break; - case NV_PGRAPH_RDI_DATA: { - unsigned int select = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_SELECT); - unsigned int address = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_ADDRESS); - - pgraph_rdi_write(pg, select, address, val); - - /* FIXME: Overflow into select? 
*/ - assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, - NV_PGRAPH_RDI_INDEX_ADDRESS)); - SET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); - break; - } - case NV_PGRAPH_CHANNEL_CTX_TRIGGER: { - hwaddr context_address = - GET_MASK(pg->regs[NV_PGRAPH_CHANNEL_CTX_POINTER], - NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4; - - if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN) { -#ifdef DEBUG_NV2A - unsigned pgraph_channel_id = - GET_MASK(pg->regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); -#endif - NV2A_DPRINTF("PGRAPH: read channel %d context from %" HWADDR_PRIx "\n", - pgraph_channel_id, context_address); - - assert(context_address < memory_region_size(&d->ramin)); - - uint8_t *context_ptr = d->ramin_ptr + context_address; - uint32_t context_user = ldl_le_p((uint32_t*)context_ptr); - - NV2A_DPRINTF(" - CTX_USER = 0x%x\n", context_user); - - pg->regs[NV_PGRAPH_CTX_USER] = context_user; - // pgraph_set_context_user(d, context_user); - } - if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT) { - /* do stuff ... 
*/ - } - - break; - } - default: - pg->regs[addr] = val; - break; - } - - // events - switch (addr) { - case NV_PGRAPH_FIFO: - pfifo_kick(d); - break; - } - - qemu_mutex_unlock(&pg->lock); - qemu_mutex_unlock(&d->pfifo.lock); -} - -void pgraph_flush(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - bool update_surface = (pg->color_binding || pg->zeta_binding); - - /* Clear last surface shape to force recreation of buffers at next draw */ - pg->surface_color.draw_dirty = false; - pg->surface_zeta.draw_dirty = false; - memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape)); - pgraph_unbind_surface(d, true); - pgraph_unbind_surface(d, false); - - SurfaceBinding *s, *next; - QTAILQ_FOREACH_SAFE(s, &d->pgraph.surfaces, entry, next) { - pgraph_surface_invalidate(d, s); - } - - pgraph_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram)); - - /* Sync all RAM */ - glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.gl_memory_buffer); - glBufferSubData(GL_ARRAY_BUFFER, 0, memory_region_size(d->vram), d->vram_ptr); - - /* FIXME: Flush more? 
*/ - - pgraph_reload_surface_scale_factor(d); - - if (update_surface) { - pgraph_update_surface(d, true, true, true); - } - - qatomic_set(&d->pgraph.flush_pending, false); - qemu_event_set(&d->pgraph.flush_complete); -} - -#define METHOD_ADDR(gclass, name) \ - gclass ## _ ## name -#define METHOD_ADDR_TO_INDEX(x) ((x)>>2) -#define METHOD_NAME_STR(gclass, name) \ - tostring(gclass ## _ ## name) -#define METHOD_FUNC_NAME(gclass, name) \ - pgraph_ ## gclass ## _ ## name ## _handler -#define METHOD_HANDLER_ARG_DECL \ - NV2AState *d, PGRAPHState *pg, \ - unsigned int subchannel, unsigned int method, \ - uint32_t parameter, uint32_t *parameters, \ - size_t num_words_available, size_t *num_words_consumed, bool inc -#define METHOD_HANDLER_ARGS \ - d, pg, subchannel, method, parameter, parameters, \ - num_words_available, num_words_consumed, inc -#define DEF_METHOD_PROTO(gclass, name) \ - static void METHOD_FUNC_NAME(gclass, name)(METHOD_HANDLER_ARG_DECL) - -#define DEF_METHOD(gclass, name) \ - DEF_METHOD_PROTO(gclass, name); -#define DEF_METHOD_RANGE(gclass, name, range) \ - DEF_METHOD_PROTO(gclass, name); -#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* Drop */ -#define DEF_METHOD_CASE_4(gclass, name, stride) \ - DEF_METHOD_PROTO(gclass, name); -#include "pgraph_methods.h" -#undef DEF_METHOD -#undef DEF_METHOD_RANGE -#undef DEF_METHOD_CASE_4_OFFSET -#undef DEF_METHOD_CASE_4 - -typedef void (*MethodFunc)(METHOD_HANDLER_ARG_DECL); -static const struct { - uint32_t base; - const char *name; - MethodFunc handler; -} pgraph_kelvin_methods[0x800] = { -#define DEF_METHOD(gclass, name) \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name))] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, -#define DEF_METHOD_RANGE(gclass, name, range) \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name)) \ - ... 
METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + 4*range - 1)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, -#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 2)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 3)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, -#define DEF_METHOD_CASE_4(gclass, name, stride) \ - DEF_METHOD_CASE_4_OFFSET(gclass, name, 0, stride) -#include "pgraph_methods.h" -#undef DEF_METHOD -#undef DEF_METHOD_RANGE -#undef DEF_METHOD_CASE_4_OFFSET -#undef DEF_METHOD_CASE_4 -}; - -#define METHOD_RANGE_END_NAME(gclass, name) \ - pgraph_ ## gclass ## _ ## name ## __END -#define DEF_METHOD(gclass, name) \ - static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ - METHOD_ADDR(gclass, name) + 4; -#define DEF_METHOD_RANGE(gclass, name, range) \ - static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ - METHOD_ADDR(gclass, name) + 4*range; -#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* drop */ -#define DEF_METHOD_CASE_4(gclass, name, stride) \ - static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ - METHOD_ADDR(gclass, name) + 4*stride; -#include "pgraph_methods.h" -#undef DEF_METHOD -#undef DEF_METHOD_RANGE -#undef DEF_METHOD_CASE_4_OFFSET -#undef DEF_METHOD_CASE_4 - -static void 
pgraph_method_inc(MethodFunc handler, uint32_t end, - METHOD_HANDLER_ARG_DECL) -{ - if (!inc) { - handler(METHOD_HANDLER_ARGS); - return; - } - size_t count = MIN(num_words_available, (end - method) / 4); - for (size_t i = 0; i < count; i++) { - parameter = ldl_le_p(parameters + i); - if (i) { - pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method, - parameter); - } - handler(METHOD_HANDLER_ARGS); - method += 4; - } - *num_words_consumed = count; -} - -static void pgraph_method_non_inc(MethodFunc handler, METHOD_HANDLER_ARG_DECL) -{ - if (inc) { - handler(METHOD_HANDLER_ARGS); - return; - } - - for (size_t i = 0; i < num_words_available; i++) { - parameter = ldl_le_p(parameters + i); - if (i) { - pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method, - parameter); - } - handler(METHOD_HANDLER_ARGS); - } - *num_words_consumed = num_words_available; -} - -#define METHOD_FUNC_NAME_INT(gclass, name) METHOD_FUNC_NAME(gclass, name##_int) -#define DEF_METHOD_INT(gclass, name) DEF_METHOD(gclass, name##_int) -#define DEF_METHOD(gclass, name) DEF_METHOD_PROTO(gclass, name) - -#define DEF_METHOD_INC(gclass, name) \ - DEF_METHOD_INT(gclass, name); \ - DEF_METHOD(gclass, name) \ - { \ - pgraph_method_inc(METHOD_FUNC_NAME_INT(gclass, name), \ - METHOD_RANGE_END_NAME(gclass, name), \ - METHOD_HANDLER_ARGS); \ - } \ - DEF_METHOD_INT(gclass, name) - -#define DEF_METHOD_NON_INC(gclass, name) \ - DEF_METHOD_INT(gclass, name); \ - DEF_METHOD(gclass, name) \ - { \ - pgraph_method_non_inc(METHOD_FUNC_NAME_INT(gclass, name), \ - METHOD_HANDLER_ARGS); \ - } \ - DEF_METHOD_INT(gclass, name) - -// TODO: Optimize. Ideally this should all be done via OpenGL. 
-static void pgraph_image_blit(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d; - ImageBlitState *image_blit = &pg->image_blit; - BetaState *beta = &pg->beta; - - pgraph_update_surface(d, false, true, true); - - assert(context_surfaces->object_instance == image_blit->context_surfaces); - - unsigned int bytes_per_pixel; - switch (context_surfaces->color_format) { - case NV062_SET_COLOR_FORMAT_LE_Y8: - bytes_per_pixel = 1; - break; - case NV062_SET_COLOR_FORMAT_LE_R5G6B5: - bytes_per_pixel = 2; - break; - case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8: - case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: - case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: - case NV062_SET_COLOR_FORMAT_LE_Y32: - bytes_per_pixel = 4; - break; - default: - fprintf(stderr, "Unknown blit surface format: 0x%x\n", - context_surfaces->color_format); - assert(false); - break; - } - - hwaddr source_dma_len, dest_dma_len; - - uint8_t *source = (uint8_t *)nv_dma_map( - d, context_surfaces->dma_image_source, &source_dma_len); - assert(context_surfaces->source_offset < source_dma_len); - source += context_surfaces->source_offset; - - uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest, - &dest_dma_len); - assert(context_surfaces->dest_offset < dest_dma_len); - dest += context_surfaces->dest_offset; - - hwaddr source_addr = source - d->vram_ptr; - hwaddr dest_addr = dest - d->vram_ptr; - - SurfaceBinding *surf_src = pgraph_surface_get(d, source_addr); - if (surf_src) { - pgraph_download_surface_data_if_dirty(d, surf_src); - } - - SurfaceBinding *surf_dest = pgraph_surface_get(d, dest_addr); - if (surf_dest) { - if (image_blit->height < surf_dest->height || - image_blit->width < surf_dest->width) { - pgraph_download_surface_data_if_dirty(d, surf_dest); - } else { - // The blit will completely replace the surface so any pending - // download should be discarded. 
- surf_dest->download_pending = false; - surf_dest->draw_dirty = false; - } - surf_dest->upload_pending = true; - pg->draw_time++; - } - - hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch + - image_blit->in_x * bytes_per_pixel; - hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch + - image_blit->out_x * bytes_per_pixel; - - hwaddr source_size = - (image_blit->height - 1) * context_surfaces->source_pitch + - image_blit->width * bytes_per_pixel; - hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch + - image_blit->width * bytes_per_pixel; - - /* FIXME: What does hardware do in this case? */ - assert(source_addr + source_offset + source_size <= - memory_region_size(d->vram)); - assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram)); - - uint8_t *source_row = source + source_offset; - uint8_t *dest_row = dest + dest_offset; - - if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) { - NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY"); - for (unsigned int y = 0; y < image_blit->height; y++) { - memmove(dest_row, source_row, image_blit->width * bytes_per_pixel); - source_row += context_surfaces->source_pitch; - dest_row += context_surfaces->dest_pitch; - } - } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) { - NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND"); - uint32_t max_beta_mult = 0x7f80; - uint32_t beta_mult = beta->beta >> 16; - uint32_t inv_beta_mult = max_beta_mult - beta_mult; - for (unsigned int y = 0; y < image_blit->height; y++) { - for (unsigned int x = 0; x < image_blit->width; x++) { - for (unsigned int ch = 0; ch < 3; ch++) { - uint32_t a = source_row[x * 4 + ch] * beta_mult; - uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult; - dest_row[x * 4 + ch] = (a + b) / max_beta_mult; - } - } - source_row += context_surfaces->source_pitch; - dest_row += context_surfaces->dest_pitch; - } - } else { - fprintf(stderr, "Unknown blit operation: 
0x%x\n", - image_blit->operation); - assert(false && "Unknown blit operation"); - } - - NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr); - - bool needs_alpha_patching; - uint8_t alpha_override; - switch (context_surfaces->color_format) { - case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: - needs_alpha_patching = true; - alpha_override = 0xff; - break; - case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: - needs_alpha_patching = true; - alpha_override = 0; - break; - default: - needs_alpha_patching = false; - alpha_override = 0; - } - - if (needs_alpha_patching) { - dest_row = dest + dest_offset; - for (unsigned int y = 0; y < image_blit->height; y++) { - for (unsigned int x = 0; x < image_blit->width; x++) { - dest_row[x * 4 + 3] = alpha_override; - } - dest_row += context_surfaces->dest_pitch; - } - } - - dest_addr += dest_offset; - memory_region_set_client_dirty(d->vram, dest_addr, dest_size, - DIRTY_MEMORY_VGA); - memory_region_set_client_dirty(d->vram, dest_addr, dest_size, - DIRTY_MEMORY_NV2A_TEX); -} - -int pgraph_method(NV2AState *d, unsigned int subchannel, - unsigned int method, uint32_t parameter, - uint32_t *parameters, size_t num_words_available, - size_t max_lookahead_words, bool inc) -{ - int num_processed = 1; - - assert(glGetError() == GL_NO_ERROR); - - PGRAPHState *pg = &d->pgraph; - - bool channel_valid = - d->pgraph.regs[NV_PGRAPH_CTX_CONTROL] & NV_PGRAPH_CTX_CONTROL_CHID; - assert(channel_valid); - - ContextSurfaces2DState *context_surfaces_2d = &pg->context_surfaces_2d; - ImageBlitState *image_blit = &pg->image_blit; - BetaState *beta = &pg->beta; - - assert(subchannel < 8); - - if (method == NV_SET_OBJECT) { - assert(parameter < memory_region_size(&d->ramin)); - uint8_t *obj_ptr = d->ramin_ptr + parameter; - - uint32_t ctx_1 = ldl_le_p((uint32_t*)obj_ptr); - uint32_t ctx_2 = ldl_le_p((uint32_t*)(obj_ptr+4)); - uint32_t ctx_3 = ldl_le_p((uint32_t*)(obj_ptr+8)); - uint32_t ctx_4 = ldl_le_p((uint32_t*)(obj_ptr+12)); - uint32_t ctx_5 = 
parameter; - - pg->regs[NV_PGRAPH_CTX_CACHE1 + subchannel * 4] = ctx_1; - pg->regs[NV_PGRAPH_CTX_CACHE2 + subchannel * 4] = ctx_2; - pg->regs[NV_PGRAPH_CTX_CACHE3 + subchannel * 4] = ctx_3; - pg->regs[NV_PGRAPH_CTX_CACHE4 + subchannel * 4] = ctx_4; - pg->regs[NV_PGRAPH_CTX_CACHE5 + subchannel * 4] = ctx_5; - } - - // is this right? - pg->regs[NV_PGRAPH_CTX_SWITCH1] = pg->regs[NV_PGRAPH_CTX_CACHE1 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH2] = pg->regs[NV_PGRAPH_CTX_CACHE2 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH3] = pg->regs[NV_PGRAPH_CTX_CACHE3 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH4] = pg->regs[NV_PGRAPH_CTX_CACHE4 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH5] = pg->regs[NV_PGRAPH_CTX_CACHE5 + subchannel * 4]; - - uint32_t graphics_class = GET_MASK(pg->regs[NV_PGRAPH_CTX_SWITCH1], - NV_PGRAPH_CTX_SWITCH1_GRCLASS); - - pgraph_method_log(subchannel, graphics_class, method, parameter); - - if (subchannel != 0) { - // catches context switching issues on xbox d3d - assert(graphics_class != 0x97); - } - - /* ugly switch for now */ - switch (graphics_class) { - case NV_BETA: { - switch (method) { - case NV012_SET_OBJECT: - beta->object_instance = parameter; - break; - case NV012_SET_BETA: - if (parameter & 0x80000000) { - beta->beta = 0; - } else { - // The parameter is a signed fixed-point number with a sign bit - // and 31 fractional bits. Note that negative values are clamped - // to 0, and only 8 fractional bits are actually implemented in - // hardware. 
- beta->beta = parameter & 0x7f800000; - } - break; - default: - goto unhandled; - } - break; - } - case NV_CONTEXT_PATTERN: { - switch (method) { - case NV044_SET_MONOCHROME_COLOR0: - pg->regs[NV_PGRAPH_PATT_COLOR0] = parameter; - break; - default: - goto unhandled; - } - break; - } - case NV_CONTEXT_SURFACES_2D: { - switch (method) { - case NV062_SET_OBJECT: - context_surfaces_2d->object_instance = parameter; - break; - case NV062_SET_CONTEXT_DMA_IMAGE_SOURCE: - context_surfaces_2d->dma_image_source = parameter; - break; - case NV062_SET_CONTEXT_DMA_IMAGE_DESTIN: - context_surfaces_2d->dma_image_dest = parameter; - break; - case NV062_SET_COLOR_FORMAT: - context_surfaces_2d->color_format = parameter; - break; - case NV062_SET_PITCH: - context_surfaces_2d->source_pitch = parameter & 0xFFFF; - context_surfaces_2d->dest_pitch = parameter >> 16; - break; - case NV062_SET_OFFSET_SOURCE: - context_surfaces_2d->source_offset = parameter & 0x07FFFFFF; - break; - case NV062_SET_OFFSET_DESTIN: - context_surfaces_2d->dest_offset = parameter & 0x07FFFFFF; - break; - default: - goto unhandled; - } - break; - } - case NV_IMAGE_BLIT: { - switch (method) { - case NV09F_SET_OBJECT: - image_blit->object_instance = parameter; - break; - case NV09F_SET_CONTEXT_SURFACES: - image_blit->context_surfaces = parameter; - break; - case NV09F_SET_OPERATION: - image_blit->operation = parameter; - break; - case NV09F_CONTROL_POINT_IN: - image_blit->in_x = parameter & 0xFFFF; - image_blit->in_y = parameter >> 16; - break; - case NV09F_CONTROL_POINT_OUT: - image_blit->out_x = parameter & 0xFFFF; - image_blit->out_y = parameter >> 16; - break; - case NV09F_SIZE: - image_blit->width = parameter & 0xFFFF; - image_blit->height = parameter >> 16; - - if (image_blit->width && image_blit->height) { - pgraph_image_blit(d); - } - break; - default: - goto unhandled; - } - break; - } - case NV_KELVIN_PRIMITIVE: { - MethodFunc handler = - pgraph_kelvin_methods[METHOD_ADDR_TO_INDEX(method)].handler; - if 
(handler == NULL) { - goto unhandled; - } - size_t num_words_consumed = 1; - handler(d, pg, subchannel, method, parameter, parameters, - num_words_available, &num_words_consumed, inc); - - /* Squash repeated BEGIN,DRAW_ARRAYS,END */ - #define LAM(i, mthd) ((parameters[i*2+1] & 0x31fff) == (mthd)) - #define LAP(i, prm) (parameters[i*2+2] == (prm)) - #define LAMP(i, mthd, prm) (LAM(i, mthd) && LAP(i, prm)) - - if (method == NV097_DRAW_ARRAYS && (max_lookahead_words >= 7) && - pg->inline_elements_length == 0 && - pg->draw_arrays_length < - (ARRAY_SIZE(pg->gl_draw_arrays_start) - 1) && - LAMP(0, NV097_SET_BEGIN_END, NV097_SET_BEGIN_END_OP_END) && - LAMP(1, NV097_SET_BEGIN_END, pg->primitive_mode) && - LAM(2, NV097_DRAW_ARRAYS)) { - num_words_consumed += 4; - pg->draw_arrays_prevent_connect = true; - } - - #undef LAM - #undef LAP - #undef LAMP - - num_processed = num_words_consumed; - break; - } - default: - goto unhandled; - } - - return num_processed; - -unhandled: - trace_nv2a_pgraph_method_unhandled(subchannel, graphics_class, - method, parameter); - return num_processed; -} - -DEF_METHOD(NV097, SET_OBJECT) -{ - pg->kelvin.object_instance = parameter; -} - -DEF_METHOD(NV097, NO_OPERATION) -{ - /* The bios uses nop as a software method call - - * it seems to expect a notify interrupt if the parameter isn't 0. - * According to a nouveau guy it should still be a nop regardless - * of the parameter. It's possible a debug register enables this, - * but nothing obvious sticks out. Weird. 
- */ - if (parameter == 0) { - return; - } - - unsigned channel_id = - GET_MASK(pg->regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); - - assert(!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)); - - SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], NV_PGRAPH_TRAPPED_ADDR_CHID, - channel_id); - SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], NV_PGRAPH_TRAPPED_ADDR_SUBCH, - subchannel); - SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], NV_PGRAPH_TRAPPED_ADDR_MTHD, - method); - pg->regs[NV_PGRAPH_TRAPPED_DATA_LOW] = parameter; - pg->regs[NV_PGRAPH_NSOURCE] = - NV_PGRAPH_NSOURCE_NOTIFICATION; /* TODO: check this */ - pg->pending_interrupts |= NV_PGRAPH_INTR_ERROR; - pg->waiting_for_nop = true; - - qemu_mutex_unlock(&pg->lock); - qemu_mutex_lock_iothread(); - nv2a_update_irq(d); - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&pg->lock); -} - -DEF_METHOD(NV097, WAIT_FOR_IDLE) -{ - pgraph_update_surface(d, false, true, true); -} - -DEF_METHOD(NV097, SET_FLIP_READ) -{ - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D, - parameter); -} - -DEF_METHOD(NV097, SET_FLIP_WRITE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D, - parameter); -} - -DEF_METHOD(NV097, SET_FLIP_MODULO) -{ - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D, - parameter); -} - -DEF_METHOD(NV097, FLIP_INCREMENT_WRITE) -{ - uint32_t old = - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D); - - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_WRITE_3D, - (GET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_WRITE_3D)+1) - % GET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_MODULO_3D) ); - - uint32_t new = - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D); - - trace_nv2a_pgraph_flip_increment_write(old, new); - NV2A_GL_DFRAME_TERMINATOR(); - pg->frame_time++; -} - -DEF_METHOD(NV097, FLIP_STALL) -{ - trace_nv2a_pgraph_flip_stall(); - pgraph_update_surface(d, false, true, true); - nv2a_profile_flip_stall(); - 
pg->waiting_for_flip = true; -} - -// TODO: these should be loading the dma objects from ramin here? - -DEF_METHOD(NV097, SET_CONTEXT_DMA_NOTIFIES) -{ - pg->dma_notifies = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_A) -{ - pg->dma_a = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_B) -{ - pg->dma_b = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_STATE) -{ - pg->dma_state = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_COLOR) -{ - /* try to get any straggling draws in before the surface's changed :/ */ - pgraph_update_surface(d, false, true, true); - - pg->dma_color = parameter; - pg->surface_color.buffer_dirty = true; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_ZETA) -{ - pg->dma_zeta = parameter; - pg->surface_zeta.buffer_dirty = true; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_A) -{ - pg->dma_vertex_a = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_B) -{ - pg->dma_vertex_b = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_SEMAPHORE) -{ - pg->dma_semaphore = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_REPORT) -{ - pgraph_process_pending_reports(d); - - pg->dma_report = parameter; -} - -DEF_METHOD(NV097, SET_SURFACE_CLIP_HORIZONTAL) -{ - pgraph_update_surface(d, false, true, true); - - pg->surface_shape.clip_x = - GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_X); - pg->surface_shape.clip_width = - GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_WIDTH); -} - -DEF_METHOD(NV097, SET_SURFACE_CLIP_VERTICAL) -{ - pgraph_update_surface(d, false, true, true); - - pg->surface_shape.clip_y = - GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_Y); - pg->surface_shape.clip_height = - GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_HEIGHT); -} - -DEF_METHOD(NV097, SET_SURFACE_FORMAT) -{ - pgraph_update_surface(d, false, true, true); - - pg->surface_shape.color_format = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_COLOR); - pg->surface_shape.zeta_format = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ZETA); - 
pg->surface_shape.anti_aliasing = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ANTI_ALIASING); - pg->surface_shape.log_width = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_WIDTH); - pg->surface_shape.log_height = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_HEIGHT); - - int surface_type = GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_TYPE); - if (surface_type != pg->surface_type) { - pg->surface_type = surface_type; - pg->surface_color.buffer_dirty = true; - pg->surface_zeta.buffer_dirty = true; - } -} - -DEF_METHOD(NV097, SET_SURFACE_PITCH) -{ - pgraph_update_surface(d, false, true, true); - unsigned int color_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_COLOR); - unsigned int zeta_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_ZETA); - - pg->surface_color.buffer_dirty |= (pg->surface_color.pitch != color_pitch); - pg->surface_color.pitch = color_pitch; - - pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.pitch != zeta_pitch); - pg->surface_zeta.pitch = zeta_pitch; -} - -DEF_METHOD(NV097, SET_SURFACE_COLOR_OFFSET) -{ - pgraph_update_surface(d, false, true, true); - pg->surface_color.buffer_dirty |= (pg->surface_color.offset != parameter); - pg->surface_color.offset = parameter; -} - -DEF_METHOD(NV097, SET_SURFACE_ZETA_OFFSET) -{ - pgraph_update_surface(d, false, true, true); - pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.offset != parameter); - pg->surface_zeta.offset = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_ICW) -{ - int slot = (method - NV097_SET_COMBINER_ALPHA_ICW) / 4; - pg->regs[NV_PGRAPH_COMBINEALPHAI0 + slot*4] = parameter; -} - -DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW0) -{ - pg->regs[NV_PGRAPH_COMBINESPECFOG0] = parameter; -} - -DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW1) -{ - pg->regs[NV_PGRAPH_COMBINESPECFOG1] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_ADDRESS) -{ - int slot = (method - NV097_SET_TEXTURE_ADDRESS) / 64; - pg->regs[NV_PGRAPH_TEXADDRESS0 + slot * 4] = parameter; -} - 
-DEF_METHOD(NV097, SET_CONTROL0) -{ - pgraph_update_surface(d, false, true, true); - - bool stencil_write_enable = - parameter & NV097_SET_CONTROL0_STENCIL_WRITE_ENABLE; - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE, - stencil_write_enable); - - uint32_t z_format = GET_MASK(parameter, NV097_SET_CONTROL0_Z_FORMAT); - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_Z_FORMAT, z_format); - - bool z_perspective = - parameter & NV097_SET_CONTROL0_Z_PERSPECTIVE_ENABLE; - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE, - z_perspective); -} - -DEF_METHOD(NV097, SET_COLOR_MATERIAL) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_EMISSION, - (parameter >> 0) & 3); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_AMBIENT, - (parameter >> 2) & 3); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_DIFFUSE, - (parameter >> 4) & 3); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_SPECULAR, - (parameter >> 6) & 3); -} - -DEF_METHOD(NV097, SET_FOG_MODE) -{ - /* FIXME: There is also NV_PGRAPH_CSV0_D_FOG_MODE */ - unsigned int mode; - switch (parameter) { - case NV097_SET_FOG_MODE_V_LINEAR: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR; break; - case NV097_SET_FOG_MODE_V_EXP: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP; break; - case NV097_SET_FOG_MODE_V_EXP2: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2; break; - case NV097_SET_FOG_MODE_V_EXP_ABS: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP_ABS; break; - case NV097_SET_FOG_MODE_V_EXP2_ABS: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2_ABS; break; - case NV097_SET_FOG_MODE_V_LINEAR_ABS: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR_ABS; break; - default: - assert(false); - break; - } - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_FOG_MODE, - mode); -} - -DEF_METHOD(NV097, SET_FOG_GEN_MODE) -{ - unsigned int mode; - switch (parameter) { - case NV097_SET_FOG_GEN_MODE_V_SPEC_ALPHA: - mode = 
NV_PGRAPH_CSV0_D_FOGGENMODE_SPEC_ALPHA; break; - case NV097_SET_FOG_GEN_MODE_V_RADIAL: - mode = NV_PGRAPH_CSV0_D_FOGGENMODE_RADIAL; break; - case NV097_SET_FOG_GEN_MODE_V_PLANAR: - mode = NV_PGRAPH_CSV0_D_FOGGENMODE_PLANAR; break; - case NV097_SET_FOG_GEN_MODE_V_ABS_PLANAR: - mode = NV_PGRAPH_CSV0_D_FOGGENMODE_ABS_PLANAR; break; - case NV097_SET_FOG_GEN_MODE_V_FOG_X: - mode = NV_PGRAPH_CSV0_D_FOGGENMODE_FOG_X; break; - default: - assert(false); - break; - } - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_FOGGENMODE, mode); -} - -DEF_METHOD(NV097, SET_FOG_ENABLE) -{ - /* - FIXME: There is also: - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_FOGENABLE, - parameter); - */ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_FOGENABLE, - parameter); -} - -DEF_METHOD(NV097, SET_FOG_COLOR) -{ - /* PGRAPH channels are ARGB, parameter channels are ABGR */ - uint8_t red = GET_MASK(parameter, NV097_SET_FOG_COLOR_RED); - uint8_t green = GET_MASK(parameter, NV097_SET_FOG_COLOR_GREEN); - uint8_t blue = GET_MASK(parameter, NV097_SET_FOG_COLOR_BLUE); - uint8_t alpha = GET_MASK(parameter, NV097_SET_FOG_COLOR_ALPHA); - SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_RED, red); - SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_GREEN, green); - SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_BLUE, blue); - SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_ALPHA, alpha); -} - -DEF_METHOD(NV097, SET_WINDOW_CLIP_TYPE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE, parameter); -} - -DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_HORIZONTAL) -{ - int slot = (method - NV097_SET_WINDOW_CLIP_HORIZONTAL) / 4; - for (; slot < 8; ++slot) { - pg->regs[NV_PGRAPH_WINDOWCLIPX0 + slot * 4] = parameter; - } -} - -DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_VERTICAL) -{ - int slot = (method - NV097_SET_WINDOW_CLIP_VERTICAL) / 4; - for (; slot < 8; ++slot) { - pg->regs[NV_PGRAPH_WINDOWCLIPY0 + slot * 4] = 
parameter; - } -} - -DEF_METHOD(NV097, SET_ALPHA_TEST_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHATESTENABLE, parameter); -} - -DEF_METHOD(NV097, SET_BLEND_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_EN, parameter); -} - -DEF_METHOD(NV097, SET_CULL_FACE_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_CULLENABLE, - parameter); -} - -DEF_METHOD(NV097, SET_DEPTH_TEST_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], NV_PGRAPH_CONTROL_0_ZENABLE, - parameter); -} - -DEF_METHOD(NV097, SET_DITHER_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_DITHERENABLE, parameter); -} - -DEF_METHOD(NV097, SET_LIGHTING_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_LIGHTING, - parameter); -} - -DEF_METHOD(NV097, SET_POINT_PARAMS_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_POINTPARAMSENABLE, - parameter); - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_POINTPARAMSENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POINT_SMOOTH_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE, parameter); -} - -DEF_METHOD(NV097, SET_LINE_SMOOTH_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POLY_SMOOTH_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE, parameter); -} - -DEF_METHOD(NV097, SET_SKIN_MODE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_SKIN, - parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_TEST_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POLY_OFFSET_POINT_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POLY_OFFSET_LINE_ENABLE) -{ - 
SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POLY_OFFSET_FILL_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE, parameter); -} - -DEF_METHOD(NV097, SET_ALPHA_FUNC) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAFUNC, parameter & 0xF); -} - -DEF_METHOD(NV097, SET_ALPHA_REF) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAREF, parameter); -} - -DEF_METHOD(NV097, SET_BLEND_FUNC_SFACTOR) -{ - unsigned int factor; - switch (parameter) { - case NV097_SET_BLEND_FUNC_SFACTOR_V_ZERO: - factor = NV_PGRAPH_BLEND_SFACTOR_ZERO; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_SRC_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_DST_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_DST_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA_SATURATE: - factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA_SATURATE; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_COLOR: - factor = 
NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_ALPHA; break; - default: - NV2A_DPRINTF("Unknown blend source factor: 0x%08x\n", parameter); - return; /* discard */ - } - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_SFACTOR, factor); -} - -DEF_METHOD(NV097, SET_BLEND_FUNC_DFACTOR) -{ - unsigned int factor; - switch (parameter) { - case NV097_SET_BLEND_FUNC_DFACTOR_V_ZERO: - factor = NV_PGRAPH_BLEND_DFACTOR_ZERO; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_SRC_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_DST_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_DST_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA_SATURATE: - factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA_SATURATE; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_COLOR: - factor = 
NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_ALPHA; break; - default: - NV2A_DPRINTF("Unknown blend destination factor: 0x%08x\n", parameter); - return; /* discard */ - } - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_DFACTOR, factor); -} - -DEF_METHOD(NV097, SET_BLEND_COLOR) -{ - pg->regs[NV_PGRAPH_BLENDCOLOR] = parameter; -} - -DEF_METHOD(NV097, SET_BLEND_EQUATION) -{ - unsigned int equation; - switch (parameter) { - case NV097_SET_BLEND_EQUATION_V_FUNC_SUBTRACT: - equation = 0; break; - case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT: - equation = 1; break; - case NV097_SET_BLEND_EQUATION_V_FUNC_ADD: - equation = 2; break; - case NV097_SET_BLEND_EQUATION_V_MIN: - equation = 3; break; - case NV097_SET_BLEND_EQUATION_V_MAX: - equation = 4; break; - case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT_SIGNED: - equation = 5; break; - case NV097_SET_BLEND_EQUATION_V_FUNC_ADD_SIGNED: - equation = 6; break; - default: - NV2A_DPRINTF("Unknown blend equation: 0x%08x\n", parameter); - return; /* discard */ - } - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_EQN, equation); -} - -DEF_METHOD(NV097, SET_DEPTH_FUNC) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], NV_PGRAPH_CONTROL_0_ZFUNC, - parameter & 0xF); -} - -DEF_METHOD(NV097, SET_COLOR_MASK) -{ - pg->surface_color.write_enabled_cache |= pgraph_color_write_enabled(pg); - - bool alpha = parameter & NV097_SET_COLOR_MASK_ALPHA_WRITE_ENABLE; - bool red = parameter & NV097_SET_COLOR_MASK_RED_WRITE_ENABLE; - bool green = parameter & NV097_SET_COLOR_MASK_GREEN_WRITE_ENABLE; - bool blue = parameter & NV097_SET_COLOR_MASK_BLUE_WRITE_ENABLE; - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE, alpha); - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - 
NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE, red); - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE, green); - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE, blue); -} - -DEF_METHOD(NV097, SET_DEPTH_MASK) -{ - pg->surface_zeta.write_enabled_cache |= pgraph_zeta_write_enabled(pg); - - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ZWRITEENABLE, parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_MASK) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE, parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_FUNC) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_FUNC, parameter & 0xF); -} - -DEF_METHOD(NV097, SET_STENCIL_FUNC_REF) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_REF, parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_FUNC_MASK) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ, parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_OP_FAIL) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL, - kelvin_map_stencil_op(parameter)); -} - -DEF_METHOD(NV097, SET_STENCIL_OP_ZFAIL) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL, - kelvin_map_stencil_op(parameter)); -} - -DEF_METHOD(NV097, SET_STENCIL_OP_ZPASS) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS, - kelvin_map_stencil_op(parameter)); -} - -DEF_METHOD(NV097, SET_SHADE_MODE) -{ - switch (parameter) { - case NV097_SET_SHADE_MODE_V_FLAT: - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE, - NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT); - break; - case NV097_SET_SHADE_MODE_V_SMOOTH: - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE, - NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH); - break; - default: - /* Discard */ - break; - } -} - -DEF_METHOD(NV097, SET_POLYGON_OFFSET_SCALE_FACTOR) -{ - 
pg->regs[NV_PGRAPH_ZOFFSETFACTOR] = parameter; -} - -DEF_METHOD(NV097, SET_POLYGON_OFFSET_BIAS) -{ - pg->regs[NV_PGRAPH_ZOFFSETBIAS] = parameter; -} - -DEF_METHOD(NV097, SET_FRONT_POLYGON_MODE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_FRONTFACEMODE, - kelvin_map_polygon_mode(parameter)); -} - -DEF_METHOD(NV097, SET_BACK_POLYGON_MODE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_BACKFACEMODE, - kelvin_map_polygon_mode(parameter)); -} - -DEF_METHOD(NV097, SET_CLIP_MIN) -{ - pg->regs[NV_PGRAPH_ZCLIPMIN] = parameter; -} - -DEF_METHOD(NV097, SET_CLIP_MAX) -{ - pg->regs[NV_PGRAPH_ZCLIPMAX] = parameter; -} - -DEF_METHOD(NV097, SET_CULL_FACE) -{ - unsigned int face; - switch (parameter) { - case NV097_SET_CULL_FACE_V_FRONT: - face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT; break; - case NV097_SET_CULL_FACE_V_BACK: - face = NV_PGRAPH_SETUPRASTER_CULLCTRL_BACK; break; - case NV097_SET_CULL_FACE_V_FRONT_AND_BACK: - face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT_AND_BACK; break; - default: - assert(false); - break; - } - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_CULLCTRL, - face); -} - -DEF_METHOD(NV097, SET_FRONT_FACE) -{ - bool ccw; - switch (parameter) { - case NV097_SET_FRONT_FACE_V_CW: - ccw = false; break; - case NV097_SET_FRONT_FACE_V_CCW: - ccw = true; break; - default: - NV2A_DPRINTF("Unknown front face: 0x%08x\n", parameter); - return; /* discard */ - } - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_FRONTFACE, - ccw ? 
1 : 0); -} - -DEF_METHOD(NV097, SET_NORMALIZATION_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE, - parameter); -} - -DEF_METHOD_INC(NV097, SET_MATERIAL_EMISSION) -{ - int slot = (method - NV097_SET_MATERIAL_EMISSION) / 4; - // FIXME: Verify NV_IGRAPH_XF_LTCTXA_CM_COL is correct - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_CM_COL][slot] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_CM_COL] = true; -} - -DEF_METHOD(NV097, SET_MATERIAL_ALPHA) -{ - pg->material_alpha = *(float*)¶meter; -} - -DEF_METHOD(NV097, SET_LIGHT_ENABLE_MASK) -{ - SET_MASK(d->pgraph.regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_LIGHTS, - parameter); -} - -DEF_METHOD(NV097, SET_TEXGEN_S) -{ - int slot = (method - NV097_SET_TEXGEN_S) / 16; - unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A - : NV_PGRAPH_CSV1_B; - unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_S - : NV_PGRAPH_CSV1_A_T0_S; - SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 0)); -} - -DEF_METHOD(NV097, SET_TEXGEN_T) -{ - int slot = (method - NV097_SET_TEXGEN_T) / 16; - unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A - : NV_PGRAPH_CSV1_B; - unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_T - : NV_PGRAPH_CSV1_A_T0_T; - SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 1)); -} - -DEF_METHOD(NV097, SET_TEXGEN_R) -{ - int slot = (method - NV097_SET_TEXGEN_R) / 16; - unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A - : NV_PGRAPH_CSV1_B; - unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_R - : NV_PGRAPH_CSV1_A_T0_R; - SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 2)); -} - -DEF_METHOD(NV097, SET_TEXGEN_Q) -{ - int slot = (method - NV097_SET_TEXGEN_Q) / 16; - unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A - : NV_PGRAPH_CSV1_B; - unsigned int mask = (slot % 2) ? 
NV_PGRAPH_CSV1_A_T1_Q - : NV_PGRAPH_CSV1_A_T0_Q; - SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 3)); -} - -DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX_ENABLE) -{ - int slot = (method - NV097_SET_TEXTURE_MATRIX_ENABLE) / 4; - pg->texture_matrix_enable[slot] = parameter; -} - -DEF_METHOD(NV097, SET_POINT_SIZE) -{ - SET_MASK(pg->regs[NV_PGRAPH_POINTSIZE], NV097_SET_POINT_SIZE_V, parameter); -} - -DEF_METHOD_INC(NV097, SET_PROJECTION_MATRIX) -{ - int slot = (method - NV097_SET_PROJECTION_MATRIX) / 4; - // pg->projection_matrix[slot] = *(float*)¶meter; - unsigned int row = NV_IGRAPH_XF_XFCTX_PMAT0 + slot/4; - pg->vsh_constants[row][slot%4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, SET_MODEL_VIEW_MATRIX) -{ - int slot = (method - NV097_SET_MODEL_VIEW_MATRIX) / 4; - unsigned int matnum = slot / 16; - unsigned int entry = slot % 16; - unsigned int row = NV_IGRAPH_XF_XFCTX_MMAT0 + matnum*8 + entry/4; - pg->vsh_constants[row][entry % 4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, SET_INVERSE_MODEL_VIEW_MATRIX) -{ - int slot = (method - NV097_SET_INVERSE_MODEL_VIEW_MATRIX) / 4; - unsigned int matnum = slot / 16; - unsigned int entry = slot % 16; - unsigned int row = NV_IGRAPH_XF_XFCTX_IMMAT0 + matnum*8 + entry/4; - pg->vsh_constants[row][entry % 4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, SET_COMPOSITE_MATRIX) -{ - int slot = (method - NV097_SET_COMPOSITE_MATRIX) / 4; - unsigned int row = NV_IGRAPH_XF_XFCTX_CMAT0 + slot/4; - pg->vsh_constants[row][slot%4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX) -{ - int slot = (method - NV097_SET_TEXTURE_MATRIX) / 4; - unsigned int tex = slot / 16; - unsigned int entry = slot % 16; - unsigned int row = NV_IGRAPH_XF_XFCTX_T0MAT + tex*8 + entry/4; - pg->vsh_constants[row][entry%4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, 
SET_FOG_PARAMS) -{ - int slot = (method - NV097_SET_FOG_PARAMS) / 4; - if (slot < 2) { - pg->regs[NV_PGRAPH_FOGPARAM0 + slot*4] = parameter; - } else { - /* FIXME: No idea where slot = 2 is */ - } - - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FOG_K][slot] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FOG_K] = true; -} - -/* Handles NV097_SET_TEXGEN_PLANE_S,T,R,Q */ -DEF_METHOD_INC(NV097, SET_TEXGEN_PLANE_S) -{ - int slot = (method - NV097_SET_TEXGEN_PLANE_S) / 4; - unsigned int tex = slot / 16; - unsigned int entry = slot % 16; - unsigned int row = NV_IGRAPH_XF_XFCTX_TG0MAT + tex*8 + entry/4; - pg->vsh_constants[row][entry%4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD(NV097, SET_TEXGEN_VIEW_MODEL) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_TEXGEN_REF, - parameter); -} - -DEF_METHOD_INC(NV097, SET_FOG_PLANE) -{ - int slot = (method - NV097_SET_FOG_PLANE) / 4; - pg->vsh_constants[NV_IGRAPH_XF_XFCTX_FOG][slot] = parameter; - pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_FOG] = true; -} - -DEF_METHOD_INC(NV097, SET_SCENE_AMBIENT_COLOR) -{ - int slot = (method - NV097_SET_SCENE_AMBIENT_COLOR) / 4; - // ?? - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FR_AMB][slot] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FR_AMB] = true; -} - -DEF_METHOD_INC(NV097, SET_VIEWPORT_OFFSET) -{ - int slot = (method - NV097_SET_VIEWPORT_OFFSET) / 4; - pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][slot] = parameter; - pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPOFF] = true; -} - -DEF_METHOD_INC(NV097, SET_POINT_PARAMS) -{ - int slot = (method - NV097_SET_POINT_PARAMS) / 4; - pg->point_params[slot] = *(float *)¶meter; /* FIXME: Where? 
*/ -} - -DEF_METHOD_INC(NV097, SET_EYE_POSITION) -{ - int slot = (method - NV097_SET_EYE_POSITION) / 4; - pg->vsh_constants[NV_IGRAPH_XF_XFCTX_EYEP][slot] = parameter; - pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_EYEP] = true; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR0) -{ - int slot = (method - NV097_SET_COMBINER_FACTOR0) / 4; - pg->regs[NV_PGRAPH_COMBINEFACTOR0 + slot*4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR1) -{ - int slot = (method - NV097_SET_COMBINER_FACTOR1) / 4; - pg->regs[NV_PGRAPH_COMBINEFACTOR1 + slot*4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_OCW) -{ - int slot = (method - NV097_SET_COMBINER_ALPHA_OCW) / 4; - pg->regs[NV_PGRAPH_COMBINEALPHAO0 + slot*4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_ICW) -{ - int slot = (method - NV097_SET_COMBINER_COLOR_ICW) / 4; - pg->regs[NV_PGRAPH_COMBINECOLORI0 + slot*4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_VIEWPORT_SCALE) -{ - int slot = (method - NV097_SET_VIEWPORT_SCALE) / 4; - pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPSCL][slot] = parameter; - pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPSCL] = true; -} - -DEF_METHOD_INC(NV097, SET_TRANSFORM_PROGRAM) -{ - int slot = (method - NV097_SET_TRANSFORM_PROGRAM) / 4; - - int program_load = GET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR); - - assert(program_load < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); - pg->program_data[program_load][slot%4] = parameter; - pg->program_data_dirty = true; - - if (slot % 4 == 3) { - SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, program_load+1); - } -} - -DEF_METHOD_INC(NV097, SET_TRANSFORM_CONSTANT) -{ - int slot = (method - NV097_SET_TRANSFORM_CONSTANT) / 4; - int const_load = GET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR); - - assert(const_load < NV2A_VERTEXSHADER_CONSTANTS); - // VertexShaderConstant *constant = &pg->constants[const_load]; - 
pg->vsh_constants_dirty[const_load] |= - (parameter != pg->vsh_constants[const_load][slot%4]); - pg->vsh_constants[const_load][slot%4] = parameter; - - if (slot % 4 == 3) { - SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, const_load+1); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX3F) -{ - int slot = (method - NV097_SET_VERTEX3F) / 4; - VertexAttribute *attribute = - &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION]; - pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION); - attribute->inline_value[slot] = *(float*)¶meter; - attribute->inline_value[3] = 1.0f; - if (slot == 2) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -/* Handles NV097_SET_BACK_LIGHT_* */ -DEF_METHOD_INC(NV097, SET_BACK_LIGHT_AMBIENT_COLOR) -{ - int slot = (method - NV097_SET_BACK_LIGHT_AMBIENT_COLOR) / 4; - unsigned int part = NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4 + slot % 16; - slot /= 16; /* [Light index] */ - assert(slot < 8); - switch(part * 4) { - case NV097_SET_BACK_LIGHT_AMBIENT_COLOR ... - NV097_SET_BACK_LIGHT_AMBIENT_COLOR + 8: - part -= NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6] = true; - break; - case NV097_SET_BACK_LIGHT_DIFFUSE_COLOR ... - NV097_SET_BACK_LIGHT_DIFFUSE_COLOR + 8: - part -= NV097_SET_BACK_LIGHT_DIFFUSE_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6] = true; - break; - case NV097_SET_BACK_LIGHT_SPECULAR_COLOR ... 
- NV097_SET_BACK_LIGHT_SPECULAR_COLOR + 8: - part -= NV097_SET_BACK_LIGHT_SPECULAR_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6] = true; - break; - default: - assert(false); - break; - } -} - -/* Handles all the light source props except for NV097_SET_BACK_LIGHT_* */ -DEF_METHOD_INC(NV097, SET_LIGHT_AMBIENT_COLOR) -{ - int slot = (method - NV097_SET_LIGHT_AMBIENT_COLOR) / 4; - unsigned int part = NV097_SET_LIGHT_AMBIENT_COLOR / 4 + slot % 32; - slot /= 32; /* [Light index] */ - assert(slot < 8); - switch(part * 4) { - case NV097_SET_LIGHT_AMBIENT_COLOR ... - NV097_SET_LIGHT_AMBIENT_COLOR + 8: - part -= NV097_SET_LIGHT_AMBIENT_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6] = true; - break; - case NV097_SET_LIGHT_DIFFUSE_COLOR ... - NV097_SET_LIGHT_DIFFUSE_COLOR + 8: - part -= NV097_SET_LIGHT_DIFFUSE_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6] = true; - break; - case NV097_SET_LIGHT_SPECULAR_COLOR ... - NV097_SET_LIGHT_SPECULAR_COLOR + 8: - part -= NV097_SET_LIGHT_SPECULAR_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6] = true; - break; - case NV097_SET_LIGHT_LOCAL_RANGE: - pg->ltc1[NV_IGRAPH_XF_LTC1_r0 + slot][0] = parameter; - pg->ltc1_dirty[NV_IGRAPH_XF_LTC1_r0 + slot] = true; - break; - case NV097_SET_LIGHT_INFINITE_HALF_VECTOR ... - NV097_SET_LIGHT_INFINITE_HALF_VECTOR + 8: - part -= NV097_SET_LIGHT_INFINITE_HALF_VECTOR / 4; - pg->light_infinite_half_vector[slot][part] = *(float*)¶meter; - break; - case NV097_SET_LIGHT_INFINITE_DIRECTION ... 
- NV097_SET_LIGHT_INFINITE_DIRECTION + 8: - part -= NV097_SET_LIGHT_INFINITE_DIRECTION / 4; - pg->light_infinite_direction[slot][part] = *(float*)¶meter; - break; - case NV097_SET_LIGHT_SPOT_FALLOFF ... - NV097_SET_LIGHT_SPOT_FALLOFF + 8: - part -= NV097_SET_LIGHT_SPOT_FALLOFF / 4; - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2][part] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2] = true; - break; - case NV097_SET_LIGHT_SPOT_DIRECTION ... - NV097_SET_LIGHT_SPOT_DIRECTION + 12: - part -= NV097_SET_LIGHT_SPOT_DIRECTION / 4; - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2][part] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2] = true; - break; - case NV097_SET_LIGHT_LOCAL_POSITION ... - NV097_SET_LIGHT_LOCAL_POSITION + 8: - part -= NV097_SET_LIGHT_LOCAL_POSITION / 4; - pg->light_local_position[slot][part] = *(float*)¶meter; - break; - case NV097_SET_LIGHT_LOCAL_ATTENUATION ... - NV097_SET_LIGHT_LOCAL_ATTENUATION + 8: - part -= NV097_SET_LIGHT_LOCAL_ATTENUATION / 4; - pg->light_local_attenuation[slot][part] = *(float*)¶meter; - break; - default: - assert(false); - break; - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX4F) -{ - int slot = (method - NV097_SET_VERTEX4F) / 4; - VertexAttribute *attribute = - &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION]; - pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION); - attribute->inline_value[slot] = *(float*)¶meter; - if (slot == 3) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_NORMAL3S) -{ - int slot = (method - NV097_SET_NORMAL3S) / 4; - unsigned int part = slot % 2; - VertexAttribute *attribute = - &pg->vertex_attributes[NV2A_VERTEX_ATTR_NORMAL]; - pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_NORMAL); - int16_t val = parameter & 0xFFFF; - attribute->inline_value[part * 2 + 0] = MAX(-1.0f, (float)val / 32767.0f); - val = parameter >> 16; - attribute->inline_value[part * 2 + 1] = MAX(-1.0f, (float)val / 32767.0f); -} - 
-#define SET_VERTEX_ATTRIBUTE_4S(command, attr_index) \ - do { \ - int slot = (method - (command)) / 4; \ - unsigned int part = slot % 2; \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[part * 2 + 0] = \ - (float)(int16_t)(parameter & 0xFFFF); \ - attribute->inline_value[part * 2 + 1] = \ - (float)(int16_t)(parameter >> 16); \ - } while (0) - -DEF_METHOD_INC(NV097, SET_TEXCOORD0_4S) -{ - SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD0_4S, NV2A_VERTEX_ATTR_TEXTURE0); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD1_4S) -{ - SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD1_4S, NV2A_VERTEX_ATTR_TEXTURE1); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD2_4S) -{ - SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD2_4S, NV2A_VERTEX_ATTR_TEXTURE2); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD3_4S) -{ - SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD3_4S, NV2A_VERTEX_ATTR_TEXTURE3); -} - -#undef SET_VERTEX_ATTRIBUTE_4S - -#define SET_VERTEX_ATRIBUTE_TEX_2S(attr_index) \ - do { \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); \ - attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); \ - attribute->inline_value[2] = 0.0f; \ - attribute->inline_value[3] = 1.0f; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_TEXCOORD0_2S) -{ - SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE0); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD1_2S) -{ - SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE1); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD2_2S) -{ - SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE2); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD3_2S) -{ - SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE3); -} - -#undef SET_VERTEX_ATRIBUTE_TEX_2S - -#define SET_VERTEX_COLOR_3F(command, attr_index) \ - do { \ - int slot = (method - (command)) / 4; \ - 
VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[slot] = *(float*)¶meter; \ - attribute->inline_value[3] = 1.0f; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR3F) -{ - SET_VERTEX_COLOR_3F(NV097_SET_DIFFUSE_COLOR3F, NV2A_VERTEX_ATTR_DIFFUSE); -} - -DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR3F) -{ - SET_VERTEX_COLOR_3F(NV097_SET_SPECULAR_COLOR3F, NV2A_VERTEX_ATTR_SPECULAR); -} - -#undef SET_VERTEX_COLOR_3F - -#define SET_VERTEX_ATTRIBUTE_F(command, attr_index) \ - do { \ - int slot = (method - (command)) / 4; \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[slot] = *(float*)¶meter; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_NORMAL3F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_NORMAL3F, NV2A_VERTEX_ATTR_NORMAL); -} - -DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_DIFFUSE_COLOR4F, NV2A_VERTEX_ATTR_DIFFUSE); -} - -DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_SPECULAR_COLOR4F, - NV2A_VERTEX_ATTR_SPECULAR); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD0_4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD0_4F, NV2A_VERTEX_ATTR_TEXTURE0); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD1_4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD1_4F, NV2A_VERTEX_ATTR_TEXTURE1); -} - - -DEF_METHOD_INC(NV097, SET_TEXCOORD2_4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD2_4F, NV2A_VERTEX_ATTR_TEXTURE2); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD3_4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD3_4F, NV2A_VERTEX_ATTR_TEXTURE3); -} - -#undef SET_VERTEX_ATTRIBUTE_F - -#define SET_VERTEX_ATRIBUTE_TEX_2F(command, attr_index) \ - do { \ - int slot = (method - (command)) / 4; \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ 
- attribute->inline_value[slot] = *(float*)¶meter; \ - attribute->inline_value[2] = 0.0f; \ - attribute->inline_value[3] = 1.0f; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_TEXCOORD0_2F) -{ - SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD0_2F, - NV2A_VERTEX_ATTR_TEXTURE0); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD1_2F) -{ - SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD1_2F, - NV2A_VERTEX_ATTR_TEXTURE1); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD2_2F) -{ - SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD2_2F, - NV2A_VERTEX_ATTR_TEXTURE2); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD3_2F) -{ - SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD3_2F, - NV2A_VERTEX_ATTR_TEXTURE3); -} - -#undef SET_VERTEX_ATRIBUTE_TEX_2F - -#define SET_VERTEX_ATTRIBUTE_4UB(command, attr_index) \ - do { \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[0] = (parameter & 0xFF) / 255.0f; \ - attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0f; \ - attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0f; \ - attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0f; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4UB) -{ - SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_DIFFUSE_COLOR4UB, - NV2A_VERTEX_ATTR_DIFFUSE); -} - -DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4UB) -{ - SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_SPECULAR_COLOR4UB, - NV2A_VERTEX_ATTR_SPECULAR); -} - -#undef SET_VERTEX_ATTRIBUTE_4UB - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_FORMAT) -{ - int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_FORMAT) / 4; - VertexAttribute *attr = &pg->vertex_attributes[slot]; - attr->format = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE); - attr->count = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE); - attr->stride = GET_MASK(parameter, - NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE); - attr->gl_count = attr->count; - - NV2A_DPRINTF("vertex data array 
format=%d, count=%d, stride=%d\n", - attr->format, attr->count, attr->stride); - - switch (attr->format) { - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: - attr->gl_type = GL_UNSIGNED_BYTE; - attr->gl_normalize = GL_TRUE; - attr->size = 1; - assert(attr->count == 4); - // http://www.opengl.org/registry/specs/ARB/vertex_array_bgra.txt - attr->gl_count = GL_BGRA; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: - attr->gl_type = GL_UNSIGNED_BYTE; - attr->gl_normalize = GL_TRUE; - attr->size = 1; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: - attr->gl_type = GL_SHORT; - attr->gl_normalize = GL_TRUE; - attr->size = 2; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: - attr->gl_type = GL_FLOAT; - attr->gl_normalize = GL_FALSE; - attr->size = 4; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: - attr->gl_type = GL_SHORT; - attr->gl_normalize = GL_FALSE; - attr->size = 2; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: - /* 3 signed, normalized components packed in 32-bits. 
(11,11,10) */ - attr->gl_type = GL_INT; - attr->size = 4; - assert(attr->count == 1); - attr->needs_conversion = true; - break; - default: - fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format); - assert(false); - break; - } - - if (attr->needs_conversion) { - pg->compressed_attrs |= (1 << slot); - } else { - pg->compressed_attrs &= ~(1 << slot); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_OFFSET) -{ - int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_OFFSET) / 4; - - pg->vertex_attributes[slot].dma_select = parameter & 0x80000000; - pg->vertex_attributes[slot].offset = parameter & 0x7fffffff; -} - -DEF_METHOD(NV097, SET_LOGIC_OP_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_LOGICOP_ENABLE, - parameter); -} - -DEF_METHOD(NV097, SET_LOGIC_OP) -{ - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_LOGICOP, - parameter & 0xF); -} - -static void pgraph_process_pending_report(NV2AState *d, QueryReport *r) -{ - PGRAPHState *pg = &d->pgraph; - - if (r->clear) { - pg->zpass_pixel_count_result = 0; - return; - } - - uint8_t type = GET_MASK(r->parameter, NV097_GET_REPORT_TYPE); - assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); - - /* FIXME: Multisampling affects this (both: OGL and Xbox GPU), - * not sure if CLEARs also count - */ - /* FIXME: What about clipping regions etc? */ - for (int i = 0; i < r->query_count; i++) { - GLuint gl_query_result = 0; - glGetQueryObjectuiv(r->queries[i], GL_QUERY_RESULT, &gl_query_result); - gl_query_result /= pg->surface_scale_factor * pg->surface_scale_factor; - pg->zpass_pixel_count_result += gl_query_result; - } - - if (r->query_count) { - glDeleteQueries(r->query_count, r->queries); - g_free(r->queries); - } - - uint64_t timestamp = 0x0011223344556677; /* FIXME: Update timestamp?! 
*/ - uint32_t done = 0; - - hwaddr report_dma_len; - uint8_t *report_data = - (uint8_t *)nv_dma_map(d, pg->dma_report, &report_dma_len); - - hwaddr offset = GET_MASK(r->parameter, NV097_GET_REPORT_OFFSET); - assert(offset < report_dma_len); - report_data += offset; - - stq_le_p((uint64_t *)&report_data[0], timestamp); - stl_le_p((uint32_t *)&report_data[8], pg->zpass_pixel_count_result); - stl_le_p((uint32_t *)&report_data[12], done); -} - -void pgraph_process_pending_reports(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - QueryReport *r, *next; - - QSIMPLEQ_FOREACH_SAFE(r, &pg->report_queue, entry, next) { - pgraph_process_pending_report(d, r); - QSIMPLEQ_REMOVE_HEAD(&pg->report_queue, entry); - g_free(r); - } -} - -DEF_METHOD(NV097, CLEAR_REPORT_VALUE) -{ - /* FIXME: Does this have a value in parameter? Also does this (also?) modify - * the report memory block? - */ - if (pg->gl_zpass_pixel_count_query_count) { - glDeleteQueries(pg->gl_zpass_pixel_count_query_count, - pg->gl_zpass_pixel_count_queries); - pg->gl_zpass_pixel_count_query_count = 0; - } - - QueryReport *r = g_malloc(sizeof(QueryReport)); - r->clear = true; - QSIMPLEQ_INSERT_TAIL(&pg->report_queue, r, entry); -} - -DEF_METHOD(NV097, SET_ZPASS_PIXEL_COUNT_ENABLE) -{ - pg->zpass_pixel_count_enable = parameter; -} - -DEF_METHOD(NV097, GET_REPORT) -{ - uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE); - assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); - - QueryReport *r = g_malloc(sizeof(QueryReport)); - r->clear = false; - r->parameter = parameter; - r->query_count = pg->gl_zpass_pixel_count_query_count; - r->queries = pg->gl_zpass_pixel_count_queries; - QSIMPLEQ_INSERT_TAIL(&pg->report_queue, r, entry); - - pg->gl_zpass_pixel_count_query_count = 0; - pg->gl_zpass_pixel_count_queries = NULL; -} - -DEF_METHOD_INC(NV097, SET_EYE_DIRECTION) -{ - int slot = (method - NV097_SET_EYE_DIRECTION) / 4; - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_EYED][slot] = parameter; - 
pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_EYED] = true; -} - -static void pgraph_reset_draw_arrays(PGRAPHState *pg) -{ - pg->draw_arrays_length = 0; - pg->draw_arrays_min_start = -1; - pg->draw_arrays_max_count = 0; - pg->draw_arrays_prevent_connect = false; -} - -static void pgraph_reset_inline_buffers(PGRAPHState *pg) -{ - pg->inline_elements_length = 0; - pg->inline_array_length = 0; - pg->inline_buffer_length = 0; - pgraph_reset_draw_arrays(pg); -} - -static void pgraph_flush_draw(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - if (!(pg->color_binding || pg->zeta_binding)) { - pgraph_reset_inline_buffers(pg); - return; - } - assert(pg->shader_binding); - - if (pg->draw_arrays_length) { - NV2A_GL_DPRINTF(false, "Draw Arrays"); - nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS); - assert(pg->inline_elements_length == 0); - assert(pg->inline_buffer_length == 0); - assert(pg->inline_array_length == 0); - - pgraph_bind_vertex_attributes(d, pg->draw_arrays_min_start, - pg->draw_arrays_max_count - 1, - false, 0, - pg->draw_arrays_max_count - 1); - glMultiDrawArrays(pg->shader_binding->gl_primitive_mode, - pg->gl_draw_arrays_start, - pg->gl_draw_arrays_count, - pg->draw_arrays_length); - } else if (pg->inline_elements_length) { - NV2A_GL_DPRINTF(false, "Inline Elements"); - nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS); - assert(pg->inline_buffer_length == 0); - assert(pg->inline_array_length == 0); - - uint32_t min_element = (uint32_t)-1; - uint32_t max_element = 0; - for (int i=0; i < pg->inline_elements_length; i++) { - max_element = MAX(pg->inline_elements[i], max_element); - min_element = MIN(pg->inline_elements[i], min_element); - } - - pgraph_bind_vertex_attributes( - d, min_element, max_element, false, 0, - pg->inline_elements[pg->inline_elements_length - 1]); - - VertexKey k; - memset(&k, 0, sizeof(VertexKey)); - k.count = pg->inline_elements_length; - k.gl_type = GL_UNSIGNED_INT; - k.gl_normalize = GL_FALSE; - k.stride = sizeof(uint32_t); - uint64_t h = 
fast_hash((uint8_t*)pg->inline_elements, - pg->inline_elements_length * 4); - - LruNode *node = lru_lookup(&pg->element_cache, h, &k); - VertexLruNode *found = container_of(node, VertexLruNode, node); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, found->gl_buffer); - if (!found->initialized) { - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, - pg->inline_elements_length * 4, - pg->inline_elements, GL_STATIC_DRAW); - found->initialized = true; - } else { - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY); - } - glDrawElements(pg->shader_binding->gl_primitive_mode, - pg->inline_elements_length, GL_UNSIGNED_INT, - (void *)0); - } else if (pg->inline_buffer_length) { - NV2A_GL_DPRINTF(false, "Inline Buffer"); - nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS); - assert(pg->inline_array_length == 0); - - if (pg->compressed_attrs) { - pg->compressed_attrs = 0; - pgraph_bind_shaders(pg); - } - - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attr = &pg->vertex_attributes[i]; - if (attr->inline_buffer_populated) { - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3); - glBindBuffer(GL_ARRAY_BUFFER, attr->gl_inline_buffer); - glBufferData(GL_ARRAY_BUFFER, - pg->inline_buffer_length * sizeof(float) * 4, - attr->inline_buffer, GL_STREAM_DRAW); - glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0); - glEnableVertexAttribArray(i); - attr->inline_buffer_populated = false; - memcpy(attr->inline_value, - attr->inline_buffer + (pg->inline_buffer_length - 1) * 4, - sizeof(attr->inline_value)); - } else { - glDisableVertexAttribArray(i); - glVertexAttrib4fv(i, attr->inline_value); - } - } - - glDrawArrays(pg->shader_binding->gl_primitive_mode, - 0, pg->inline_buffer_length); - } else if (pg->inline_array_length) { - NV2A_GL_DPRINTF(false, "Inline Array"); - nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS); - - unsigned int index_count = pgraph_bind_inline_array(d); - 
glDrawArrays(pg->shader_binding->gl_primitive_mode, - 0, index_count); - } else { - NV2A_GL_DPRINTF(true, "EMPTY NV097_SET_BEGIN_END"); - NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END"); - } - - pgraph_reset_inline_buffers(pg); -} - -DEF_METHOD(NV097, SET_BEGIN_END) -{ - uint32_t control_0 = pg->regs[NV_PGRAPH_CONTROL_0]; - bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE; - bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE; - bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE; - bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE; - bool color_write = mask_alpha || mask_red || mask_green || mask_blue; - bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; - bool stencil_test = - pg->regs[NV_PGRAPH_CONTROL_1] & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; - bool is_nop_draw = !(color_write || depth_test || stencil_test); - - if (parameter == NV097_SET_BEGIN_END_OP_END) { - if (pg->primitive_mode == PRIM_TYPE_INVALID) { - NV2A_DPRINTF("End without Begin!\n"); - } - nv2a_profile_inc_counter(NV2A_PROF_BEGIN_ENDS); - - if (is_nop_draw) { - // FIXME: Check PGRAPH register 0x880. - // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit - // check that will raise an exception in the case that a draw should - // modify the color and/or zeta buffer but the target(s) are masked - // off. This check only seems to trigger during the fragment - // processing, it is legal to attempt a draw that is entirely - // clipped regardless of 0x880. See xemu#635 for context. 
- return; - } - - pgraph_flush_draw(d); - - /* End of visibility testing */ - if (pg->zpass_pixel_count_enable) { - nv2a_profile_inc_counter(NV2A_PROF_QUERY); - glEndQuery(GL_SAMPLES_PASSED); - } - - pg->draw_time++; - if (pg->color_binding && pgraph_color_write_enabled(pg)) { - pg->color_binding->draw_time = pg->draw_time; - } - if (pg->zeta_binding && pgraph_zeta_write_enabled(pg)) { - pg->zeta_binding->draw_time = pg->draw_time; - } - - pgraph_set_surface_dirty(pg, color_write, depth_test || stencil_test); - - NV2A_GL_DGROUP_END(); - pg->primitive_mode = PRIM_TYPE_INVALID; - } else { - NV2A_GL_DGROUP_BEGIN("NV097_SET_BEGIN_END: 0x%x", parameter); - if (pg->primitive_mode != PRIM_TYPE_INVALID) { - NV2A_DPRINTF("Begin without End!\n"); - } - assert(parameter <= NV097_SET_BEGIN_END_OP_POLYGON); - pg->primitive_mode = parameter; - - pgraph_update_surface(d, true, true, depth_test || stencil_test); - pgraph_reset_inline_buffers(pg); - - if (is_nop_draw) { - return; - } - - assert(pg->color_binding || pg->zeta_binding); - - pgraph_bind_textures(d); - pgraph_bind_shaders(pg); - - glColorMask(mask_red, mask_green, mask_blue, mask_alpha); - glDepthMask(!!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE)); - glStencilMask(GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE)); - - if (pg->regs[NV_PGRAPH_BLEND] & NV_PGRAPH_BLEND_EN) { - glEnable(GL_BLEND); - uint32_t sfactor = GET_MASK(pg->regs[NV_PGRAPH_BLEND], - NV_PGRAPH_BLEND_SFACTOR); - uint32_t dfactor = GET_MASK(pg->regs[NV_PGRAPH_BLEND], - NV_PGRAPH_BLEND_DFACTOR); - assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_map)); - assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_map)); - glBlendFunc(pgraph_blend_factor_map[sfactor], - pgraph_blend_factor_map[dfactor]); - - uint32_t equation = GET_MASK(pg->regs[NV_PGRAPH_BLEND], - NV_PGRAPH_BLEND_EQN); - assert(equation < ARRAY_SIZE(pgraph_blend_equation_map)); - glBlendEquation(pgraph_blend_equation_map[equation]); - - uint32_t blend_color = 
pg->regs[NV_PGRAPH_BLENDCOLOR]; - glBlendColor( ((blend_color >> 16) & 0xFF) / 255.0f, /* red */ - ((blend_color >> 8) & 0xFF) / 255.0f, /* green */ - (blend_color & 0xFF) / 255.0f, /* blue */ - ((blend_color >> 24) & 0xFF) / 255.0f);/* alpha */ - } else { - glDisable(GL_BLEND); - } - - /* Face culling */ - if (pg->regs[NV_PGRAPH_SETUPRASTER] - & NV_PGRAPH_SETUPRASTER_CULLENABLE) { - uint32_t cull_face = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_CULLCTRL); - assert(cull_face < ARRAY_SIZE(pgraph_cull_face_map)); - glCullFace(pgraph_cull_face_map[cull_face]); - glEnable(GL_CULL_FACE); - } else { - glDisable(GL_CULL_FACE); - } - - /* Clipping */ - glEnable(GL_CLIP_DISTANCE0); - glEnable(GL_CLIP_DISTANCE1); - - /* Front-face select */ - glFrontFace(pg->regs[NV_PGRAPH_SETUPRASTER] - & NV_PGRAPH_SETUPRASTER_FRONTFACE - ? GL_CCW : GL_CW); - - /* Polygon offset */ - /* FIXME: GL implementation-specific, maybe do this in VS? */ - if (pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) { - glEnable(GL_POLYGON_OFFSET_FILL); - } else { - glDisable(GL_POLYGON_OFFSET_FILL); - } - if (pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) { - glEnable(GL_POLYGON_OFFSET_LINE); - } else { - glDisable(GL_POLYGON_OFFSET_LINE); - } - if (pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) { - glEnable(GL_POLYGON_OFFSET_POINT); - } else { - glDisable(GL_POLYGON_OFFSET_POINT); - } - if (pg->regs[NV_PGRAPH_SETUPRASTER] & - (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { - GLfloat zfactor = *(float*)&pg->regs[NV_PGRAPH_ZOFFSETFACTOR]; - GLfloat zbias = *(float*)&pg->regs[NV_PGRAPH_ZOFFSETBIAS]; - glPolygonOffset(zfactor, zbias); - } - - /* Depth testing */ - if (depth_test) { - glEnable(GL_DEPTH_TEST); - - uint32_t depth_func = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ZFUNC); - 
assert(depth_func < ARRAY_SIZE(pgraph_depth_func_map)); - glDepthFunc(pgraph_depth_func_map[depth_func]); - } else { - glDisable(GL_DEPTH_TEST); - } - - if (GET_MASK(pg->regs[NV_PGRAPH_ZCOMPRESSOCCLUDE], - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) { - glEnable(GL_DEPTH_CLAMP); - } else { - glDisable(GL_DEPTH_CLAMP); - } - - if (GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_SHADEMODE) == - NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT) { - glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); - } - - if (stencil_test) { - glEnable(GL_STENCIL_TEST); - - uint32_t stencil_func = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_FUNC); - uint32_t stencil_ref = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_REF); - uint32_t func_mask = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ); - uint32_t op_fail = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL); - uint32_t op_zfail = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL); - uint32_t op_zpass = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS); - - assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_map)); - assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_map)); - assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_map)); - assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_map)); - - glStencilFunc( - pgraph_stencil_func_map[stencil_func], - stencil_ref, - func_mask); - - glStencilOp( - pgraph_stencil_op_map[op_fail], - pgraph_stencil_op_map[op_zfail], - pgraph_stencil_op_map[op_zpass]); - - } else { - glDisable(GL_STENCIL_TEST); - } - - /* Dither */ - /* FIXME: GL implementation dependent */ - if (pg->regs[NV_PGRAPH_CONTROL_0] & - NV_PGRAPH_CONTROL_0_DITHERENABLE) { - glEnable(GL_DITHER); - } else { - glDisable(GL_DITHER); - } - - glEnable(GL_PROGRAM_POINT_SIZE); - - bool anti_aliasing = 
GET_MASK(pg->regs[NV_PGRAPH_ANTIALIASING], NV_PGRAPH_ANTIALIASING_ENABLE); - - /* Edge Antialiasing */ - if (!anti_aliasing && pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) { - glEnable(GL_LINE_SMOOTH); - glLineWidth(MIN(supportedSmoothLineWidthRange[1], pg->surface_scale_factor)); - } else { - glDisable(GL_LINE_SMOOTH); - glLineWidth(MIN(supportedAliasedLineWidthRange[1], pg->surface_scale_factor)); - } - if (!anti_aliasing && pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) { - glEnable(GL_POLYGON_SMOOTH); - } else { - glDisable(GL_POLYGON_SMOOTH); - } - - unsigned int vp_width = pg->surface_binding_dim.width, - vp_height = pg->surface_binding_dim.height; - pgraph_apply_scaling_factor(pg, &vp_width, &vp_height); - glViewport(0, 0, vp_width, vp_height); - - /* Surface clip */ - /* FIXME: Consider moving to PSH w/ window clip */ - unsigned int xmin = pg->surface_shape.clip_x - pg->surface_binding_dim.clip_x, - ymin = pg->surface_shape.clip_y - pg->surface_binding_dim.clip_y; - unsigned int xmax = xmin + pg->surface_shape.clip_width - 1, - ymax = ymin + pg->surface_shape.clip_height - 1; - - unsigned int scissor_width = xmax - xmin + 1, - scissor_height = ymax - ymin + 1; - pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); - pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); - ymin = pg->surface_binding_dim.height - (ymin + scissor_height); - pgraph_apply_scaling_factor(pg, &xmin, &ymin); - pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); - - glEnable(GL_SCISSOR_TEST); - glScissor(xmin, ymin, scissor_width, scissor_height); - - /* Visibility testing */ - if (pg->zpass_pixel_count_enable) { - pg->gl_zpass_pixel_count_query_count++; - pg->gl_zpass_pixel_count_queries = (GLuint*)g_realloc( - pg->gl_zpass_pixel_count_queries, - sizeof(GLuint) * pg->gl_zpass_pixel_count_query_count); - - GLuint gl_query; - glGenQueries(1, &gl_query); - pg->gl_zpass_pixel_count_queries[ - 
pg->gl_zpass_pixel_count_query_count - 1] = gl_query; - glBeginQuery(GL_SAMPLES_PASSED, gl_query); - } - } -} - -DEF_METHOD(NV097, SET_TEXTURE_OFFSET) -{ - int slot = (method - NV097_SET_TEXTURE_OFFSET) / 64; - pg->regs[NV_PGRAPH_TEXOFFSET0 + slot * 4] = parameter; - pg->texture_dirty[slot] = true; -} - -DEF_METHOD(NV097, SET_TEXTURE_FORMAT) -{ - int slot = (method - NV097_SET_TEXTURE_FORMAT) / 64; - - bool dma_select = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA) == 2; - bool cubemap = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE); - unsigned int border_source = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE); - unsigned int dimensionality = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY); - unsigned int color_format = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_COLOR); - unsigned int levels = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS); - unsigned int log_width = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U); - unsigned int log_height = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V); - unsigned int log_depth = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P); - - uint32_t *reg = &pg->regs[NV_PGRAPH_TEXFMT0 + slot * 4]; - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_CONTEXT_DMA, dma_select); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE, cubemap); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BORDER_SOURCE, border_source); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_DIMENSIONALITY, dimensionality); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_COLOR, color_format); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS, levels); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_U, log_width); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_V, log_height); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_P, log_depth); - - pg->texture_dirty[slot] = true; -} - -DEF_METHOD(NV097, SET_TEXTURE_CONTROL0) -{ - int slot = (method - NV097_SET_TEXTURE_CONTROL0) / 64; - pg->regs[NV_PGRAPH_TEXCTL0_0 + slot*4] = 
parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_CONTROL1) -{ - int slot = (method - NV097_SET_TEXTURE_CONTROL1) / 64; - pg->regs[NV_PGRAPH_TEXCTL1_0 + slot*4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_FILTER) -{ - int slot = (method - NV097_SET_TEXTURE_FILTER) / 64; - pg->regs[NV_PGRAPH_TEXFILTER0 + slot * 4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_IMAGE_RECT) -{ - int slot = (method - NV097_SET_TEXTURE_IMAGE_RECT) / 64; - pg->regs[NV_PGRAPH_TEXIMAGERECT0 + slot * 4] = parameter; - pg->texture_dirty[slot] = true; -} - -DEF_METHOD(NV097, SET_TEXTURE_PALETTE) -{ - int slot = (method - NV097_SET_TEXTURE_PALETTE) / 64; - - bool dma_select = - GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_CONTEXT_DMA) == 1; - unsigned int length = - GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_LENGTH); - unsigned int offset = - GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_OFFSET); - - uint32_t *reg = &pg->regs[NV_PGRAPH_TEXPALETTE0 + slot * 4]; - SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA, dma_select); - SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_LENGTH, length); - SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_OFFSET, offset); - - pg->texture_dirty[slot] = true; -} - -DEF_METHOD(NV097, SET_TEXTURE_BORDER_COLOR) -{ - int slot = (method - NV097_SET_TEXTURE_BORDER_COLOR) / 64; - pg->regs[NV_PGRAPH_BORDERCOLOR0 + slot * 4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_MAT) -{ - int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_MAT) / 4; - if (slot < 16) { - /* discard */ - return; - } - - slot -= 16; - const int swizzle[4] = { NV_PGRAPH_BUMPMAT00, NV_PGRAPH_BUMPMAT01, - NV_PGRAPH_BUMPMAT11, NV_PGRAPH_BUMPMAT10 }; - pg->regs[swizzle[slot % 4] + slot / 4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_SCALE) -{ - int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_SCALE) / 64; - if (slot == 0) { - /* discard */ - return; - } - - slot--; - pg->regs[NV_PGRAPH_BUMPSCALE1 + slot * 4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_OFFSET) -{ 
- int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_OFFSET) / 64; - if (slot == 0) { - /* discard */ - return; - } - - slot--; - pg->regs[NV_PGRAPH_BUMPOFFSET1 + slot * 4] = parameter; -} - -static void pgraph_expand_draw_arrays(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - GLint start = pg->gl_draw_arrays_start[pg->draw_arrays_length - 1]; - GLsizei count = pg->gl_draw_arrays_count[pg->draw_arrays_length - 1]; - - /* Render any previously squashed DRAW_ARRAYS calls. This case would be - * triggered if a set of BEGIN+DA+END triplets is followed by the - * BEGIN+DA+ARRAY_ELEMENT+... chain that caused this expansion. */ - if (pg->draw_arrays_length > 1) { - pgraph_flush_draw(d); - } - assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH); - for (unsigned int i = 0; i < count; i++) { - pg->inline_elements[pg->inline_elements_length++] = start + i; - } - - pgraph_reset_draw_arrays(pg); -} - -static void pgraph_check_within_begin_end_block(PGRAPHState *pg) -{ - if (pg->primitive_mode == PRIM_TYPE_INVALID) { - NV2A_DPRINTF("Vertex data being sent outside of begin/end block!\n"); - } -} - -DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT16) -{ - pgraph_check_within_begin_end_block(pg); - - if (pg->draw_arrays_length) { - pgraph_expand_draw_arrays(d); - } - - assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); - pg->inline_elements[pg->inline_elements_length++] = parameter & 0xFFFF; - pg->inline_elements[pg->inline_elements_length++] = parameter >> 16; -} - -DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT32) -{ - pgraph_check_within_begin_end_block(pg); - - if (pg->draw_arrays_length) { - pgraph_expand_draw_arrays(d); - } - - assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); - pg->inline_elements[pg->inline_elements_length++] = parameter; -} - -DEF_METHOD(NV097, DRAW_ARRAYS) -{ - pgraph_check_within_begin_end_block(pg); - - unsigned int start = GET_MASK(parameter, NV097_DRAW_ARRAYS_START_INDEX); - unsigned int count = GET_MASK(parameter, 
NV097_DRAW_ARRAYS_COUNT) + 1; - - if (pg->inline_elements_length) { - /* FIXME: Determine HW behavior for overflow case. */ - assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH); - assert(!pg->draw_arrays_prevent_connect); - - for (unsigned int i = 0; i < count; i++) { - pg->inline_elements[pg->inline_elements_length++] = start + i; - } - return; - } - - pg->draw_arrays_min_start = MIN(pg->draw_arrays_min_start, start); - pg->draw_arrays_max_count = MAX(pg->draw_arrays_max_count, start + count); - - assert(pg->draw_arrays_length < ARRAY_SIZE(pg->gl_draw_arrays_start)); - - /* Attempt to connect contiguous primitives */ - if (!pg->draw_arrays_prevent_connect && pg->draw_arrays_length > 0) { - unsigned int last_start = - pg->gl_draw_arrays_start[pg->draw_arrays_length - 1]; - GLsizei* last_count = - &pg->gl_draw_arrays_count[pg->draw_arrays_length - 1]; - if (start == (last_start + *last_count)) { - *last_count += count; - return; - } - } - - pg->gl_draw_arrays_start[pg->draw_arrays_length] = start; - pg->gl_draw_arrays_count[pg->draw_arrays_length] = count; - pg->draw_arrays_length++; - pg->draw_arrays_prevent_connect = false; -} - -DEF_METHOD_NON_INC(NV097, INLINE_ARRAY) -{ - pgraph_check_within_begin_end_block(pg); - assert(pg->inline_array_length < NV2A_MAX_BATCH_LENGTH); - pg->inline_array[pg->inline_array_length++] = parameter; -} - -DEF_METHOD_INC(NV097, SET_EYE_VECTOR) -{ - int slot = (method - NV097_SET_EYE_VECTOR) / 4; - pg->regs[NV_PGRAPH_EYEVEC0 + slot * 4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA2F_M) -{ - int slot = (method - NV097_SET_VERTEX_DATA2F_M) / 4; - unsigned int part = slot % 2; - slot /= 2; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - attribute->inline_value[part] = *(float*)¶meter; - /* FIXME: Should these really be set to 0.0 and 1.0 ? Conditions? 
*/ - attribute->inline_value[2] = 0.0; - attribute->inline_value[3] = 1.0; - if ((slot == 0) && (part == 1)) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA4F_M) -{ - int slot = (method - NV097_SET_VERTEX_DATA4F_M) / 4; - unsigned int part = slot % 4; - slot /= 4; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - attribute->inline_value[part] = *(float*)¶meter; - if ((slot == 0) && (part == 3)) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA2S) -{ - int slot = (method - NV097_SET_VERTEX_DATA2S) / 4; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); - attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); - attribute->inline_value[2] = 0.0; - attribute->inline_value[3] = 1.0; - if (slot == 0) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA4UB) -{ - int slot = (method - NV097_SET_VERTEX_DATA4UB) / 4; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - attribute->inline_value[0] = (parameter & 0xFF) / 255.0; - attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0; - attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0; - attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0; - if (slot == 0) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA4S_M) -{ - int slot = (method - NV097_SET_VERTEX_DATA4S_M) / 4; - unsigned int part = slot % 2; - slot /= 2; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - - attribute->inline_value[part * 2 + 0] = (float)(int16_t)(parameter & 0xFFFF); - attribute->inline_value[part * 2 + 1] = 
(float)(int16_t)(parameter >> 16); - if ((slot == 0) && (part == 1)) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD(NV097, SET_SEMAPHORE_OFFSET) -{ - pg->regs[NV_PGRAPH_SEMAPHOREOFFSET] = parameter; -} - -DEF_METHOD(NV097, BACK_END_WRITE_SEMAPHORE_RELEASE) -{ - pgraph_update_surface(d, false, true, true); - - //qemu_mutex_unlock(&d->pgraph.lock); - //qemu_mutex_lock_iothread(); - - uint32_t semaphore_offset = pg->regs[NV_PGRAPH_SEMAPHOREOFFSET]; - - hwaddr semaphore_dma_len; - uint8_t *semaphore_data = (uint8_t*)nv_dma_map(d, pg->dma_semaphore, - &semaphore_dma_len); - assert(semaphore_offset < semaphore_dma_len); - semaphore_data += semaphore_offset; - - stl_le_p((uint32_t*)semaphore_data, parameter); - - //qemu_mutex_lock(&d->pgraph.lock); - //qemu_mutex_unlock_iothread(); -} - -DEF_METHOD(NV097, SET_ZMIN_MAX_CONTROL) -{ - switch (GET_MASK(parameter, NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN)) { - case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CULL: - SET_MASK(pg->regs[NV_PGRAPH_ZCOMPRESSOCCLUDE], - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN, - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CULL); - break; - case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CLAMP: - SET_MASK(pg->regs[NV_PGRAPH_ZCOMPRESSOCCLUDE], - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN, - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP); - break; - default: - /* FIXME: Should raise NV_PGRAPH_NSOURCE_DATA_ERROR_PENDING */ - assert(!"Invalid zclamp value"); - break; - } -} - -DEF_METHOD(NV097, SET_ANTI_ALIASING_CONTROL) -{ - SET_MASK(pg->regs[NV_PGRAPH_ANTIALIASING], NV_PGRAPH_ANTIALIASING_ENABLE, - GET_MASK(parameter, NV097_SET_ANTI_ALIASING_CONTROL_ENABLE)); - // FIXME: Handle the remaining bits (observed values 0xFFFF0000, 0xFFFF0001) -} - -DEF_METHOD(NV097, SET_ZSTENCIL_CLEAR_VALUE) -{ - pg->regs[NV_PGRAPH_ZSTENCILCLEARVALUE] = parameter; -} - -DEF_METHOD(NV097, SET_COLOR_CLEAR_VALUE) -{ - pg->regs[NV_PGRAPH_COLORCLEARVALUE] = parameter; -} - -DEF_METHOD(NV097, CLEAR_SURFACE) -{ - pg->clearing = true; - - 
NV2A_DPRINTF("---------PRE CLEAR ------\n"); - GLbitfield gl_mask = 0; - - bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR); - bool write_zeta = - (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL)); - - if (write_zeta) { - uint32_t clear_zstencil = - d->pgraph.regs[NV_PGRAPH_ZSTENCILCLEARVALUE]; - GLint gl_clear_stencil; - GLfloat gl_clear_depth; - - switch(pg->surface_shape.zeta_format) { - case NV097_SET_SURFACE_FORMAT_ZETA_Z16: { - uint16_t z = clear_zstencil & 0xFFFF; - /* FIXME: Remove bit for stencil clear? */ - if (pg->surface_shape.z_format) { - gl_clear_depth = convert_f16_to_float(z) / f16_max; - } else { - gl_clear_depth = z / (float)0xFFFF; - } - break; - } - case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: { - gl_clear_stencil = clear_zstencil & 0xFF; - uint32_t z = clear_zstencil >> 8; - if (pg->surface_shape.z_format) { - gl_clear_depth = convert_f24_to_float(z) / f24_max; - } else { - gl_clear_depth = z / (float)0xFFFFFF; - } - break; - } - default: - fprintf(stderr, "Unknown zeta surface format: 0x%x\n", pg->surface_shape.zeta_format); - assert(false); - break; - } - if (parameter & NV097_CLEAR_SURFACE_Z) { - gl_mask |= GL_DEPTH_BUFFER_BIT; - glDepthMask(GL_TRUE); - glClearDepth(gl_clear_depth); - } - if (parameter & NV097_CLEAR_SURFACE_STENCIL) { - gl_mask |= GL_STENCIL_BUFFER_BIT; - glStencilMask(0xff); - glClearStencil(gl_clear_stencil); - } - } - if (write_color) { - gl_mask |= GL_COLOR_BUFFER_BIT; - glColorMask((parameter & NV097_CLEAR_SURFACE_R) - ? GL_TRUE : GL_FALSE, - (parameter & NV097_CLEAR_SURFACE_G) - ? GL_TRUE : GL_FALSE, - (parameter & NV097_CLEAR_SURFACE_B) - ? GL_TRUE : GL_FALSE, - (parameter & NV097_CLEAR_SURFACE_A) - ? 
GL_TRUE : GL_FALSE); - uint32_t clear_color = d->pgraph.regs[NV_PGRAPH_COLORCLEARVALUE]; - - /* Handle RGB */ - GLfloat red, green, blue; - switch(pg->surface_shape.color_format) { - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_O1R5G5B5: - red = ((clear_color >> 10) & 0x1F) / 31.0f; - green = ((clear_color >> 5) & 0x1F) / 31.0f; - blue = (clear_color & 0x1F) / 31.0f; - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: - red = ((clear_color >> 11) & 0x1F) / 31.0f; - green = ((clear_color >> 5) & 0x3F) / 63.0f; - blue = (clear_color & 0x1F) / 31.0f; - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_O8R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: - red = ((clear_color >> 16) & 0xFF) / 255.0f; - green = ((clear_color >> 8) & 0xFF) / 255.0f; - blue = (clear_color & 0xFF) / 255.0f; - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8: - /* Xbox D3D doesn't support clearing those */ - default: - red = 1.0f; - green = 0.0f; - blue = 1.0f; - fprintf(stderr, "CLEAR_SURFACE for color_format 0x%x unsupported", - pg->surface_shape.color_format); - assert(false); - break; - } - - /* Handle alpha */ - GLfloat alpha; - switch(pg->surface_shape.color_format) { - /* FIXME: CLEAR_SURFACE seems to work like memset, so maybe we - * also have to clear non-alpha bits with alpha value? - * As GL doesn't own those pixels we'd have to do this on - * our own in xbox memory. 
- */ - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8: - alpha = ((clear_color >> 24) & 0x7F) / 127.0f; - assert(false); /* Untested */ - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: - alpha = ((clear_color >> 24) & 0xFF) / 255.0f; - break; - default: - alpha = 1.0f; - break; - } - - glClearColor(red, green, blue, alpha); - } - - pgraph_update_surface(d, true, write_color, write_zeta); - - /* FIXME: Needs confirmation */ - unsigned int xmin = - GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTX], NV_PGRAPH_CLEARRECTX_XMIN); - unsigned int xmax = - GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTX], NV_PGRAPH_CLEARRECTX_XMAX); - unsigned int ymin = - GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTY], NV_PGRAPH_CLEARRECTY_YMIN); - unsigned int ymax = - GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTY], NV_PGRAPH_CLEARRECTY_YMAX); - - NV2A_DPRINTF( - "------------------CLEAR 0x%x %d,%d - %d,%d %x---------------\n", - parameter, xmin, ymin, xmax, ymax, - d->pgraph.regs[NV_PGRAPH_COLORCLEARVALUE]); - - unsigned int scissor_width = xmax - xmin + 1, - scissor_height = ymax - ymin + 1; - pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); - pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); - ymin = pg->surface_binding_dim.height - (ymin + scissor_height); - - NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin, - xmin + scissor_width - 1, ymin + scissor_height - 1); - - bool full_clear = !xmin && !ymin && - scissor_width >= pg->surface_binding_dim.width && - scissor_height >= pg->surface_binding_dim.height; - - pgraph_apply_scaling_factor(pg, &xmin, &ymin); - pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); - - /* FIXME: Respect window clip?!?! */ - glEnable(GL_SCISSOR_TEST); - glScissor(xmin, ymin, scissor_width, scissor_height); - - /* Dither */ - /* FIXME: Maybe also disable it here? 
+ GL implementation dependent */ - if (pg->regs[NV_PGRAPH_CONTROL_0] & NV_PGRAPH_CONTROL_0_DITHERENABLE) { - glEnable(GL_DITHER); - } else { - glDisable(GL_DITHER); - } - - glClear(gl_mask); - - glDisable(GL_SCISSOR_TEST); - - pgraph_set_surface_dirty(pg, write_color, write_zeta); - - if (pg->color_binding) { - pg->color_binding->cleared = full_clear && write_color; - } - if (pg->zeta_binding) { - pg->zeta_binding->cleared = full_clear && write_zeta; - } - - pg->clearing = false; -} - -DEF_METHOD(NV097, SET_CLEAR_RECT_HORIZONTAL) -{ - pg->regs[NV_PGRAPH_CLEARRECTX] = parameter; -} - -DEF_METHOD(NV097, SET_CLEAR_RECT_VERTICAL) -{ - pg->regs[NV_PGRAPH_CLEARRECTY] = parameter; -} - -DEF_METHOD_INC(NV097, SET_SPECULAR_FOG_FACTOR) -{ - int slot = (method - NV097_SET_SPECULAR_FOG_FACTOR) / 4; - pg->regs[NV_PGRAPH_SPECFOGFACTOR0 + slot*4] = parameter; -} - -DEF_METHOD(NV097, SET_SHADER_CLIP_PLANE_MODE) -{ - pg->regs[NV_PGRAPH_SHADERCLIPMODE] = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_OCW) -{ - int slot = (method - NV097_SET_COMBINER_COLOR_OCW) / 4; - pg->regs[NV_PGRAPH_COMBINECOLORO0 + slot*4] = parameter; -} - -DEF_METHOD(NV097, SET_COMBINER_CONTROL) -{ - pg->regs[NV_PGRAPH_COMBINECTL] = parameter; -} - -DEF_METHOD(NV097, SET_SHADOW_ZSLOPE_THRESHOLD) -{ - pg->regs[NV_PGRAPH_SHADOWZSLOPETHRESHOLD] = parameter; - assert(parameter == 0x7F800000); /* FIXME: Unimplemented */ -} - -DEF_METHOD(NV097, SET_SHADOW_DEPTH_FUNC) -{ - SET_MASK(pg->regs[NV_PGRAPH_SHADOWCTL], NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC, - parameter); -} - -DEF_METHOD(NV097, SET_SHADER_STAGE_PROGRAM) -{ - pg->regs[NV_PGRAPH_SHADERPROG] = parameter; -} - -DEF_METHOD(NV097, SET_DOT_RGBMAPPING) -{ - SET_MASK(pg->regs[NV_PGRAPH_SHADERCTL], 0xFFF, - GET_MASK(parameter, 0xFFF)); -} - -DEF_METHOD(NV097, SET_SHADER_OTHER_STAGE_INPUT) -{ - SET_MASK(pg->regs[NV_PGRAPH_SHADERCTL], 0xFFFF000, - GET_MASK(parameter, 0xFFFF000)); -} - -DEF_METHOD_INC(NV097, SET_TRANSFORM_DATA) -{ - int slot = (method - 
NV097_SET_TRANSFORM_DATA) / 4; - pg->vertex_state_shader_v0[slot] = parameter; -} - -DEF_METHOD(NV097, LAUNCH_TRANSFORM_PROGRAM) -{ - unsigned int program_start = parameter; - assert(program_start < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); - Nv2aVshProgram program; - Nv2aVshParseResult result = nv2a_vsh_parse_program( - &program, - pg->program_data[program_start], - NV2A_MAX_TRANSFORM_PROGRAM_LENGTH - program_start); - assert(result == NV2AVPR_SUCCESS); - - Nv2aVshCPUXVSSExecutionState state_linkage; - Nv2aVshExecutionState state = nv2a_vsh_emu_initialize_xss_execution_state( - &state_linkage, (float*)pg->vsh_constants); - memcpy(state_linkage.input_regs, pg->vertex_state_shader_v0, sizeof(pg->vertex_state_shader_v0)); - - nv2a_vsh_emu_execute_track_context_writes(&state, &program, pg->vsh_constants_dirty); - - nv2a_vsh_program_destroy(&program); -} - -DEF_METHOD(NV097, SET_TRANSFORM_EXECUTION_MODE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_MODE, - GET_MASK(parameter, - NV097_SET_TRANSFORM_EXECUTION_MODE_MODE)); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_RANGE_MODE, - GET_MASK(parameter, - NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE)); -} - -DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_CXT_WRITE_EN) -{ - pg->enable_vertex_program_write = parameter; -} - -DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_LOAD) -{ - assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); - SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, parameter); -} - -DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_START) -{ - assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START, parameter); -} - -DEF_METHOD(NV097, SET_TRANSFORM_CONSTANT_LOAD) -{ - assert(parameter < NV2A_VERTEXSHADER_CONSTANTS); - SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, parameter); -} - - -void pgraph_context_switch(NV2AState *d, unsigned int channel_id) -{ - bool 
channel_valid = - d->pgraph.regs[NV_PGRAPH_CTX_CONTROL] & NV_PGRAPH_CTX_CONTROL_CHID; - unsigned pgraph_channel_id = GET_MASK(d->pgraph.regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); - - bool valid = channel_valid && pgraph_channel_id == channel_id; - if (!valid) { - SET_MASK(d->pgraph.regs[NV_PGRAPH_TRAPPED_ADDR], - NV_PGRAPH_TRAPPED_ADDR_CHID, channel_id); - - NV2A_DPRINTF("pgraph switching to ch %d\n", channel_id); - - /* TODO: hardware context switching */ - assert(!(d->pgraph.regs[NV_PGRAPH_DEBUG_3] - & NV_PGRAPH_DEBUG_3_HW_CONTEXT_SWITCH)); - - d->pgraph.waiting_for_context_switch = true; - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock_iothread(); - d->pgraph.pending_interrupts |= NV_PGRAPH_INTR_CONTEXT_SWITCH; - nv2a_update_irq(d); - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&d->pgraph.lock); - } -} - -static void pgraph_method_log(unsigned int subchannel, - unsigned int graphics_class, - unsigned int method, uint32_t parameter) -{ - const char *method_name = "?"; - static unsigned int last = 0; - static unsigned int count = 0; - - if (last == NV097_ARRAY_ELEMENT16 && method != last) { - method_name = "NV097_ARRAY_ELEMENT16"; - trace_nv2a_pgraph_method_abbrev(subchannel, graphics_class, last, - method_name, count); - NV2A_GL_DPRINTF(false, "pgraph method (%d) 0x%x %s * %d", subchannel, - last, method_name, count); - } - - if (method != NV097_ARRAY_ELEMENT16) { - uint32_t base = method; - switch (graphics_class) { - case NV_KELVIN_PRIMITIVE: { - int idx = METHOD_ADDR_TO_INDEX(method); - if (idx < ARRAY_SIZE(pgraph_kelvin_methods) && - pgraph_kelvin_methods[idx].handler) { - method_name = pgraph_kelvin_methods[idx].name; - base = pgraph_kelvin_methods[idx].base; - } - break; - } - default: - break; - } - - uint32_t offset = method - base; - trace_nv2a_pgraph_method(subchannel, graphics_class, method, - method_name, offset, parameter); - NV2A_GL_DPRINTF(false, - "pgraph method (%d): 0x%" PRIx32 " -> 0x%04" PRIx32 - " %s[%" PRId32 "] 0x%" 
PRIx32, - subchannel, graphics_class, method, method_name, offset, - parameter); - } - - if (method == last) { - count++; - } else { - count = 0; - } - last = method; -} - -static void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, - unsigned int attr) -{ - VertexAttribute *attribute = &pg->vertex_attributes[attr]; - - if (attribute->inline_buffer_populated || pg->inline_buffer_length == 0) { - return; - } - - /* Now upload the previous attribute value */ - attribute->inline_buffer_populated = true; - for (int i = 0; i < pg->inline_buffer_length; i++) { - memcpy(&attribute->inline_buffer[i * 4], attribute->inline_value, - sizeof(float) * 4); - } -} - -static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg) -{ - pgraph_check_within_begin_end_block(pg); - assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH); - - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attribute = &pg->vertex_attributes[i]; - if (attribute->inline_buffer_populated) { - memcpy(&attribute->inline_buffer[pg->inline_buffer_length * 4], - attribute->inline_value, sizeof(float) * 4); - } - } - - pg->inline_buffer_length++; -} - -void nv2a_gl_context_init(void) -{ - g_nv2a_context_render = glo_context_create(); - g_nv2a_context_display = glo_context_create(); - - glGetFloatv(GL_SMOOTH_LINE_WIDTH_RANGE, supportedSmoothLineWidthRange); - glGetFloatv(GL_ALIASED_LINE_WIDTH_RANGE, supportedAliasedLineWidthRange); -} - -void nv2a_set_surface_scale_factor(unsigned int scale) -{ - NV2AState *d = g_nv2a; - - g_config.display.quality.surface_scale = scale < 1 ? 
1 : scale; - - qemu_mutex_unlock_iothread(); - - qemu_mutex_lock(&d->pfifo.lock); - qatomic_set(&d->pfifo.halt, true); - qemu_mutex_unlock(&d->pfifo.lock); - - qemu_mutex_lock(&d->pgraph.lock); - qemu_event_reset(&d->pgraph.dirty_surfaces_download_complete); - qatomic_set(&d->pgraph.download_dirty_surfaces_pending, true); - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock(&d->pfifo.lock); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - qemu_event_wait(&d->pgraph.dirty_surfaces_download_complete); - - qemu_mutex_lock(&d->pgraph.lock); - qemu_event_reset(&d->pgraph.flush_complete); - qatomic_set(&d->pgraph.flush_pending, true); - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock(&d->pfifo.lock); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - qemu_event_wait(&d->pgraph.flush_complete); - - qemu_mutex_lock(&d->pfifo.lock); - qatomic_set(&d->pfifo.halt, false); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - - qemu_mutex_lock_iothread(); -} - -unsigned int nv2a_get_surface_scale_factor(void) -{ - return g_nv2a->pgraph.surface_scale_factor; -} - -static void pgraph_reload_surface_scale_factor(NV2AState *d) -{ - int factor = g_config.display.quality.surface_scale; - d->pgraph.surface_scale_factor = factor < 1 ? 
1 : factor; -} - -void pgraph_init(NV2AState *d) -{ - int i; - - g_nv2a = d; - PGRAPHState *pg = &d->pgraph; - - pgraph_reload_surface_scale_factor(d); - - pg->frame_time = 0; - pg->draw_time = 0; - pg->downloads_pending = false; - - qemu_mutex_init(&pg->lock); - qemu_mutex_init(&pg->shader_cache_lock); - qemu_event_init(&pg->gl_sync_complete, false); - qemu_event_init(&pg->downloads_complete, false); - qemu_event_init(&pg->dirty_surfaces_download_complete, false); - qemu_event_init(&pg->flush_complete, false); - qemu_event_init(&pg->shader_cache_writeback_complete, false); - - /* fire up opengl */ - glo_set_current(g_nv2a_context_render); - -#ifdef DEBUG_NV2A_GL - gl_debug_initialize(); -#endif - - /* DXT textures */ - assert(glo_check_extension("GL_EXT_texture_compression_s3tc")); - /* Internal RGB565 texture format */ - assert(glo_check_extension("GL_ARB_ES2_compatibility")); - - GLint max_vertex_attributes; - glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes); - assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES); - - - glGenFramebuffers(1, &pg->gl_framebuffer); - glBindFramebuffer(GL_FRAMEBUFFER, pg->gl_framebuffer); - - pgraph_init_render_to_texture(d); - QTAILQ_INIT(&pg->surfaces); - - QSIMPLEQ_INIT(&pg->report_queue); - - //glPolygonMode( GL_FRONT_AND_BACK, GL_LINE ); - - // Initialize texture cache - const size_t texture_cache_size = 512; - lru_init(&pg->texture_cache); - pg->texture_cache_entries = malloc(texture_cache_size * sizeof(TextureLruNode)); - assert(pg->texture_cache_entries != NULL); - for (i = 0; i < texture_cache_size; i++) { - lru_add_free(&pg->texture_cache, &pg->texture_cache_entries[i].node); - } - - pg->texture_cache.init_node = texture_cache_entry_init; - pg->texture_cache.compare_nodes = texture_cache_entry_compare; - pg->texture_cache.post_node_evict = texture_cache_entry_post_evict; - - // Initialize element cache - const size_t element_cache_size = 50*1024; - lru_init(&pg->element_cache); - 
pg->element_cache_entries = malloc(element_cache_size * sizeof(VertexLruNode)); - assert(pg->element_cache_entries != NULL); - GLuint element_cache_buffers[element_cache_size]; - glGenBuffers(element_cache_size, element_cache_buffers); - for (i = 0; i < element_cache_size; i++) { - pg->element_cache_entries[i].gl_buffer = element_cache_buffers[i]; - lru_add_free(&pg->element_cache, &pg->element_cache_entries[i].node); - } - - pg->element_cache.init_node = vertex_cache_entry_init; - pg->element_cache.compare_nodes = vertex_cache_entry_compare; - - shader_cache_init(pg); - - pg->material_alpha = 0.0f; - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE, - NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH); - pg->primitive_mode = PRIM_TYPE_INVALID; - - for (i=0; ivertex_attributes[i]; - glGenBuffers(1, &attribute->gl_inline_buffer); - attribute->inline_buffer = (float*)g_malloc(NV2A_MAX_BATCH_LENGTH - * sizeof(float) * 4); - attribute->inline_buffer_populated = false; - } - glGenBuffers(1, &pg->gl_inline_array_buffer); - - glGenBuffers(1, &pg->gl_memory_buffer); - glBindBuffer(GL_ARRAY_BUFFER, pg->gl_memory_buffer); - glBufferData(GL_ARRAY_BUFFER, memory_region_size(d->vram), - NULL, GL_DYNAMIC_DRAW); - - glGenVertexArrays(1, &pg->gl_vertex_array); - glBindVertexArray(pg->gl_vertex_array); - - assert(glGetError() == GL_NO_ERROR); - - glo_set_current(g_nv2a_context_display); - pgraph_init_display_renderer(d); - - glo_set_current(NULL); -} - -void pgraph_destroy(PGRAPHState *pg) -{ - qemu_mutex_destroy(&pg->lock); - qemu_mutex_destroy(&pg->shader_cache_lock); - - glo_set_current(g_nv2a_context_render); - - // TODO: clear out surfaces - - glDeleteFramebuffers(1, &pg->gl_framebuffer); - - // Clear out shader cache - shader_write_cache_reload_list(pg); - free(pg->shader_cache_entries); - - // Clear out texture cache - lru_flush(&pg->texture_cache); - free(pg->texture_cache_entries); - - glo_set_current(NULL); - glo_context_destroy(g_nv2a_context_render); - 
glo_context_destroy(g_nv2a_context_display); -} - -static void pgraph_shader_update_constants(PGRAPHState *pg, - ShaderBinding *binding, - bool binding_changed, - bool vertex_program, - bool fixed_function) -{ - int i, j; - - /* update combiner constants */ - for (i = 0; i < 9; i++) { - uint32_t constant[2]; - if (i == 8) { - /* final combiner */ - constant[0] = pg->regs[NV_PGRAPH_SPECFOGFACTOR0]; - constant[1] = pg->regs[NV_PGRAPH_SPECFOGFACTOR1]; - } else { - constant[0] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4]; - constant[1] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4]; - } - - for (j = 0; j < 2; j++) { - GLint loc = binding->psh_constant_loc[i][j]; - if (loc != -1) { - float value[4]; - value[0] = (float) ((constant[j] >> 16) & 0xFF) / 255.0f; - value[1] = (float) ((constant[j] >> 8) & 0xFF) / 255.0f; - value[2] = (float) (constant[j] & 0xFF) / 255.0f; - value[3] = (float) ((constant[j] >> 24) & 0xFF) / 255.0f; - - glUniform4fv(loc, 1, value); - } - } - } - if (binding->alpha_ref_loc != -1) { - float alpha_ref = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAREF) / 255.0; - glUniform1f(binding->alpha_ref_loc, alpha_ref); - } - - - /* For each texture stage */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - GLint loc; - - /* Bump luminance only during stages 1 - 3 */ - if (i > 0) { - loc = binding->bump_mat_loc[i]; - if (loc != -1) { - float m[4]; - m[0] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT00 + 4 * (i - 1)]; - m[1] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT01 + 4 * (i - 1)]; - m[2] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT10 + 4 * (i - 1)]; - m[3] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT11 + 4 * (i - 1)]; - glUniformMatrix2fv(loc, 1, GL_FALSE, m); - } - loc = binding->bump_scale_loc[i]; - if (loc != -1) { - glUniform1f(loc, *(float*)&pg->regs[ - NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4]); - } - loc = binding->bump_offset_loc[i]; - if (loc != -1) { - glUniform1f(loc, *(float*)&pg->regs[ - NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4]); - } - } - - loc = 
pg->shader_binding->tex_scale_loc[i]; - if (loc != -1) { - assert(pg->texture_binding[i] != NULL); - glUniform1f(loc, (float)pg->texture_binding[i]->scale); - } - } - - if (binding->fog_color_loc != -1) { - uint32_t fog_color = pg->regs[NV_PGRAPH_FOGCOLOR]; - glUniform4f(binding->fog_color_loc, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0); - } - if (binding->fog_param_loc[0] != -1) { - glUniform1f(binding->fog_param_loc[0], - *(float*)&pg->regs[NV_PGRAPH_FOGPARAM0]); - } - if (binding->fog_param_loc[1] != -1) { - glUniform1f(binding->fog_param_loc[1], - *(float*)&pg->regs[NV_PGRAPH_FOGPARAM1]); - } - - float zmax; - switch (pg->surface_shape.zeta_format) { - case NV097_SET_SURFACE_FORMAT_ZETA_Z16: - zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF; - break; - case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: - zmax = pg->surface_shape.z_format ? 
f24_max : (float)0xFFFFFF; - break; - default: - assert(0); - } - - if (fixed_function) { - /* update lighting constants */ - struct { - uint32_t* v; - bool* dirty; - GLint* locs; - size_t len; - } lighting_arrays[] = { - {&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT}, - {&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT}, - {&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT}, - }; - - for (i=0; ilight_infinite_half_vector_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]); - } - loc = binding->light_infinite_direction_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_infinite_direction[i]); - } - - loc = binding->light_local_position_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_local_position[i]); - } - loc = binding->light_local_attenuation_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_local_attenuation[i]); - } - } - - /* estimate the viewport by assuming it matches the surface ... 
*/ - unsigned int aa_width = 1, aa_height = 1; - pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); - - float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width); - float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height); - float m33 = zmax; - float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0]; - float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1]; - - float invViewport[16] = { - 1.0/m11, 0, 0, 0, - 0, 1.0/m22, 0, 0, - 0, 0, 1.0/m33, 0, - -1.0+m41/m11, 1.0+m42/m22, 0, 1.0 - }; - - if (binding->inv_viewport_loc != -1) { - glUniformMatrix4fv(binding->inv_viewport_loc, - 1, GL_FALSE, &invViewport[0]); - } - } - - /* update vertex program constants */ - for (i=0; ivsh_constants_dirty[i] && !binding_changed) continue; - - GLint loc = binding->vsh_constant_loc[i]; - if ((loc != -1) && - memcmp(binding->vsh_constants[i], pg->vsh_constants[i], - sizeof(pg->vsh_constants[1]))) { - glUniform4fv(loc, 1, (const GLfloat *)pg->vsh_constants[i]); - memcpy(binding->vsh_constants[i], pg->vsh_constants[i], - sizeof(pg->vsh_constants[i])); - } - - pg->vsh_constants_dirty[i] = false; - } - - if (binding->surface_size_loc != -1) { - unsigned int aa_width = 1, aa_height = 1; - pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); - glUniform2f(binding->surface_size_loc, - pg->surface_binding_dim.width / aa_width, - pg->surface_binding_dim.height / aa_height); - } - - if (binding->clip_range_loc != -1) { - float zclip_min = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMIN] / zmax * 2.0 - 1.0; - float zclip_max = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMAX] / zmax * 2.0 - 1.0; - glUniform4f(binding->clip_range_loc, 0, zmax, zclip_min, zclip_max); - } - - /* Clipping regions */ - unsigned int max_gl_width = pg->surface_binding_dim.width; - unsigned int max_gl_height = pg->surface_binding_dim.height; - pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height); - - for (i = 0; i < 8; i++) { - uint32_t x = pg->regs[NV_PGRAPH_WINDOWCLIPX0 + i * 
4]; - unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN); - unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1; - uint32_t y = pg->regs[NV_PGRAPH_WINDOWCLIPY0 + i * 4]; - unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN); - unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1; - pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min); - pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max); - - pgraph_apply_scaling_factor(pg, &x_min, &y_min); - pgraph_apply_scaling_factor(pg, &x_max, &y_max); - - /* Translate for the GL viewport origin */ - int y_min_xlat = MAX((int)max_gl_height - (int)y_max, 0); - int y_max_xlat = MIN((int)max_gl_height - (int)y_min, max_gl_height); - - glUniform4i(pg->shader_binding->clip_region_loc[i], - x_min, y_min_xlat, x_max, y_max_xlat); - } - - if (binding->material_alpha_loc != -1) { - glUniform1f(binding->material_alpha_loc, pg->material_alpha); - } -} - -static bool pgraph_bind_shaders_test_dirty(PGRAPHState *pg) -{ - #define CR_1(reg) CR_x(reg, 1) - #define CR_4(reg) CR_x(reg, 4) - #define CR_8(reg) CR_x(reg, 8) - #define CF(src, name) CF_x(typeof(src), (&src), name, 1) - #define CFA(src, name) CF_x(typeof(src[0]), src, name, ARRAY_SIZE(src)) - #define CNAME(name) reg_check__ ## name - #define CX_x__define(type, name, x) static type CNAME(name)[x]; - #define CR_x__define(reg, x) CX_x__define(uint32_t, reg, x) - #define CF_x__define(type, src, name, x) CX_x__define(type, name, x) - #define CR_x__check(reg, x) \ - for (int i = 0; i < x; i++) { if (pg->regs[reg+i*4] != CNAME(reg)[i]) goto dirty; } - #define CF_x__check(type, src, name, x) \ - for (int i = 0; i < x; i++) { if (src[i] != CNAME(name)[i]) goto dirty; } - #define CR_x__update(reg, x) \ - for (int i = 0; i < x; i++) { CNAME(reg)[i] = pg->regs[reg+i*4]; } - #define CF_x__update(type, src, name, x) \ - for (int i = 0; i < x; i++) { CNAME(name)[i] = src[i]; } - - #define DIRTY_REGS \ - CR_1(NV_PGRAPH_COMBINECTL) \ - 
CR_1(NV_PGRAPH_SHADERCTL) \ - CR_1(NV_PGRAPH_SHADOWCTL) \ - CR_1(NV_PGRAPH_COMBINESPECFOG0) \ - CR_1(NV_PGRAPH_COMBINESPECFOG1) \ - CR_1(NV_PGRAPH_CONTROL_0) \ - CR_1(NV_PGRAPH_CONTROL_3) \ - CR_1(NV_PGRAPH_CSV0_C) \ - CR_1(NV_PGRAPH_CSV0_D) \ - CR_1(NV_PGRAPH_CSV1_A) \ - CR_1(NV_PGRAPH_CSV1_B) \ - CR_1(NV_PGRAPH_SETUPRASTER) \ - CR_1(NV_PGRAPH_SHADERPROG) \ - CR_8(NV_PGRAPH_COMBINECOLORI0) \ - CR_8(NV_PGRAPH_COMBINECOLORO0) \ - CR_8(NV_PGRAPH_COMBINEALPHAI0) \ - CR_8(NV_PGRAPH_COMBINEALPHAO0) \ - CR_8(NV_PGRAPH_COMBINEFACTOR0) \ - CR_8(NV_PGRAPH_COMBINEFACTOR1) \ - CR_1(NV_PGRAPH_SHADERCLIPMODE) \ - CR_4(NV_PGRAPH_TEXCTL0_0) \ - CR_4(NV_PGRAPH_TEXFMT0) \ - CR_4(NV_PGRAPH_TEXFILTER0) \ - CR_8(NV_PGRAPH_WINDOWCLIPX0) \ - CR_8(NV_PGRAPH_WINDOWCLIPY0) \ - CF(pg->primitive_mode, primitive_mode) \ - CF(pg->surface_scale_factor, surface_scale_factor) \ - CF(pg->compressed_attrs, compressed_attrs) \ - CFA(pg->texture_matrix_enable, texture_matrix_enable) - - #define CR_x(reg, x) CR_x__define(reg, x) - #define CF_x(type, src, name, x) CF_x__define(type, src, name, x) - DIRTY_REGS - #undef CR_x - #undef CF_x - - #define CR_x(reg, x) CR_x__check(reg, x) - #define CF_x(type, src, name, x) CF_x__check(type, src, name, x) - DIRTY_REGS - #undef CR_x - #undef CF_x - return false; - -dirty: - #define CR_x(reg, x) CR_x__update(reg, x) - #define CF_x(type, src, name, x) CF_x__update(type, src, name, x) - DIRTY_REGS - #undef CR_x - #undef CF_x - return true; -} - -static void pgraph_bind_shaders(PGRAPHState *pg) -{ - int i, j; - - bool vertex_program = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_MODE) == 2; - - bool fixed_function = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_MODE) == 0; - - int program_start = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START); - - NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__, - vertex_program ? "yes" : "no", - fixed_function ? 
"yes" : "no"); - - bool binding_changed = false; - if (!pgraph_bind_shaders_test_dirty(pg) && !pg->program_data_dirty) { - nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY); - goto update_constants; - } - - pg->program_data_dirty = false; - - ShaderBinding* old_binding = pg->shader_binding; - - ShaderState state; - memset(&state, 0, sizeof(ShaderState)); - - state.surface_scale_factor = pg->surface_scale_factor; - - state.compressed_attrs = pg->compressed_attrs; - - /* register combiner stuff */ - state.psh.window_clip_exclusive = pg->regs[NV_PGRAPH_SETUPRASTER] - & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE; - state.psh.combiner_control = pg->regs[NV_PGRAPH_COMBINECTL]; - state.psh.shader_stage_program = pg->regs[NV_PGRAPH_SHADERPROG]; - state.psh.other_stage_input = pg->regs[NV_PGRAPH_SHADERCTL]; - state.psh.final_inputs_0 = pg->regs[NV_PGRAPH_COMBINESPECFOG0]; - state.psh.final_inputs_1 = pg->regs[NV_PGRAPH_COMBINESPECFOG1]; - - state.psh.alpha_test = pg->regs[NV_PGRAPH_CONTROL_0] - & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE; - state.psh.alpha_func = (enum PshAlphaFunc)GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAFUNC); - - state.psh.point_sprite = pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE; - - state.psh.shadow_depth_func = (enum PshShadowDepthFunc)GET_MASK( - pg->regs[NV_PGRAPH_SHADOWCTL], NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC); - - state.fixed_function = fixed_function; - - /* fixed function stuff */ - if (fixed_function) { - state.skinning = (enum VshSkinning)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_SKIN); - state.lighting = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_LIGHTING); - state.normalization = pg->regs[NV_PGRAPH_CSV0_C] - & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE; - - /* color material */ - state.emission_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_EMISSION); - state.ambient_src = (enum 
MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_AMBIENT); - state.diffuse_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_DIFFUSE); - state.specular_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_SPECULAR); - } - - /* vertex program stuff */ - state.vertex_program = vertex_program, - state.z_perspective = pg->regs[NV_PGRAPH_CONTROL_0] - & NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; - - state.point_params_enable = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_POINTPARAMSENABLE); - state.point_size = - GET_MASK(pg->regs[NV_PGRAPH_POINTSIZE], NV097_SET_POINT_SIZE_V) / 8.0f; - if (state.point_params_enable) { - for (int i = 0; i < 8; i++) { - state.point_params[i] = pg->point_params[i]; - } - } - - /* geometry shader stuff */ - state.primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode; - state.polygon_front_mode = (enum ShaderPolygonMode)GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_FRONTFACEMODE); - state.polygon_back_mode = (enum ShaderPolygonMode)GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_BACKFACEMODE); - - state.smooth_shading = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_SHADEMODE) == - NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH; - state.psh.smooth_shading = state.smooth_shading; - - state.program_length = 0; - - if (vertex_program) { - // copy in vertex program tokens - for (i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH; i++) { - uint32_t *cur_token = (uint32_t*)&pg->program_data[i]; - memcpy(&state.program_data[state.program_length], - cur_token, - VSH_TOKEN_SIZE * sizeof(uint32_t)); - state.program_length++; - - if (vsh_get_field(cur_token, FLD_FINAL)) { - break; - } - } - } - - /* Texgen */ - for (i = 0; i < 4; i++) { - unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B; - for (j = 0; j < 4; j++) { - unsigned int masks[] = { - (i % 2) ? 
NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S, - (i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T, - (i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R, - (i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q - }; - state.texgen[i][j] = (enum VshTexgen)GET_MASK(pg->regs[reg], masks[j]); - } - } - - /* Fog */ - state.fog_enable = pg->regs[NV_PGRAPH_CONTROL_3] - & NV_PGRAPH_CONTROL_3_FOGENABLE; - if (state.fog_enable) { - /*FIXME: Use CSV0_D? */ - state.fog_mode = (enum VshFogMode)GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_FOG_MODE); - state.foggen = (enum VshFoggen)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_FOGGENMODE); - } else { - /* FIXME: Do we still pass the fogmode? */ - state.fog_mode = (enum VshFogMode)0; - state.foggen = (enum VshFoggen)0; - } - - /* Texture matrices */ - for (i = 0; i < 4; i++) { - state.texture_matrix_enable[i] = pg->texture_matrix_enable[i]; - } - - /* Lighting */ - if (state.lighting) { - for (i = 0; i < NV2A_MAX_LIGHTS; i++) { - state.light[i] = (enum VshLight)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2)); - } - } - - /* Copy content of enabled combiner stages */ - int num_stages = pg->regs[NV_PGRAPH_COMBINECTL] & 0xFF; - for (i = 0; i < num_stages; i++) { - state.psh.rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4]; - state.psh.rgb_outputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORO0 + i * 4]; - state.psh.alpha_inputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAI0 + i * 4]; - state.psh.alpha_outputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAO0 + i * 4]; - //constant_0[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4]; - //constant_1[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4]; - } - - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - state.psh.compare_mode[i][j] = - (pg->regs[NV_PGRAPH_SHADERCLIPMODE] >> (4 * i + j)) & 1; - } - - uint32_t ctl_0 = pg->regs[NV_PGRAPH_TEXCTL0_0 + i*4]; - bool enabled = pgraph_is_texture_stage_active(pg, i) && - (ctl_0 & 
NV_PGRAPH_TEXCTL0_0_ENABLE); - if (!enabled) { - continue; - } - - state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN; - - uint32_t tex_fmt = pg->regs[NV_PGRAPH_TEXFMT0 + i*4]; - unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR); - ColorFormatInfo f = kelvin_color_format_map[color_format]; - state.psh.rect_tex[i] = f.linear; - - uint32_t border_source = GET_MASK(tex_fmt, - NV_PGRAPH_TEXFMT0_BORDER_SOURCE); - bool cubemap = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); - state.psh.border_logical_size[i][0] = 0.0f; - state.psh.border_logical_size[i][1] = 0.0f; - state.psh.border_logical_size[i][2] = 0.0f; - if (border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) { - if (!f.linear && !cubemap) { - // The actual texture will be (at least) double the reported - // size and shifted by a 4 texel border but texture coordinates - // will still be relative to the reported size. - unsigned int reported_width = - 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); - unsigned int reported_height = - 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V); - unsigned int reported_depth = - 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); - - state.psh.border_logical_size[i][0] = reported_width; - state.psh.border_logical_size[i][1] = reported_height; - state.psh.border_logical_size[i][2] = reported_depth; - - if (reported_width < 8) { - state.psh.border_inv_real_size[i][0] = 0.0625f; - } else { - state.psh.border_inv_real_size[i][0] = - 1.0f / (reported_width * 2.0f); - } - if (reported_height < 8) { - state.psh.border_inv_real_size[i][1] = 0.0625f; - } else { - state.psh.border_inv_real_size[i][1] = - 1.0f / (reported_height * 2.0f); - } - if (reported_depth < 8) { - state.psh.border_inv_real_size[i][2] = 0.0625f; - } else { - state.psh.border_inv_real_size[i][2] = - 1.0f / (reported_depth * 2.0f); - } - } else { - NV2A_UNIMPLEMENTED("Border source texture with linear %d cubemap %d", - f.linear, cubemap); - } - } - - /* Keep 
track of whether texture data has been loaded as signed - * normalized integers or not. This dictates whether or not we will need - * to re-map in fragment shader for certain texture modes (e.g. - * bumpenvmap). - * - * FIXME: When signed texture data is loaded as unsigned and remapped in - * fragment shader, there may be interpolation artifacts. Fix this to - * support signed textures more appropriately. - */ - state.psh.snorm_tex[i] = (f.gl_internal_format == GL_RGB8_SNORM) - || (f.gl_internal_format == GL_RG8_SNORM); - - state.psh.shadow_map[i] = f.depth; - - uint32_t filter = pg->regs[NV_PGRAPH_TEXFILTER0 + i*4]; - unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); - enum ConvolutionFilter kernel = CONVOLUTION_FILTER_DISABLED; - /* FIXME: We do not distinguish between min and mag when - * performing convolution. Just use it if specified for min (common AA - * case). - */ - if (min_filter == NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0) { - int k = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL); - assert(k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_QUINCUNX || - k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_GAUSSIAN_3); - kernel = (enum ConvolutionFilter)k; - } - - state.psh.conv_tex[i] = kernel; - } - - uint64_t shader_state_hash = fast_hash((uint8_t*) &state, sizeof(ShaderState)); - qemu_mutex_lock(&pg->shader_cache_lock); - LruNode *node = lru_lookup(&pg->shader_cache, shader_state_hash, &state); - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - if (snode->binding || shader_load_from_memory(snode)) { - pg->shader_binding = snode->binding; - } else { - pg->shader_binding = generate_shaders(&state); - nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN); - - /* cache it */ - snode->binding = pg->shader_binding; - if (g_config.perf.cache_shaders) { - shader_cache_to_disk(snode); - } - } - - qemu_mutex_unlock(&pg->shader_cache_lock); - - binding_changed = (pg->shader_binding != old_binding); - if (binding_changed) { - 
nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND); - glUseProgram(pg->shader_binding->gl_program); - } - -update_constants: - pgraph_shader_update_constants(pg, pg->shader_binding, binding_changed, - vertex_program, fixed_function); - - NV2A_GL_DGROUP_END(); -} - -static bool pgraph_framebuffer_dirty(PGRAPHState *pg) -{ - bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape, - sizeof(SurfaceShape)) != 0; - if (!shape_changed || (!pg->surface_shape.color_format - && !pg->surface_shape.zeta_format)) { - return false; - } - return true; -} - -static bool pgraph_color_write_enabled(PGRAPHState *pg) -{ - return pg->regs[NV_PGRAPH_CONTROL_0] & ( - NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE - | NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE - | NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE - | NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE); -} - -static bool pgraph_zeta_write_enabled(PGRAPHState *pg) -{ - return pg->regs[NV_PGRAPH_CONTROL_0] & ( - NV_PGRAPH_CONTROL_0_ZWRITEENABLE - | NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE); -} - -static void pgraph_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta) -{ - NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", - color, zeta, - pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg)); - /* FIXME: Does this apply to CLEARs too? 
*/ - color = color && pgraph_color_write_enabled(pg); - zeta = zeta && pgraph_zeta_write_enabled(pg); - pg->surface_color.draw_dirty |= color; - pg->surface_zeta.draw_dirty |= zeta; - - if (pg->color_binding) { - pg->color_binding->draw_dirty |= color; - pg->color_binding->frame_time = pg->frame_time; - pg->color_binding->cleared = false; - - } - - if (pg->zeta_binding) { - pg->zeta_binding->draw_dirty |= zeta; - pg->zeta_binding->frame_time = pg->frame_time; - pg->zeta_binding->cleared = false; - - } -} - -static GLuint pgraph_compile_shader(const char *vs_src, const char *fs_src) -{ - GLint status; - char err_buf[512]; - - // Compile vertex shader - GLuint vs = glCreateShader(GL_VERTEX_SHADER); - glShaderSource(vs, 1, &vs_src, NULL); - glCompileShader(vs); - glGetShaderiv(vs, GL_COMPILE_STATUS, &status); - if (status != GL_TRUE) { - glGetShaderInfoLog(vs, sizeof(err_buf), NULL, err_buf); - err_buf[sizeof(err_buf)-1] = '\0'; - fprintf(stderr, "Vertex shader compilation failed: %s\n", err_buf); - exit(1); - } - - // Compile fragment shader - GLuint fs = glCreateShader(GL_FRAGMENT_SHADER); - glShaderSource(fs, 1, &fs_src, NULL); - glCompileShader(fs); - glGetShaderiv(fs, GL_COMPILE_STATUS, &status); - if (status != GL_TRUE) { - glGetShaderInfoLog(fs, sizeof(err_buf), NULL, err_buf); - err_buf[sizeof(err_buf)-1] = '\0'; - fprintf(stderr, "Fragment shader compilation failed: %s\n", err_buf); - exit(1); - } - - // Link vertex and fragment shaders - GLuint prog = glCreateProgram(); - glAttachShader(prog, vs); - glAttachShader(prog, fs); - glLinkProgram(prog); - glUseProgram(prog); - - // Flag shaders for deletion (will still be retained for lifetime of prog) - glDeleteShader(vs); - glDeleteShader(fs); - - return prog; -} - -static void pgraph_init_render_to_texture(NV2AState *d) -{ - struct PGRAPHState *pg = &d->pgraph; - const char *vs = - "#version 330\n" - "void main()\n" - "{\n" - " float x = -1.0 + float((gl_VertexID & 1) << 2);\n" - " float y = -1.0 + 
float((gl_VertexID & 2) << 1);\n" - " gl_Position = vec4(x, y, 0, 1);\n" - "}\n"; - const char *fs = - "#version 330\n" - "uniform sampler2D tex;\n" - "uniform vec2 surface_size;\n" - "layout(location = 0) out vec4 out_Color;\n" - "void main()\n" - "{\n" - " vec2 texCoord;\n" - " texCoord.x = gl_FragCoord.x;\n" - " texCoord.y = (surface_size.y - gl_FragCoord.y)\n" - " + (textureSize(tex,0).y - surface_size.y);\n" - " texCoord /= textureSize(tex,0).xy;\n" - " out_Color.rgba = texture(tex, texCoord);\n" - "}\n"; - - pg->s2t_rndr.prog = pgraph_compile_shader(vs, fs); - pg->s2t_rndr.tex_loc = glGetUniformLocation(pg->s2t_rndr.prog, "tex"); - pg->s2t_rndr.surface_size_loc = glGetUniformLocation(pg->s2t_rndr.prog, - "surface_size"); - - glGenVertexArrays(1, &pg->s2t_rndr.vao); - glBindVertexArray(pg->s2t_rndr.vao); - glGenBuffers(1, &pg->s2t_rndr.vbo); - glBindBuffer(GL_ARRAY_BUFFER, pg->s2t_rndr.vbo); - glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); - glGenFramebuffers(1, &pg->s2t_rndr.fbo); -} - -static bool pgraph_surface_to_texture_can_fastpath(SurfaceBinding *surface, - TextureShape *shape) -{ - // FIXME: Better checks/handling on formats and surface-texture compat - - int surface_fmt = surface->shape.color_format; - int texture_fmt = shape->color_format; - - if (!surface->color) { - // FIXME: Support zeta to color - return false; - } - - switch (surface_fmt) { - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; - case 
NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; - default: break; - } - break; - default: break; - } - - trace_nv2a_pgraph_surface_texture_compat_failed( - surface_fmt, texture_fmt); - return false; -} - - -static void pgraph_render_surface_to(NV2AState *d, SurfaceBinding *surface, - int texture_unit, GLuint gl_target, - GLuint gl_texture, unsigned int width, - unsigned int height) -{ - glActiveTexture(GL_TEXTURE0 + texture_unit); - glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.s2t_rndr.fbo); - - GLenum draw_buffers[1] = { GL_COLOR_ATTACHMENT0 }; - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, - gl_texture, 0); - glDrawBuffers(1, draw_buffers); - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - assert(glGetError() == GL_NO_ERROR); - - float color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; - glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER); - glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, color); - - glBindVertexArray(d->pgraph.s2t_rndr.vao); - glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.s2t_rndr.vbo); - glUseProgram(d->pgraph.s2t_rndr.prog); - glProgramUniform1i(d->pgraph.s2t_rndr.prog, d->pgraph.s2t_rndr.tex_loc, - texture_unit); - glProgramUniform2f(d->pgraph.s2t_rndr.prog, - d->pgraph.s2t_rndr.surface_size_loc, width, height); - - glViewport(0, 0, width, height); - glColorMask(true, true, true, true); - glDisable(GL_DITHER); - glDisable(GL_SCISSOR_TEST); - glDisable(GL_BLEND); - 
glDisable(GL_STENCIL_TEST); - glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - glClearColor(0.0f, 0.0f, 1.0f, 1.0f); - glClear(GL_COLOR_BUFFER_BIT); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, 0, - 0); - glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.gl_framebuffer); - glBindVertexArray(d->pgraph.gl_vertex_array); - glBindTexture(gl_target, gl_texture); - glUseProgram( - d->pgraph.shader_binding ? d->pgraph.shader_binding->gl_program : 0); -} - -static void pgraph_render_surface_to_texture_slow( - NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, - TextureShape *texture_shape, int texture_unit) -{ - PGRAPHState *pg = &d->pgraph; - - const ColorFormatInfo *f = &kelvin_color_format_map[texture_shape->color_format]; - assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_map)); - nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX_FALLBACK); - - glActiveTexture(GL_TEXTURE0 + texture_unit); - glBindTexture(texture->gl_target, texture->gl_texture); - - unsigned int width = surface->width, - height = surface->height; - pgraph_apply_scaling_factor(pg, &width, &height); - - size_t bufsize = width * height * surface->fmt.bytes_per_pixel; - - uint8_t *buf = g_malloc(bufsize); - pgraph_download_surface_data_to_buffer(d, surface, false, true, false, buf); - - width = texture_shape->width; - height = texture_shape->height; - pgraph_apply_scaling_factor(pg, &width, &height); - - glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0, - f->gl_format, f->gl_type, buf); - g_free(buf); - glBindTexture(texture->gl_target, texture->gl_texture); -} - -/* Note: This function is intended to be called before PGRAPH configures GL - * state for rendering; it will configure GL state here but only restore a - * couple of items. 
- */ -static void pgraph_render_surface_to_texture(NV2AState *d, - SurfaceBinding *surface, - TextureBinding *texture, - TextureShape *texture_shape, - int texture_unit) -{ - PGRAPHState *pg = &d->pgraph; - - const ColorFormatInfo *f = - &kelvin_color_format_map[texture_shape->color_format]; - assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_map)); - - nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); - - if (!pgraph_surface_to_texture_can_fastpath(surface, texture_shape)) { - pgraph_render_surface_to_texture_slow(d, surface, texture, - texture_shape, texture_unit); - return; - } - - - unsigned int width = texture_shape->width, - height = texture_shape->height; - pgraph_apply_scaling_factor(pg, &width, &height); - - glActiveTexture(GL_TEXTURE0 + texture_unit); - glBindTexture(texture->gl_target, texture->gl_texture); - glTexParameteri(texture->gl_target, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(texture->gl_target, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(texture->gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0, - f->gl_format, f->gl_type, NULL); - glBindTexture(texture->gl_target, 0); - pgraph_render_surface_to(d, surface, texture_unit, texture->gl_target, - texture->gl_texture, width, height); - glBindTexture(texture->gl_target, texture->gl_texture); - glUseProgram( - d->pgraph.shader_binding ? 
d->pgraph.shader_binding->gl_program : 0); -} - -static void pgraph_gl_fence(void) -{ - GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - int result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, - (GLuint64)(5000000000)); - assert(result == GL_CONDITION_SATISFIED || result == GL_ALREADY_SIGNALED); - glDeleteSync(fence); -} - -static void pgraph_init_display_renderer(NV2AState *d) -{ - struct PGRAPHState *pg = &d->pgraph; - - glGenTextures(1, &pg->gl_display_buffer); - pg->gl_display_buffer_internal_format = 0; - pg->gl_display_buffer_width = 0; - pg->gl_display_buffer_height = 0; - pg->gl_display_buffer_format = 0; - pg->gl_display_buffer_type = 0; - - const char *vs = - "#version 330\n" - "void main()\n" - "{\n" - " float x = -1.0 + float((gl_VertexID & 1) << 2);\n" - " float y = -1.0 + float((gl_VertexID & 2) << 1);\n" - " gl_Position = vec4(x, y, 0, 1);\n" - "}\n"; - /* FIXME: improve interlace handling, pvideo */ - - const char *fs = - "#version 330\n" - "uniform sampler2D tex;\n" - "uniform bool pvideo_enable;\n" - "uniform sampler2D pvideo_tex;\n" - "uniform vec2 pvideo_in_pos;\n" - "uniform vec4 pvideo_pos;\n" - "uniform vec3 pvideo_scale;\n" - "uniform bool pvideo_color_key_enable;\n" - "uniform vec4 pvideo_color_key;\n" - "uniform vec2 display_size;\n" - "uniform float line_offset;\n" - "layout(location = 0) out vec4 out_Color;\n" - "void main()\n" - "{\n" - " vec2 texCoord = gl_FragCoord.xy/display_size;\n" - " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n" - " texCoord.y = 1 + rel*(texCoord.y - 1);" - " out_Color.rgba = texture(tex, texCoord);\n" - " if (pvideo_enable) {\n" - " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n" - " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n" - " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n" - " greaterThan(screenCoord, output_region.zw));\n" - " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n" - " 
vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n" - " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n" - " in_st.y *= -1.0;\n" - " out_Color.rgba = texture(pvideo_tex, in_st);\n" - " }\n" - " }\n" - "}\n"; - - pg->disp_rndr.prog = pgraph_compile_shader(vs, fs); - pg->disp_rndr.tex_loc = glGetUniformLocation(pg->disp_rndr.prog, "tex"); - pg->disp_rndr.pvideo_enable_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_enable"); - pg->disp_rndr.pvideo_tex_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_tex"); - pg->disp_rndr.pvideo_in_pos_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_in_pos"); - pg->disp_rndr.pvideo_pos_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_pos"); - pg->disp_rndr.pvideo_scale_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_scale"); - pg->disp_rndr.pvideo_color_key_enable_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_color_key_enable"); - pg->disp_rndr.pvideo_color_key_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_color_key"); - pg->disp_rndr.display_size_loc = glGetUniformLocation(pg->disp_rndr.prog, "display_size"); - pg->disp_rndr.line_offset_loc = glGetUniformLocation(pg->disp_rndr.prog, "line_offset"); - - glGenVertexArrays(1, &pg->disp_rndr.vao); - glBindVertexArray(pg->disp_rndr.vao); - glGenBuffers(1, &pg->disp_rndr.vbo); - glBindBuffer(GL_ARRAY_BUFFER, pg->disp_rndr.vbo); - glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); - glGenFramebuffers(1, &pg->disp_rndr.fbo); - glGenTextures(1, &pg->disp_rndr.pvideo_tex); - assert(glGetError() == GL_NO_ERROR); -} - -static uint8_t *convert_texture_data__CR8YB8CB8YA8(const uint8_t *data, - unsigned int width, - unsigned int height, - unsigned int pitch) -{ - uint8_t *converted_data = (uint8_t *)g_malloc(width * height * 4); - int x, y; - for (y = 0; y < height; y++) { - const uint8_t *line = &data[y * pitch]; - const uint32_t row_offset = y * width; - for (x = 0; x < width; x++) { - 
uint8_t *pixel = &converted_data[(row_offset + x) * 4]; - convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); - pixel[3] = 255; - } - } - return converted_data; -} - -static inline float pvideo_calculate_scale(unsigned int din_dout, - unsigned int output_size) -{ - float calculated_in = din_dout * (output_size - 1); - calculated_in = floorf(calculated_in / (1 << 20) + 0.5f); - return (calculated_in + 1.0f) / output_size; -} - -static void pgraph_render_display_pvideo_overlay(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior. - // Many games seem to pass this value when initializing or tearing down - // PVIDEO. On its own, this generally does not result in the overlay being - // hidden, however there are certain games (e.g., Ultimate Beach Soccer) - // that use an unknown mechanism to hide the overlay without explicitly - // stopping it. - // Since the value seems to be set to 0xFFFFFFFF only in cases where the - // content is not valid, it is probably good enough to treat it as an - // implicit stop. 
- bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE) - && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF; - glUniform1ui(d->pgraph.disp_rndr.pvideo_enable_loc, enabled); - if (!enabled) { - return; - } - - hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE]; - hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT]; - hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET]; - - int in_width = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH); - int in_height = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT); - - int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], - NV_PVIDEO_POINT_IN_S); - int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], - NV_PVIDEO_POINT_IN_T); - - int in_pitch = - GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH); - int in_color = - GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR); - - unsigned int out_width = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH); - unsigned int out_height = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT); - - float scale_x = 1.0f; - float scale_y = 1.0f; - unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX]; - unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY]; - if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) { - scale_x = pvideo_calculate_scale(ds_dx, out_width); - } - if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) { - scale_y = pvideo_calculate_scale(dt_dy, out_height); - } - - // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results - // in them being capped to the output size, content is not scaled. This is - // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF - // during initialization or teardown. 
- if (in_width > out_width) { - in_width = floorf((float)out_width * scale_x + 0.5f); - } - if (in_height > out_height) { - in_height = floorf((float)out_height * scale_y + 0.5f); - } - - /* TODO: support other color formats */ - assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8); - - unsigned int out_x = - GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X); - unsigned int out_y = - GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y); - - unsigned int color_key_enabled = - GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY); - glUniform1ui(d->pgraph.disp_rndr.pvideo_color_key_enable_loc, - color_key_enabled); - - // TODO: Verify that masking off the top byte is correct. - // SeaBlade sets a color key of 0x80000000 but the texture passed into the - // shader is cleared to 0 alpha. - unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF; - glUniform4f(d->pgraph.disp_rndr.pvideo_color_key_loc, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0); - - assert(offset + in_pitch * in_height <= limit); - hwaddr end = base + offset + in_pitch * in_height; - assert(end <= memory_region_size(d->vram)); - - pgraph_apply_scaling_factor(pg, &out_x, &out_y); - pgraph_apply_scaling_factor(pg, &out_width, &out_height); - - // Translate for the GL viewport origin. 
- out_y = MAX(pg->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0); - - glActiveTexture(GL_TEXTURE0 + 1); - glBindTexture(GL_TEXTURE_2D, g_nv2a->pgraph.disp_rndr.pvideo_tex); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8( - d->vram_ptr + base + offset, in_width, in_height, in_pitch); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA, - GL_UNSIGNED_BYTE, tex_rgba); - g_free(tex_rgba); - glUniform1i(d->pgraph.disp_rndr.pvideo_tex_loc, 1); - glUniform2f(d->pgraph.disp_rndr.pvideo_in_pos_loc, in_s, in_t); - glUniform4f(d->pgraph.disp_rndr.pvideo_pos_loc, - out_x, out_y, out_width, out_height); - glUniform3f(d->pgraph.disp_rndr.pvideo_scale_loc, - scale_x, scale_y, 1.0f / pg->surface_scale_factor); -} - -static void pgraph_render_display(NV2AState *d, SurfaceBinding *surface) -{ - struct PGRAPHState *pg = &d->pgraph; - - unsigned int width, height; - uint32_t pline_offset, pstart_addr, pline_compare; - d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height); - d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); - int line_offset = surface->pitch / pline_offset; - - /* Adjust viewport height for interlaced mode, used only in 1080i */ - if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) { - height *= 2; - } - - pgraph_apply_scaling_factor(pg, &width, &height); - - glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.disp_rndr.fbo); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, pg->gl_display_buffer); - bool recreate = ( - surface->fmt.gl_internal_format != pg->gl_display_buffer_internal_format - || width != pg->gl_display_buffer_width - || height != pg->gl_display_buffer_height - || surface->fmt.gl_format != pg->gl_display_buffer_format - || surface->fmt.gl_type != 
pg->gl_display_buffer_type - ); - - if (recreate) { - /* XXX: There's apparently a bug in some Intel OpenGL drivers for - * Windows that will leak this texture when its orphaned after use in - * another context, apparently regardless of which thread it's created - * or released on. - * - * Driver: 27.20.100.8729 9/11/2020 W10 x64 - * Track: https://community.intel.com/t5/Graphics/OpenGL-Windows-drivers-for-Intel-HD-630-leaking-GPU-memory-when/td-p/1274423 - */ - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - pg->gl_display_buffer_internal_format = surface->fmt.gl_internal_format; - pg->gl_display_buffer_width = width; - pg->gl_display_buffer_height = height; - pg->gl_display_buffer_format = surface->fmt.gl_format; - pg->gl_display_buffer_type = surface->fmt.gl_type; - glTexImage2D(GL_TEXTURE_2D, 0, - pg->gl_display_buffer_internal_format, - pg->gl_display_buffer_width, - pg->gl_display_buffer_height, - 0, - pg->gl_display_buffer_format, - pg->gl_display_buffer_type, - NULL); - } - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, pg->gl_display_buffer, 0); - GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0}; - glDrawBuffers(1, DrawBuffers); - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - - glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); - glBindVertexArray(pg->disp_rndr.vao); - glBindBuffer(GL_ARRAY_BUFFER, pg->disp_rndr.vbo); - glUseProgram(pg->disp_rndr.prog); - glProgramUniform1i(pg->disp_rndr.prog, pg->disp_rndr.tex_loc, 0); - glUniform2f(d->pgraph.disp_rndr.display_size_loc, width, height); - glUniform1f(d->pgraph.disp_rndr.line_offset_loc, line_offset); - pgraph_render_display_pvideo_overlay(d); - - glViewport(0, 0, width, height); - glColorMask(true, true, true, true); - glDisable(GL_SCISSOR_TEST); - glDisable(GL_BLEND); - glDisable(GL_STENCIL_TEST); - 
glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - glClear(GL_COLOR_BUFFER_BIT); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, 0, 0); -} - -void pgraph_gl_sync(NV2AState *d) -{ - uint32_t pline_offset, pstart_addr, pline_compare; - d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); - SurfaceBinding *surface = pgraph_surface_get_within(d, d->pcrtc.start + pline_offset); - if (surface == NULL) { - qemu_event_set(&d->pgraph.gl_sync_complete); - return; - } - - /* FIXME: Sanity check surface dimensions */ - - /* Wait for queued commands to complete */ - pgraph_upload_surface_data(d, surface, !tcg_enabled()); - pgraph_gl_fence(); - assert(glGetError() == GL_NO_ERROR); - - /* Render framebuffer in display context */ - glo_set_current(g_nv2a_context_display); - pgraph_render_display(d, surface); - pgraph_gl_fence(); - assert(glGetError() == GL_NO_ERROR); - - /* Switch back to original context */ - glo_set_current(g_nv2a_context_render); - - qatomic_set(&d->pgraph.gl_sync_pending, false); - qemu_event_set(&d->pgraph.gl_sync_complete); -} - -const uint8_t *nv2a_get_dac_palette(void) -{ - return g_nv2a->puserdac.palette; -} - -int nv2a_get_screen_off(void) -{ - return g_nv2a->vga.sr[VGA_SEQ_CLOCK_MODE] & VGA_SR01_SCREEN_OFF; -} - -int nv2a_get_framebuffer_surface(void) -{ - NV2AState *d = g_nv2a; - PGRAPHState *pg = &d->pgraph; - - qemu_mutex_lock(&d->pfifo.lock); - // FIXME: Possible race condition with pgraph, consider lock - uint32_t pline_offset, pstart_addr, pline_compare; - d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); - SurfaceBinding *surface = pgraph_surface_get_within(d, d->pcrtc.start + pline_offset); - if (surface == NULL || !surface->color) { - qemu_mutex_unlock(&d->pfifo.lock); - return 0; - } - - assert(surface->color); - 
assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0); - assert(surface->fmt.gl_format == GL_RGBA - || surface->fmt.gl_format == GL_RGB - || surface->fmt.gl_format == GL_BGR - || surface->fmt.gl_format == GL_BGRA - ); - - surface->frame_time = pg->frame_time; - qemu_event_reset(&d->pgraph.gl_sync_complete); - qatomic_set(&pg->gl_sync_pending, true); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - qemu_event_wait(&d->pgraph.gl_sync_complete); - - return pg->gl_display_buffer; -} - -static bool pgraph_check_surface_to_texture_compatibility( - const SurfaceBinding *surface, - const TextureShape *shape) -{ - // FIXME: Better checks/handling on formats and surface-texture compat - - if ((!surface->swizzle && surface->pitch != shape->pitch) || - surface->width != shape->width || - surface->height != shape->height) { - return false; - } - - int surface_fmt = surface->shape.color_format; - int texture_fmt = shape->color_format; - - if (!surface->color) { - // FIXME: Support zeta to color - return false; - } - - if (shape->cubemap) { - // FIXME: Support rendering surface to cubemap face - return false; - } - - if (shape->levels > 1) { - // FIXME: Support rendering surface to mip levels - return false; - } - - switch (surface_fmt) { - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { - 
case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; - default: break; - } - break; - default: - break; - } - - trace_nv2a_pgraph_surface_texture_compat_failed( - surface_fmt, texture_fmt); - return false; -} - -static void pgraph_wait_for_surface_download(SurfaceBinding *e) -{ - NV2AState *d = g_nv2a; - - if (qatomic_read(&e->draw_dirty)) { - qemu_mutex_lock(&d->pfifo.lock); - qemu_event_reset(&d->pgraph.downloads_complete); - qatomic_set(&e->download_pending, true); - qatomic_set(&d->pgraph.downloads_pending, true); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - qemu_event_wait(&d->pgraph.downloads_complete); - } -} - -static void pgraph_surface_access_callback( - void *opaque, - MemoryRegion *mr, - hwaddr addr, - hwaddr len, - bool write) -{ - SurfaceBinding *e = opaque; - assert(addr >= e->vram_addr); - hwaddr offset = addr - e->vram_addr; - assert(offset < e->size); - - if (qatomic_read(&e->draw_dirty)) { - trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); - pgraph_wait_for_surface_download(e); - } - - if (write && !qatomic_read(&e->upload_pending)) { - trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); - qatomic_set(&e->upload_pending, true); - } -} - -static SurfaceBinding *pgraph_surface_put(NV2AState *d, - hwaddr addr, - SurfaceBinding *surface_in) -{ - assert(pgraph_surface_get(d, addr) == NULL); - - SurfaceBinding *surface, *next; - uintptr_t e_end = surface_in->vram_addr + surface_in->size - 1; - QTAILQ_FOREACH_SAFE(surface, &d->pgraph.surfaces, entry, next) { - uintptr_t s_end = surface->vram_addr + surface->size - 1; - bool overlapping = !(surface->vram_addr > e_end - || surface_in->vram_addr > s_end); - if (overlapping) { - trace_nv2a_pgraph_surface_evict_overlapping( - surface->vram_addr, surface->width, 
surface->height, - surface->pitch); - pgraph_download_surface_data_if_dirty(d, surface); - pgraph_surface_invalidate(d, surface); - } - } - - SurfaceBinding *surface_out = g_malloc(sizeof(SurfaceBinding)); - assert(surface_out != NULL); - *surface_out = *surface_in; - - if (tcg_enabled()) { - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock_iothread(); - mem_access_callback_insert(qemu_get_cpu(0), - d->vram, surface_out->vram_addr, surface_out->size, - &surface_out->access_cb, &pgraph_surface_access_callback, - surface_out); - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&d->pgraph.lock); - } - - QTAILQ_INSERT_TAIL(&d->pgraph.surfaces, surface_out, entry); - - return surface_out; -} - -static SurfaceBinding *pgraph_surface_get(NV2AState *d, hwaddr addr) -{ - SurfaceBinding *surface; - QTAILQ_FOREACH (surface, &d->pgraph.surfaces, entry) { - if (surface->vram_addr == addr) { - return surface; - } - } - - return NULL; -} - -static SurfaceBinding *pgraph_surface_get_within(NV2AState *d, hwaddr addr) -{ - SurfaceBinding *surface; - QTAILQ_FOREACH (surface, &d->pgraph.surfaces, entry) { - if (addr >= surface->vram_addr && - addr < (surface->vram_addr + surface->size)) { - return surface; - } - } - - return NULL; -} - -static void pgraph_surface_invalidate(NV2AState *d, SurfaceBinding *surface) -{ - trace_nv2a_pgraph_surface_invalidated(surface->vram_addr); - - if (surface == d->pgraph.color_binding) { - assert(d->pgraph.surface_color.buffer_dirty); - pgraph_unbind_surface(d, true); - } - if (surface == d->pgraph.zeta_binding) { - assert(d->pgraph.surface_zeta.buffer_dirty); - pgraph_unbind_surface(d, false); - } - - if (tcg_enabled()) { - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock_iothread(); - mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb); - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&d->pgraph.lock); - } - - glDeleteTextures(1, &surface->gl_buffer); - - QTAILQ_REMOVE(&d->pgraph.surfaces, surface, entry); - 
g_free(surface); -} - -static void pgraph_surface_evict_old(NV2AState *d) -{ - const int surface_age_limit = 5; - - SurfaceBinding *s, *next; - QTAILQ_FOREACH_SAFE(s, &d->pgraph.surfaces, entry, next) { - int last_used = d->pgraph.frame_time - s->frame_time; - if (last_used >= surface_age_limit) { - trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr); - pgraph_download_surface_data_if_dirty(d, s); - pgraph_surface_invalidate(d, s); - } - } -} - -static bool pgraph_check_surface_compatibility(SurfaceBinding *s1, - SurfaceBinding *s2, bool strict) -{ - bool format_compatible = - (s1->color == s2->color) && - (s1->fmt.gl_attachment == s2->fmt.gl_attachment) && - (s1->fmt.gl_internal_format == s2->fmt.gl_internal_format) && - (s1->pitch == s2->pitch) && - (s1->shape.clip_x <= s2->shape.clip_x) && - (s1->shape.clip_y <= s2->shape.clip_y); - if (!format_compatible) { - return false; - } - - if (!strict) { - return (s1->width >= s2->width) && (s1->height >= s2->height); - } else { - return (s1->width == s2->width) && (s1->height == s2->height); - } -} - -static void pgraph_download_surface_data_if_dirty(NV2AState *d, - SurfaceBinding *surface) -{ - if (surface->draw_dirty) { - pgraph_download_surface_data(d, surface, true); - } -} - -static void pgraph_bind_current_surface(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - if (pg->color_binding) { - glFramebufferTexture2D(GL_FRAMEBUFFER, pg->color_binding->fmt.gl_attachment, - GL_TEXTURE_2D, pg->color_binding->gl_buffer, 0); - } - - if (pg->zeta_binding) { - glFramebufferTexture2D(GL_FRAMEBUFFER, pg->zeta_binding->fmt.gl_attachment, - GL_TEXTURE_2D, pg->zeta_binding->gl_buffer, 0); - } - - if (pg->color_binding || pg->zeta_binding) { - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == - GL_FRAMEBUFFER_COMPLETE); - } -} - -static void surface_copy_shrink_row(uint8_t *out, uint8_t *in, - unsigned int width, - unsigned int bytes_per_pixel, - unsigned int factor) -{ - if (bytes_per_pixel == 4) { - for (unsigned int 
x = 0; x < width; x++) { - *(uint32_t *)out = *(uint32_t *)in; - out += 4; - in += 4 * factor; - } - } else if (bytes_per_pixel == 2) { - for (unsigned int x = 0; x < width; x++) { - *(uint16_t *)out = *(uint16_t *)in; - out += 2; - in += 2 * factor; - } - } else { - for (unsigned int x = 0; x < width; x++) { - memcpy(out, in, bytes_per_pixel); - out += bytes_per_pixel; - in += bytes_per_pixel * factor; - } - } -} - - -static void pgraph_download_surface_data_to_buffer(NV2AState *d, - SurfaceBinding *surface, - bool swizzle, bool flip, - bool downscale, - uint8_t *pixels) -{ - PGRAPHState *pg = &d->pgraph; - swizzle &= surface->swizzle; - downscale &= (pg->surface_scale_factor != 1); - - trace_nv2a_pgraph_surface_download( - surface->color ? "COLOR" : "ZETA", - surface->swizzle ? "sz" : "lin", surface->vram_addr, - surface->width, surface->height, surface->pitch, - surface->fmt.bytes_per_pixel); - - /* Bind destination surface to framebuffer */ - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, - GL_TEXTURE_2D, surface->gl_buffer, 0); - - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - - /* Read surface into memory */ - uint8_t *gl_read_buf = pixels; - - uint8_t *swizzle_buf = pixels; - if (swizzle) { - /* FIXME: Allocate big buffer up front and re-alloc if necessary. 
- * FIXME: Consider swizzle in shader - */ - assert(pg->surface_scale_factor == 1 || downscale); - swizzle_buf = (uint8_t *)g_malloc(surface->size); - gl_read_buf = swizzle_buf; - } - - if (downscale) { - pg->scale_buf = (uint8_t *)g_realloc( - pg->scale_buf, pg->surface_scale_factor * pg->surface_scale_factor * - surface->size); - gl_read_buf = pg->scale_buf; - } - - glo_readpixels( - surface->fmt.gl_format, surface->fmt.gl_type, surface->fmt.bytes_per_pixel, - pg->surface_scale_factor * surface->pitch, - pg->surface_scale_factor * surface->width, - pg->surface_scale_factor * surface->height, flip, gl_read_buf); - - /* FIXME: Replace this with a hw accelerated version */ - if (downscale) { - assert(surface->pitch >= (surface->width * surface->fmt.bytes_per_pixel)); - uint8_t *out = swizzle_buf, *in = pg->scale_buf; - for (unsigned int y = 0; y < surface->height; y++) { - surface_copy_shrink_row(out, in, surface->width, - surface->fmt.bytes_per_pixel, - pg->surface_scale_factor); - in += surface->pitch * pg->surface_scale_factor * - pg->surface_scale_factor; - out += surface->pitch; - } - } - - if (swizzle) { - swizzle_rect(swizzle_buf, surface->width, surface->height, pixels, - surface->pitch, surface->fmt.bytes_per_pixel); - g_free(swizzle_buf); - } - - /* Re-bind original framebuffer target */ - glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, - GL_TEXTURE_2D, 0, 0); - pgraph_bind_current_surface(d); -} - -static void pgraph_download_surface_data(NV2AState *d, SurfaceBinding *surface, - bool force) -{ - if (!(surface->download_pending || force)) { - return; - } - - /* FIXME: Respect write enable at last TOU? 
*/ - - nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD); - - pgraph_download_surface_data_to_buffer( - d, surface, true, true, true, d->vram_ptr + surface->vram_addr); - - memory_region_set_client_dirty(d->vram, surface->vram_addr, - surface->pitch * surface->height, - DIRTY_MEMORY_VGA); - memory_region_set_client_dirty(d->vram, surface->vram_addr, - surface->pitch * surface->height, - DIRTY_MEMORY_NV2A_TEX); - - surface->download_pending = false; - surface->draw_dirty = false; -} - -void pgraph_process_pending_downloads(NV2AState *d) -{ - SurfaceBinding *surface; - QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) { - pgraph_download_surface_data(d, surface, false); - } - - qatomic_set(&d->pgraph.downloads_pending, false); - qemu_event_set(&d->pgraph.downloads_complete); -} - -void pgraph_download_dirty_surfaces(NV2AState *d) -{ - SurfaceBinding *surface; - QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) { - pgraph_download_surface_data_if_dirty(d, surface); - } - - qatomic_set(&d->pgraph.download_dirty_surfaces_pending, false); - qemu_event_set(&d->pgraph.dirty_surfaces_download_complete); -} - - -static void surface_copy_expand_row(uint8_t *out, uint8_t *in, - unsigned int width, - unsigned int bytes_per_pixel, - unsigned int factor) -{ - if (bytes_per_pixel == 4) { - for (unsigned int x = 0; x < width; x++) { - for (unsigned int i = 0; i < factor; i++) { - *(uint32_t *)out = *(uint32_t *)in; - out += bytes_per_pixel; - } - in += bytes_per_pixel; - } - } else if (bytes_per_pixel == 2) { - for (unsigned int x = 0; x < width; x++) { - for (unsigned int i = 0; i < factor; i++) { - *(uint16_t *)out = *(uint16_t *)in; - out += bytes_per_pixel; - } - in += bytes_per_pixel; - } - } else { - for (unsigned int x = 0; x < width; x++) { - for (unsigned int i = 0; i < factor; i++) { - memcpy(out, in, bytes_per_pixel); - out += bytes_per_pixel; - } - in += bytes_per_pixel; - } - } -} - -static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width, - 
unsigned int height, - unsigned int bytes_per_pixel, - unsigned int factor) -{ - size_t out_pitch = width * bytes_per_pixel * factor; - - for (unsigned int y = 0; y < height; y++) { - surface_copy_expand_row(out, in, width, bytes_per_pixel, factor); - uint8_t *row_in = out; - for (unsigned int i = 1; i < factor; i++) { - out += out_pitch; - memcpy(out, row_in, out_pitch); - } - in += width * bytes_per_pixel; - out += out_pitch; - } -} - -static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface, - bool force) -{ - if (!(surface->upload_pending || force)) { - return; - } - - nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD); - - trace_nv2a_pgraph_surface_upload( - surface->color ? "COLOR" : "ZETA", - surface->swizzle ? "sz" : "lin", surface->vram_addr, - surface->width, surface->height, surface->pitch, - surface->fmt.bytes_per_pixel); - - PGRAPHState *pg = &d->pgraph; - - surface->upload_pending = false; - surface->draw_time = pg->draw_time; - - // FIXME: Don't query GL for texture binding - GLint last_texture_binding; - glGetIntegerv(GL_TEXTURE_BINDING_2D, &last_texture_binding); - - // FIXME: Replace with FBO to not disturb current state - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); - - uint8_t *data = d->vram_ptr; - uint8_t *buf = data + surface->vram_addr; - - if (surface->swizzle) { - buf = (uint8_t*)g_malloc(surface->size); - unswizzle_rect(data + surface->vram_addr, - surface->width, surface->height, - buf, - surface->pitch, - surface->fmt.bytes_per_pixel); - } - - /* FIXME: Replace this flip/scaling */ - - // This is VRAM so we can't do this inplace! 
- uint8_t *flipped_buf = (uint8_t *)g_malloc( - surface->height * surface->width * surface->fmt.bytes_per_pixel); - unsigned int irow; - for (irow = 0; irow < surface->height; irow++) { - memcpy(&flipped_buf[surface->width * (surface->height - irow - 1) - * surface->fmt.bytes_per_pixel], - &buf[surface->pitch * irow], - surface->width * surface->fmt.bytes_per_pixel); - } - - uint8_t *gl_read_buf = flipped_buf; - unsigned int width = surface->width, height = surface->height; - - if (pg->surface_scale_factor > 1) { - pgraph_apply_scaling_factor(pg, &width, &height); - pg->scale_buf = (uint8_t *)g_realloc( - pg->scale_buf, width * height * surface->fmt.bytes_per_pixel); - gl_read_buf = pg->scale_buf; - uint8_t *out = gl_read_buf, *in = flipped_buf; - surface_copy_expand(out, in, surface->width, surface->height, - surface->fmt.bytes_per_pixel, - d->pgraph.surface_scale_factor); - } - - int prev_unpack_alignment; - glGetIntegerv(GL_UNPACK_ALIGNMENT, &prev_unpack_alignment); - if (unlikely((width * surface->fmt.bytes_per_pixel) % 4 != 0)) { - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - } else { - glPixelStorei(GL_UNPACK_ALIGNMENT, 4); - } - - glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); - glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, width, - height, 0, surface->fmt.gl_format, surface->fmt.gl_type, - gl_read_buf); - glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment); - g_free(flipped_buf); - if (surface->swizzle) { - g_free(buf); - } - - // Rebind previous framebuffer binding - glBindTexture(GL_TEXTURE_2D, last_texture_binding); - - pgraph_bind_current_surface(d); -} - -static void pgraph_compare_surfaces(SurfaceBinding *s1, SurfaceBinding *s2) -{ - #define DO_CMP(fld) \ - if (s1->fld != s2->fld) \ - trace_nv2a_pgraph_surface_compare_mismatch( \ - #fld, (long int)s1->fld, (long int)s2->fld); - DO_CMP(shape.clip_x) - DO_CMP(shape.clip_width) - DO_CMP(shape.clip_y) - DO_CMP(shape.clip_height) - DO_CMP(gl_buffer) - DO_CMP(fmt.bytes_per_pixel) - 
DO_CMP(fmt.gl_attachment) - DO_CMP(fmt.gl_internal_format) - DO_CMP(fmt.gl_format) - DO_CMP(fmt.gl_type) - DO_CMP(color) - DO_CMP(swizzle) - DO_CMP(vram_addr) - DO_CMP(width) - DO_CMP(height) - DO_CMP(pitch) - DO_CMP(size) - DO_CMP(dma_addr) - DO_CMP(dma_len) - DO_CMP(frame_time) - DO_CMP(draw_time) - #undef DO_CMP -} - -static void pgraph_populate_surface_binding_entry_sized(NV2AState *d, - bool color, - unsigned int width, - unsigned int height, - SurfaceBinding *entry) -{ - PGRAPHState *pg = &d->pgraph; - Surface *surface; - hwaddr dma_address; - SurfaceFormatInfo fmt; - - if (color) { - surface = &pg->surface_color; - dma_address = pg->dma_color; - assert(pg->surface_shape.color_format != 0); - assert(pg->surface_shape.color_format < - ARRAY_SIZE(kelvin_surface_color_format_map)); - fmt = kelvin_surface_color_format_map[pg->surface_shape.color_format]; - if (fmt.bytes_per_pixel == 0) { - fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n", - pg->surface_shape.color_format); - abort(); - } - } else { - surface = &pg->surface_zeta; - dma_address = pg->dma_zeta; - assert(pg->surface_shape.zeta_format != 0); - assert(pg->surface_shape.zeta_format < - ARRAY_SIZE(kelvin_surface_zeta_float_format_map)); - const SurfaceFormatInfo *map = - pg->surface_shape.z_format ? kelvin_surface_zeta_float_format_map : - kelvin_surface_zeta_fixed_format_map; - fmt = map[pg->surface_shape.zeta_format]; - } - - DMAObject dma = nv_dma_load(d, dma_address); - /* There's a bunch of bugs that could cause us to hit this function - * at the wrong time and get a invalid dma object. - * Check that it's sane. */ - assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS); - // assert(dma.address + surface->offset != 0); - assert(surface->offset <= dma.limit); - assert(surface->offset + surface->pitch * height <= dma.limit + 1); - assert(surface->pitch % fmt.bytes_per_pixel == 0); - assert((dma.address & ~0x07FFFFFF) == 0); - - entry->shape = (color || !pg->color_binding) ? 
pg->surface_shape : - pg->color_binding->shape; - entry->gl_buffer = 0; - entry->fmt = fmt; - entry->color = color; - entry->swizzle = - (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); - entry->vram_addr = dma.address + surface->offset; - entry->width = width; - entry->height = height; - entry->pitch = surface->pitch; - entry->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel); - entry->upload_pending = true; - entry->download_pending = false; - entry->draw_dirty = false; - entry->dma_addr = dma.address; - entry->dma_len = dma.limit; - entry->frame_time = pg->frame_time; - entry->draw_time = pg->draw_time; - entry->cleared = false; -} - -static void pgraph_populate_surface_binding_entry(NV2AState *d, bool color, - SurfaceBinding *entry) -{ - PGRAPHState *pg = &d->pgraph; - unsigned int width, height; - - if (color || !pg->color_binding) { - pgraph_get_surface_dimensions(pg, &width, &height); - pgraph_apply_anti_aliasing_factor(pg, &width, &height); - - /* Since we determine surface dimensions based on the clipping - * rectangle, make sure to include the surface offset as well. - */ - if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) { - width += pg->surface_shape.clip_x; - height += pg->surface_shape.clip_y; - } - } else { - width = pg->color_binding->width; - height = pg->color_binding->height; - } - - pgraph_populate_surface_binding_entry_sized(d, color, width, height, entry); -} - -static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color) -{ - PGRAPHState *pg = &d->pgraph; - - SurfaceBinding entry; - pgraph_populate_surface_binding_entry(d, color, &entry); - - Surface *surface = color ? 
&pg->surface_color : &pg->surface_zeta; - - bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty( - d->vram, entry.vram_addr, entry.size, - DIRTY_MEMORY_NV2A); - - if (upload && (surface->buffer_dirty || mem_dirty)) { - pgraph_unbind_surface(d, color); - - SurfaceBinding *found = pgraph_surface_get(d, entry.vram_addr); - if (found != NULL) { - /* FIXME: Support same color/zeta surface target? In the mean time, - * if the surface we just found is currently bound, just unbind it. - */ - SurfaceBinding *other = (color ? pg->zeta_binding - : pg->color_binding); - if (found == other) { - NV2A_UNIMPLEMENTED("Same color & zeta surface offset"); - pgraph_unbind_surface(d, !color); - } - } - - trace_nv2a_pgraph_surface_target( - color ? "COLOR" : "ZETA", entry.vram_addr, - entry.swizzle ? "sz" : "ln", - pg->surface_shape.anti_aliasing, - pg->surface_shape.clip_x, - pg->surface_shape.clip_width, pg->surface_shape.clip_y, - pg->surface_shape.clip_height); - - bool should_create = true; - - if (found != NULL) { - bool is_compatible = - pgraph_check_surface_compatibility(found, &entry, false); - -#define TRACE_ARGS found->vram_addr, found->width, found->height, \ - found->swizzle ? "sz" : "ln", \ - found->shape.anti_aliasing, found->shape.clip_x, \ - found->shape.clip_width, found->shape.clip_y, \ - found->shape.clip_height, found->pitch - if (found->color) { - trace_nv2a_pgraph_surface_match_color(TRACE_ARGS); - } else { - trace_nv2a_pgraph_surface_match_zeta(TRACE_ARGS); - } -#undef TRACE_ARGS - - assert(!(entry.swizzle && pg->clearing)); - - if (found->swizzle != entry.swizzle) { - /* Clears should only be done on linear surfaces. Avoid - * synchronization by allowing (1) a surface marked swizzled to - * be cleared under the assumption the entire surface is - * destined to be cleared and (2) a fully cleared linear surface - * to be marked swizzled. Strictly match size to avoid - * pathological cases. 
- */ - is_compatible &= (pg->clearing || found->cleared) && - pgraph_check_surface_compatibility(found, &entry, true); - if (is_compatible) { - trace_nv2a_pgraph_surface_migrate_type( - entry.swizzle ? "swizzled" : "linear"); - } - } - - if (is_compatible && color && - !pgraph_check_surface_compatibility(found, &entry, true)) { - SurfaceBinding zeta_entry; - pgraph_populate_surface_binding_entry_sized( - d, !color, found->width, found->height, &zeta_entry); - hwaddr color_end = found->vram_addr + found->size; - hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size; - is_compatible &= found->vram_addr >= zeta_end || - zeta_entry.vram_addr >= color_end; - } - - if (is_compatible && !color && pg->color_binding) { - is_compatible &= (found->width == pg->color_binding->width) && - (found->height == pg->color_binding->height); - } - - if (is_compatible) { - /* FIXME: Refactor */ - pg->surface_binding_dim.width = found->width; - pg->surface_binding_dim.clip_x = found->shape.clip_x; - pg->surface_binding_dim.clip_width = found->shape.clip_width; - pg->surface_binding_dim.height = found->height; - pg->surface_binding_dim.clip_y = found->shape.clip_y; - pg->surface_binding_dim.clip_height = found->shape.clip_height; - found->upload_pending |= mem_dirty; - pg->surface_zeta.buffer_dirty |= color; - should_create = false; - } else { - trace_nv2a_pgraph_surface_evict_reason( - "incompatible", found->vram_addr); - pgraph_compare_surfaces(found, &entry); - pgraph_download_surface_data_if_dirty(d, found); - pgraph_surface_invalidate(d, found); - } - } - - if (should_create) { - glGenTextures(1, &entry.gl_buffer); - glBindTexture(GL_TEXTURE_2D, entry.gl_buffer); - NV2A_GL_DLABEL(GL_TEXTURE, entry.gl_buffer, - "%s format: %0X, width: %d, height: %d " - "(addr %" HWADDR_PRIx ")", - color ? "color" : "zeta", - color ? 
pg->surface_shape.color_format - : pg->surface_shape.zeta_format, - entry.width, entry.height, surface->offset); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - unsigned int width = entry.width, height = entry.height; - pgraph_apply_scaling_factor(pg, &width, &height); - glTexImage2D(GL_TEXTURE_2D, 0, entry.fmt.gl_internal_format, width, - height, 0, entry.fmt.gl_format, entry.fmt.gl_type, - NULL); - found = pgraph_surface_put(d, entry.vram_addr, &entry); - - /* FIXME: Refactor */ - pg->surface_binding_dim.width = entry.width; - pg->surface_binding_dim.clip_x = entry.shape.clip_x; - pg->surface_binding_dim.clip_width = entry.shape.clip_width; - pg->surface_binding_dim.height = entry.height; - pg->surface_binding_dim.clip_y = entry.shape.clip_y; - pg->surface_binding_dim.clip_height = entry.shape.clip_height; - - if (color && pg->zeta_binding && (pg->zeta_binding->width != entry.width || pg->zeta_binding->height != entry.height)) { - pg->surface_zeta.buffer_dirty = true; - } - } - -#define TRACE_ARGS found->vram_addr, found->width, found->height, \ - found->swizzle ? 
"sz" : "ln", found->shape.anti_aliasing, \ - found->shape.clip_x, found->shape.clip_width, \ - found->shape.clip_y, found->shape.clip_height, found->pitch - - if (color) { - if (should_create) { - trace_nv2a_pgraph_surface_create_color(TRACE_ARGS); - } else { - trace_nv2a_pgraph_surface_hit_color(TRACE_ARGS); - } - - pg->color_binding = found; - } else { - if (should_create) { - trace_nv2a_pgraph_surface_create_zeta(TRACE_ARGS); - } else { - trace_nv2a_pgraph_surface_hit_zeta(TRACE_ARGS); - } - pg->zeta_binding = found; - } -#undef TRACE_ARGS - - glFramebufferTexture2D(GL_FRAMEBUFFER, entry.fmt.gl_attachment, - GL_TEXTURE_2D, found->gl_buffer, 0); - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == - GL_FRAMEBUFFER_COMPLETE); - - surface->buffer_dirty = false; - } - - if (!upload && surface->draw_dirty) { - if (!tcg_enabled()) { - /* FIXME: Cannot monitor for reads/writes; flush now */ - pgraph_download_surface_data(d, - color ? pg->color_binding : pg->zeta_binding, true); - } - - surface->write_enabled_cache = false; - surface->draw_dirty = false; - } -} - -static void pgraph_unbind_surface(NV2AState *d, bool color) -{ - PGRAPHState *pg = &d->pgraph; - - if (color) { - if (pg->color_binding) { - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, 0, 0); - pg->color_binding = NULL; - } - } else { - if (pg->zeta_binding) { - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_DEPTH_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); - pg->zeta_binding = NULL; - } - } -} - -static void pgraph_update_surface(NV2AState *d, bool upload, - bool color_write, bool zeta_write) -{ - PGRAPHState *pg = &d->pgraph; - - pg->surface_shape.z_format = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_Z_FORMAT); - - color_write = color_write && - (pg->clearing || pgraph_color_write_enabled(pg)); - zeta_write = zeta_write && (pg->clearing || 
pgraph_zeta_write_enabled(pg)); - - if (upload) { - bool fb_dirty = pgraph_framebuffer_dirty(pg); - if (fb_dirty) { - memcpy(&pg->last_surface_shape, &pg->surface_shape, - sizeof(SurfaceShape)); - pg->surface_color.buffer_dirty = true; - pg->surface_zeta.buffer_dirty = true; - } - - if (pg->surface_color.buffer_dirty) { - pgraph_unbind_surface(d, true); - } - - if (color_write) { - pgraph_update_surface_part(d, true, true); - } - - if (pg->surface_zeta.buffer_dirty) { - pgraph_unbind_surface(d, false); - } - - if (zeta_write) { - pgraph_update_surface_part(d, true, false); - } - } else { - if ((color_write || pg->surface_color.write_enabled_cache) - && pg->surface_color.draw_dirty) { - pgraph_update_surface_part(d, false, true); - } - if ((zeta_write || pg->surface_zeta.write_enabled_cache) - && pg->surface_zeta.draw_dirty) { - pgraph_update_surface_part(d, false, false); - } - } - - if (upload) { - pg->draw_time++; - } - - bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); - - if (pg->color_binding) { - pg->color_binding->frame_time = pg->frame_time; - if (upload) { - pgraph_upload_surface_data(d, pg->color_binding, false); - pg->color_binding->draw_time = pg->draw_time; - pg->color_binding->swizzle = swizzle; - } - } - - if (pg->zeta_binding) { - pg->zeta_binding->frame_time = pg->frame_time; - if (upload) { - pgraph_upload_surface_data(d, pg->zeta_binding, false); - pg->zeta_binding->draw_time = pg->draw_time; - pg->zeta_binding->swizzle = swizzle; - } - } - - // Sanity check color and zeta dimensions match - if (pg->color_binding && pg->zeta_binding) { - assert((pg->color_binding->width == pg->zeta_binding->width) - && (pg->color_binding->height == pg->zeta_binding->height)); - } - - pgraph_surface_evict_old(d); -} - -struct pgraph_texture_possibly_dirty_struct { - hwaddr addr, end; -}; - -static void pgraph_mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque) -{ - struct pgraph_texture_possibly_dirty_struct *test 
= - (struct pgraph_texture_possibly_dirty_struct *)opaque; - - struct TextureLruNode *tnode = container_of(node, TextureLruNode, node); - if (tnode->binding == NULL || tnode->possibly_dirty) { - return; - } - - uintptr_t k_tex_addr = tnode->key.texture_vram_offset; - uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1; - bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end); - - if (tnode->key.palette_length > 0) { - uintptr_t k_pal_addr = tnode->key.palette_vram_offset; - uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1; - overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end); - } - - tnode->possibly_dirty |= overlapping; -} - - -static void pgraph_mark_textures_possibly_dirty(NV2AState *d, - hwaddr addr, hwaddr size) -{ - hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1; - addr &= TARGET_PAGE_MASK; - assert(end <= memory_region_size(d->vram)); - - struct pgraph_texture_possibly_dirty_struct test = { - .addr = addr, - .end = end, - }; - - lru_visit_active(&d->pgraph.texture_cache, - pgraph_mark_textures_possibly_dirty_visitor, - &test); -} - -static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size) -{ - hwaddr end = TARGET_PAGE_ALIGN(addr + size); - addr &= TARGET_PAGE_MASK; - assert(end < memory_region_size(d->vram)); - return memory_region_test_and_clear_dirty(d->vram, addr, end - addr, - DIRTY_MEMORY_NV2A_TEX); -} - -static bool pgraph_is_texture_stage_active(PGRAPHState *pg, unsigned int stage) -{ - assert(stage < NV2A_MAX_TEXTURES); - uint32_t mode = (pg->regs[NV_PGRAPH_SHADERPROG] >> (stage * 5)) & 0x1F; - return !!mode; -} - -// Check if any of the pages spanned by the a texture are dirty. 
-static bool pgraph_check_texture_possibly_dirty(NV2AState *d, hwaddr texture_vram_offset, unsigned int length, hwaddr palette_vram_offset, unsigned int palette_length) -{ - bool possibly_dirty = false; - if (pgraph_check_texture_dirty(d, texture_vram_offset, length)) { - possibly_dirty = true; - pgraph_mark_textures_possibly_dirty(d, texture_vram_offset, length); - } - if (palette_length && pgraph_check_texture_dirty(d, palette_vram_offset, - palette_length)) { - possibly_dirty = true; - pgraph_mark_textures_possibly_dirty(d, palette_vram_offset, - palette_length); - } - return possibly_dirty; -} - -static void apply_texture_parameters(TextureBinding *binding, - const ColorFormatInfo *f, - unsigned int dimensionality, - unsigned int filter, - unsigned int address, - bool is_bordered, - uint32_t border_color) -{ - unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); - unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG); - unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU); - unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV); - unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP); - - if (f->linear) { - /* somtimes games try to set mipmap min filters on linear textures. - * this could indicate a bug... 
*/ - switch (min_filter) { - case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD: - case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD: - min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0; - break; - case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD: - case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD: - min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0; - break; - } - } - - if (min_filter != binding->min_filter) { - glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER, - pgraph_texture_min_filter_map[min_filter]); - binding->min_filter = min_filter; - } - if (mag_filter != binding->mag_filter) { - glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER, - pgraph_texture_mag_filter_map[mag_filter]); - binding->mag_filter = mag_filter; - } - - /* Texture wrapping */ - assert(addru < ARRAY_SIZE(pgraph_texture_addr_map)); - if (addru != binding->addru) { - glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S, - pgraph_texture_addr_map[addru]); - binding->addru = addru; - } - bool needs_border_color = binding->addru == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; - if (dimensionality > 1) { - if (addrv != binding->addrv) { - assert(addrv < ARRAY_SIZE(pgraph_texture_addr_map)); - glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T, - pgraph_texture_addr_map[addrv]); - binding->addrv = addrv; - } - needs_border_color = needs_border_color || binding->addrv == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; - } - if (dimensionality > 2) { - if (addrp != binding->addrp) { - assert(addrp < ARRAY_SIZE(pgraph_texture_addr_map)); - glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R, - pgraph_texture_addr_map[addrp]); - binding->addrp = addrp; - } - needs_border_color = needs_border_color || binding->addrp == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; - } - - if (!is_bordered && needs_border_color) { - if (!binding->border_color_set || binding->border_color != border_color) { - GLfloat gl_border_color[] = { - /* FIXME: Color channels might be wrong order */ - ((border_color >> 16) & 0xFF) / 255.0f, /* red */ - 
((border_color >> 8) & 0xFF) / 255.0f, /* green */ - (border_color & 0xFF) / 255.0f, /* blue */ - ((border_color >> 24) & 0xFF) / 255.0f /* alpha */ - }; - glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR, - gl_border_color); - - binding->border_color_set = true; - binding->border_color = border_color; - } - } -} - -static void pgraph_bind_textures(NV2AState *d) -{ - int i; - PGRAPHState *pg = &d->pgraph; - - NV2A_GL_DGROUP_BEGIN("%s", __func__); - - for (i=0; iregs[NV_PGRAPH_TEXCTL0_0 + i*4]; - bool enabled = pgraph_is_texture_stage_active(pg, i) && - GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE); - /* FIXME: What happens if texture is disabled but stage is active? */ - - glActiveTexture(GL_TEXTURE0 + i); - if (!enabled) { - glBindTexture(GL_TEXTURE_CUBE_MAP, 0); - glBindTexture(GL_TEXTURE_RECTANGLE, 0); - glBindTexture(GL_TEXTURE_1D, 0); - glBindTexture(GL_TEXTURE_2D, 0); - glBindTexture(GL_TEXTURE_3D, 0); - continue; - } - - uint32_t ctl_1 = pg->regs[NV_PGRAPH_TEXCTL1_0 + i*4]; - uint32_t fmt = pg->regs[NV_PGRAPH_TEXFMT0 + i*4]; - uint32_t filter = pg->regs[NV_PGRAPH_TEXFILTER0 + i*4]; - uint32_t address = pg->regs[NV_PGRAPH_TEXADDRESS0 + i*4]; - uint32_t palette = pg->regs[NV_PGRAPH_TEXPALETTE0 + i*4]; - - unsigned int min_mipmap_level = - GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP); - unsigned int max_mipmap_level = - GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP); - - unsigned int pitch = - GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH); - - unsigned int dma_select = - GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA); - bool cubemap = - GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); - unsigned int dimensionality = - GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY); - unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR); - unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS); - unsigned int log_width = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); - unsigned int log_height = GET_MASK(fmt, 
NV_PGRAPH_TEXFMT0_BASE_SIZE_V); - unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); - - unsigned int rect_width = - GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4], - NV_PGRAPH_TEXIMAGERECT0_WIDTH); - unsigned int rect_height = - GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4], - NV_PGRAPH_TEXIMAGERECT0_HEIGHT); -#ifdef DEBUG_NV2A - unsigned int lod_bias = - GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS); -#endif - unsigned int border_source = GET_MASK(fmt, - NV_PGRAPH_TEXFMT0_BORDER_SOURCE); - uint32_t border_color = pg->regs[NV_PGRAPH_BORDERCOLOR0 + i*4]; - - hwaddr offset = pg->regs[NV_PGRAPH_TEXOFFSET0 + i*4]; - - bool palette_dma_select = - GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA); - unsigned int palette_length_index = - GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH); - unsigned int palette_offset = - palette & NV_PGRAPH_TEXPALETTE0_OFFSET; - - unsigned int palette_length = 0; - switch (palette_length_index) { - case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break; - case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break; - case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break; - case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break; - default: assert(false); break; - } - - /* Check for unsupported features */ - if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED"); - if (filter & NV_PGRAPH_TEXFILTER0_RSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_RSIGNED"); - if (filter & NV_PGRAPH_TEXFILTER0_GSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_GSIGNED"); - if (filter & NV_PGRAPH_TEXFILTER0_BSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_BSIGNED"); - - nv2a_profile_inc_counter(NV2A_PROF_TEX_BIND); - - hwaddr dma_len; - uint8_t *texture_data; - if (dma_select) { - texture_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &dma_len); - } else { - texture_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &dma_len); - } - assert(offset < dma_len); 
- texture_data += offset; - hwaddr texture_vram_offset = texture_data - d->vram_ptr; - - hwaddr palette_dma_len; - uint8_t *palette_data; - if (palette_dma_select) { - palette_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &palette_dma_len); - } else { - palette_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &palette_dma_len); - } - assert(palette_offset < palette_dma_len); - palette_data += palette_offset; - hwaddr palette_vram_offset = palette_data - d->vram_ptr; - - NV2A_DPRINTF(" texture %d is format 0x%x, " - "off 0x%" HWADDR_PRIx " (r %d, %d or %d, %d, %d; %d%s)," - " filter %x %x, levels %d-%d %d bias %d\n", - i, color_format, offset, - rect_width, rect_height, - 1 << log_width, 1 << log_height, 1 << log_depth, - pitch, - cubemap ? "; cubemap" : "", - GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN), - GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG), - min_mipmap_level, max_mipmap_level, levels, - lod_bias); - - assert(color_format < ARRAY_SIZE(kelvin_color_format_map)); - ColorFormatInfo f = kelvin_color_format_map[color_format]; - if (f.bytes_per_pixel == 0) { - fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n", - color_format); - abort(); - } - - unsigned int width, height, depth; - if (f.linear) { - assert(dimensionality == 2); - width = rect_width; - height = rect_height; - depth = 1; - } else { - width = 1 << log_width; - height = 1 << log_height; - depth = 1 << log_depth; - pitch = 0; - - levels = MIN(levels, max_mipmap_level + 1); - - /* Discard mipmap levels that would be smaller than 1x1. - * FIXME: Is this actually needed? - * - * >> Level 0: 32 x 4 - * Level 1: 16 x 2 - * Level 2: 8 x 1 - * Level 3: 4 x 1 - * Level 4: 2 x 1 - * Level 5: 1 x 1 - */ - levels = MIN(levels, MAX(log_width, log_height) + 1); - assert(levels > 0); - - if (dimensionality == 3) { - /* FIXME: What about 3D mipmaps? */ - if (log_width < 2 || log_height < 2) { - /* Base level is smaller than 4x4... 
*/ - levels = 1; - } else { - levels = MIN(levels, MIN(log_width, log_height) - 1); - } - } - min_mipmap_level = MIN(levels-1, min_mipmap_level); - max_mipmap_level = MIN(levels-1, max_mipmap_level); - } - - size_t length = 0; - if (f.linear) { - assert(cubemap == false); - assert(dimensionality == 2); - length = height * pitch; - } else { - if (dimensionality >= 2) { - unsigned int w = width, h = height; - int level; - if (f.gl_format != 0) { - for (level = 0; level < levels; level++) { - w = MAX(w, 1); - h = MAX(h, 1); - length += w * h * f.bytes_per_pixel; - w /= 2; - h /= 2; - } - } else { - /* Compressed textures are a bit different */ - unsigned int block_size = - f.gl_internal_format == - GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ? - 8 : 16; - for (level = 0; level < levels; level++) { - w = MAX(w, 1); - h = MAX(h, 1); - unsigned int phys_w = (w + 3) & ~3, - phys_h = (h + 3) & ~3; - length += phys_w/4 * phys_h/4 * block_size; - w /= 2; - h /= 2; - } - } - if (cubemap) { - assert(dimensionality == 2); - length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1); - length *= 6; - } - if (dimensionality >= 3) { - length *= depth; - } - } - } - - bool is_bordered = border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR; - - assert((texture_vram_offset + length) < memory_region_size(d->vram)); - assert((palette_vram_offset + palette_length) - < memory_region_size(d->vram)); - bool is_indexed = (color_format == - NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8); - bool possibly_dirty = false; - bool possibly_dirty_checked = false; - - SurfaceBinding *surface = pgraph_surface_get(d, texture_vram_offset); - TextureBinding *tbind = pg->texture_binding[i]; - if (!pg->texture_dirty[i] && tbind) { - bool reusable = false; - if (surface && tbind->draw_time == surface->draw_time) { - reusable = true; - } else if (!surface) { - possibly_dirty = pgraph_check_texture_possibly_dirty( - d, - texture_vram_offset, - length, - palette_vram_offset, - is_indexed 
? palette_length : 0); - possibly_dirty_checked = true; - reusable = !possibly_dirty; - } - - if (reusable) { - glBindTexture(pg->texture_binding[i]->gl_target, - pg->texture_binding[i]->gl_texture); - apply_texture_parameters(pg->texture_binding[i], - &f, - dimensionality, - filter, - address, - is_bordered, - border_color); - continue; - } - } - - TextureShape state; - memset(&state, 0, sizeof(TextureShape)); - state.cubemap = cubemap; - state.dimensionality = dimensionality; - state.color_format = color_format; - state.levels = levels; - state.width = width; - state.height = height; - state.depth = depth; - state.min_mipmap_level = min_mipmap_level; - state.max_mipmap_level = max_mipmap_level; - state.pitch = pitch; - state.border = is_bordered; - - /* - * Check active surfaces to see if this texture was a render target - */ - bool surf_to_tex = false; - if (surface != NULL) { - surf_to_tex = pgraph_check_surface_to_texture_compatibility( - surface, &state); - - if (surf_to_tex && surface->upload_pending) { - pgraph_upload_surface_data(d, surface, false); - } - } - - if (!surf_to_tex) { - // FIXME: Restructure to support rendering surfaces to cubemap faces - - // Writeback any surfaces which this texture may index - hwaddr tex_vram_end = texture_vram_offset + length - 1; - QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) { - hwaddr surf_vram_end = surface->vram_addr + surface->size - 1; - bool overlapping = !(surface->vram_addr >= tex_vram_end - || texture_vram_offset >= surf_vram_end); - if (overlapping) { - pgraph_download_surface_data_if_dirty(d, surface); - } - } - } - - TextureKey key; - memset(&key, 0, sizeof(TextureKey)); - key.state = state; - key.texture_vram_offset = texture_vram_offset; - key.texture_length = length; - if (is_indexed) { - key.palette_vram_offset = palette_vram_offset; - key.palette_length = palette_length; - } - - // Search for existing texture binding in cache - uint64_t tex_binding_hash = fast_hash((uint8_t*)&key, sizeof(key)); - 
LruNode *found = lru_lookup(&pg->texture_cache, - tex_binding_hash, &key); - TextureLruNode *key_out = container_of(found, TextureLruNode, node); - possibly_dirty |= (key_out->binding == NULL) || key_out->possibly_dirty; - - if (!surf_to_tex && !possibly_dirty_checked) { - possibly_dirty |= pgraph_check_texture_possibly_dirty( - d, - texture_vram_offset, - length, - palette_vram_offset, - is_indexed ? palette_length : 0); - } - - // Calculate hash of texture data, if necessary - uint64_t tex_data_hash = 0; - if (!surf_to_tex && possibly_dirty) { - tex_data_hash = fast_hash(texture_data, length); - if (is_indexed) { - tex_data_hash ^= fast_hash(palette_data, palette_length); - } - } - - // Free existing binding, if texture data has changed - bool must_destroy = (key_out->binding != NULL) - && possibly_dirty - && (key_out->binding->data_hash != tex_data_hash); - if (must_destroy) { - texture_binding_destroy(key_out->binding); - key_out->binding = NULL; - } - - if (key_out->binding == NULL) { - // Must create the texture - key_out->binding = generate_texture(state, texture_data, palette_data); - key_out->binding->data_hash = tex_data_hash; - key_out->binding->scale = 1; - } else { - // Saved an upload! Reuse existing texture in graphics memory. 
- glBindTexture(key_out->binding->gl_target, - key_out->binding->gl_texture); - } - - key_out->possibly_dirty = false; - TextureBinding *binding = key_out->binding; - binding->refcnt++; - - if (surf_to_tex && binding->draw_time < surface->draw_time) { - - trace_nv2a_pgraph_surface_render_to_texture( - surface->vram_addr, surface->width, surface->height); - pgraph_render_surface_to_texture(d, surface, binding, &state, i); - binding->draw_time = surface->draw_time; - if (binding->gl_target == GL_TEXTURE_RECTANGLE) { - binding->scale = pg->surface_scale_factor; - } else { - binding->scale = 1; - } - } - - apply_texture_parameters(binding, - &f, - dimensionality, - filter, - address, - is_bordered, - border_color); - - if (pg->texture_binding[i]) { - if (pg->texture_binding[i]->gl_target != binding->gl_target) { - glBindTexture(pg->texture_binding[i]->gl_target, 0); - } - texture_binding_destroy(pg->texture_binding[i]); - } - pg->texture_binding[i] = binding; - pg->texture_dirty[i] = false; - } - NV2A_GL_DGROUP_END(); -} - -static void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, - unsigned int *width, - unsigned int *height) -{ - switch (pg->surface_shape.anti_aliasing) { - case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1: - break; - case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2: - if (width) { *width *= 2; } - break; - case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4: - if (width) { *width *= 2; } - if (height) { *height *= 2; } - break; - default: - assert(false); - break; - } -} - -static void pgraph_apply_scaling_factor(PGRAPHState *pg, - unsigned int *width, - unsigned int *height) -{ - *width *= pg->surface_scale_factor; - *height *= pg->surface_scale_factor; -} - -static void pgraph_get_surface_dimensions(PGRAPHState *pg, - unsigned int *width, - unsigned int *height) -{ - bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); - if (swizzle) { - *width = 1 << pg->surface_shape.log_width; - *height = 1 << 
pg->surface_shape.log_height; - } else { - *width = pg->surface_shape.clip_width; - *height = pg->surface_shape.clip_height; - } -} - -static void pgraph_update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size, - bool quick) -{ - glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.gl_memory_buffer); - - hwaddr end = TARGET_PAGE_ALIGN(addr + size); - addr &= TARGET_PAGE_MASK; - assert(end < memory_region_size(d->vram)); - - static hwaddr last_addr, last_end; - if (quick && (addr >= last_addr) && (end <= last_end)) { - return; - } - last_addr = addr; - last_end = end; - - size = end - addr; - if (memory_region_test_and_clear_dirty(d->vram, addr, size, - DIRTY_MEMORY_NV2A)) { - glBufferSubData(GL_ARRAY_BUFFER, addr, size, - d->vram_ptr + addr); - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1); - } -} - -static void pgraph_update_inline_value(VertexAttribute *attr, - const uint8_t *data) -{ - assert(attr->count <= 4); - attr->inline_value[0] = 0.0f; - attr->inline_value[1] = 0.0f; - attr->inline_value[2] = 0.0f; - attr->inline_value[3] = 1.0f; - - switch (attr->format) { - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: - for (uint32_t i = 0; i < attr->count; ++i) { - attr->inline_value[i] = (float)data[i] / 255.0f; - } - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: { - const int16_t *val = (const int16_t *) data; - for (uint32_t i = 0; i < attr->count; ++i, ++val) { - attr->inline_value[i] = MAX(-1.0f, (float) *val / 32767.0f); - } - break; - } - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: - memcpy(attr->inline_value, data, attr->size * attr->count); - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: { - const int16_t *val = (const int16_t *) data; - for (uint32_t i = 0; i < attr->count; ++i, ++val) { - attr->inline_value[i] = (float)*val; - } - break; - } - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: { - /* 3 signed, normalized components packed in 32-bits. 
(11,11,10) */ - const int32_t val = *(const int32_t *)data; - int32_t x = val & 0x7FF; - if (x & 0x400) { - x |= 0xFFFFF800; - } - int32_t y = (val >> 11) & 0x7FF; - if (y & 0x400) { - y |= 0xFFFFF800; - } - int32_t z = (val >> 22) & 0x7FF; - if (z & 0x200) { - z |= 0xFFFFFC00; - } - - attr->inline_value[0] = MAX(-1.0f, (float)x / 1023.0f); - attr->inline_value[1] = MAX(-1.0f, (float)y / 1023.0f); - attr->inline_value[2] = MAX(-1.0f, (float)z / 511.0f); - break; - } - default: - fprintf(stderr, "Unknown vertex attribute type: 0x%x for format 0x%x\n", - attr->gl_type, attr->format); - assert(!"Unsupported attribute type"); - break; - } -} - -static void pgraph_bind_vertex_attributes(NV2AState *d, - unsigned int min_element, - unsigned int max_element, - bool inline_data, - unsigned int inline_stride, - unsigned int provoking_element) -{ - PGRAPHState *pg = &d->pgraph; - bool updated_memory_buffer = false; - unsigned int num_elements = max_element - min_element + 1; - - if (inline_data) { - NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)", - __func__, num_elements, inline_stride); - } else { - NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements); - } - - pg->compressed_attrs = 0; - - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attr = &pg->vertex_attributes[i]; - - if (!attr->count) { - glDisableVertexAttribArray(i); - glVertexAttrib4fv(i, attr->inline_value); - continue; - } - - nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND); - hwaddr attrib_data_addr; - size_t stride; - - if (attr->needs_conversion) { - pg->compressed_attrs |= (1 << i); - } - - hwaddr start = 0; - if (inline_data) { - glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer); - attrib_data_addr = attr->inline_array_offset; - stride = inline_stride; - } else { - hwaddr dma_len; - uint8_t *attr_data = (uint8_t *)nv_dma_map( - d, attr->dma_select ? 
pg->dma_vertex_b : pg->dma_vertex_a, - &dma_len); - assert(attr->offset < dma_len); - attrib_data_addr = attr_data + attr->offset - d->vram_ptr; - stride = attr->stride; - start = attrib_data_addr + min_element * stride; - pgraph_update_memory_buffer(d, start, num_elements * stride, - updated_memory_buffer); - updated_memory_buffer = true; - } - - uint32_t provoking_element_index = provoking_element - min_element; - size_t element_size = attr->size * attr->count; - assert(element_size <= sizeof(attr->inline_value)); - const uint8_t *last_entry; - - if (inline_data) { - last_entry = (uint8_t*)pg->inline_array + attr->inline_array_offset; - } else { - last_entry = d->vram_ptr + start; - } - if (!stride) { - // Stride of 0 indicates that only the first element should be - // used. - pgraph_update_inline_value(attr, last_entry); - glDisableVertexAttribArray(i); - glVertexAttrib4fv(i, attr->inline_value); - continue; - } - - if (attr->needs_conversion) { - glVertexAttribIPointer(i, attr->gl_count, attr->gl_type, stride, - (void *)attrib_data_addr); - } else { - glVertexAttribPointer(i, attr->gl_count, attr->gl_type, - attr->gl_normalize, stride, - (void *)attrib_data_addr); - } - - glEnableVertexAttribArray(i); - last_entry += stride * provoking_element_index; - pgraph_update_inline_value(attr, last_entry); - } - - NV2A_GL_DGROUP_END(); -} - -static unsigned int pgraph_bind_inline_array(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - unsigned int offset = 0; - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attr = &pg->vertex_attributes[i]; - if (attr->count == 0) { - continue; - } - - /* FIXME: Double check */ - offset = ROUND_UP(offset, attr->size); - attr->inline_array_offset = offset; - NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", - i, attr->size, attr->count); - offset += attr->size * attr->count; - offset = ROUND_UP(offset, attr->size); - } - - unsigned int vertex_size = offset; - unsigned int index_count = 
pg->inline_array_length*4 / vertex_size; - - NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count); - - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2); - glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer); - glBufferData(GL_ARRAY_BUFFER, NV2A_MAX_BATCH_LENGTH * sizeof(uint32_t), - NULL, GL_STREAM_DRAW); - glBufferSubData(GL_ARRAY_BUFFER, 0, index_count * vertex_size, pg->inline_array); - pgraph_bind_vertex_attributes(d, 0, index_count-1, true, vertex_size, - index_count-1); - - return index_count; -} - -/* 16 bit to [0.0, F16_MAX = 511.9375] */ -static float convert_f16_to_float(uint16_t f16) { - if (f16 == 0x0000) { return 0.0; } - uint32_t i = (f16 << 11) + 0x3C000000; - return *(float*)&i; -} - -/* 24 bit to [0.0, F24_MAX] */ -static float convert_f24_to_float(uint32_t f24) { - assert(!(f24 >> 24)); - f24 &= 0xFFFFFF; - if (f24 == 0x000000) { return 0.0; } - uint32_t i = f24 << 7; - return *(float*)&i; -} - -static uint8_t cliptobyte(int x) -{ - return (uint8_t)((x < 0) ? 0 : ((x > 255) ? 
255 : x)); -} - -static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, - uint8_t *r, uint8_t *g, uint8_t* b) { - int c, d, e; - c = (int)line[ix * 2] - 16; - if (ix % 2) { - d = (int)line[ix * 2 - 1] - 128; - e = (int)line[ix * 2 + 1] - 128; - } else { - d = (int)line[ix * 2 + 1] - 128; - e = (int)line[ix * 2 + 3] - 128; - } - *r = cliptobyte((298 * c + 409 * e + 128) >> 8); - *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); - *b = cliptobyte((298 * c + 516 * d + 128) >> 8); -} - -static void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix, - uint8_t *r, uint8_t *g, uint8_t* b) { - int c, d, e; - c = (int)line[ix * 2 + 1] - 16; - if (ix % 2) { - d = (int)line[ix * 2 - 2] - 128; - e = (int)line[ix * 2 + 0] - 128; - } else { - d = (int)line[ix * 2 + 0] - 128; - e = (int)line[ix * 2 + 2] - 128; - } - *r = cliptobyte((298 * c + 409 * e + 128) >> 8); - *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); - *b = cliptobyte((298 * c + 516 * d + 128) >> 8); -} - -static uint8_t* convert_texture_data(const TextureShape s, - const uint8_t *data, - const uint8_t *palette_data, - unsigned int width, - unsigned int height, - unsigned int depth, - unsigned int row_pitch, - unsigned int slice_pitch) -{ - if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8) { - uint8_t* converted_data = (uint8_t*)g_malloc(width * height * depth * 4); - int x, y, z; - const uint8_t* src = data; - uint32_t* dst = (uint32_t*)converted_data; - for (z = 0; z < depth; z++) { - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - uint8_t index = src[y * row_pitch + x]; - uint32_t color = *(uint32_t * )(palette_data + index * 4); - *dst++ = color; - } - } - src += slice_pitch; - } - return converted_data; - } else if (s.color_format - == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 || - s.color_format - == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) { - // TODO: Investigate whether a non-1 depth is possible. 
- // Generally the hardware asserts when attempting to use volumetric - // textures in linear formats. - assert(depth == 1); /* FIXME */ - // FIXME: only valid if control0 register allows for colorspace conversion - uint8_t* converted_data = (uint8_t*)g_malloc(width * height * 4); - int x, y; - uint8_t* pixel = converted_data; - for (y = 0; y < height; y++) { - const uint8_t* line = &data[y * row_pitch * depth]; - for (x = 0; x < width; x++, pixel += 4) { - if (s.color_format - == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) { - convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); - } else { - convert_uyvy_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); - } - pixel[3] = 255; - } - } - return converted_data; - } else if (s.color_format - == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) { - assert(depth == 1); /* FIXME */ - uint8_t *converted_data = (uint8_t*)g_malloc(width * height * 3); - int x, y; - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - uint16_t rgb655 = *(uint16_t*)(data + y * row_pitch + x * 2); - int8_t *pixel = (int8_t*)&converted_data[(y * width + x) * 3]; - /* Maps 5 bit G and B signed value range to 8 bit - * signed values. R is probably unsigned. 
- */ - rgb655 ^= (1 << 9) | (1 << 4); - pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F; - pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80; - pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80; - } - } - return converted_data; - } else { - return NULL; - } -} - -static void upload_gl_texture(GLenum gl_target, - const TextureShape s, - const uint8_t *texture_data, - const uint8_t *palette_data) -{ - ColorFormatInfo f = kelvin_color_format_map[s.color_format]; - nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD); - - unsigned int adjusted_width = s.width; - unsigned int adjusted_height = s.height; - unsigned int adjusted_pitch = s.pitch; - unsigned int adjusted_depth = s.depth; - if (!f.linear && s.border) { - adjusted_width = MAX(16, adjusted_width * 2); - adjusted_height = MAX(16, adjusted_height * 2); - adjusted_pitch = adjusted_width * (s.pitch / s.width); - adjusted_depth = MAX(16, s.depth * 2); - } - - switch(gl_target) { - case GL_TEXTURE_1D: - assert(false); - break; - case GL_TEXTURE_RECTANGLE: { - /* Can't handle strides unaligned to pixels */ - assert(s.pitch % f.bytes_per_pixel == 0); - - uint8_t *converted = convert_texture_data(s, texture_data, - palette_data, - adjusted_width, - adjusted_height, 1, - adjusted_pitch, 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, - converted ? 0 : adjusted_pitch / f.bytes_per_pixel); - glTexImage2D(gl_target, 0, f.gl_internal_format, - adjusted_width, adjusted_height, 0, - f.gl_format, f.gl_type, - converted ? 
converted : texture_data); - - if (converted) { - g_free(converted); - } - - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - break; - } - case GL_TEXTURE_2D: - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: { - - unsigned int width = adjusted_width, height = adjusted_height; - - int level; - for (level = 0; level < s.levels; level++) { - width = MAX(width, 1); - height = MAX(height, 1); - - if (f.gl_format == 0) { /* compressed */ - // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size - unsigned int block_size = - f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ? - 8 : 16; - unsigned int physical_width = (width + 3) & ~3, - physical_height = (height + 3) & ~3; - if (physical_width != width) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width); - } - uint8_t *converted = decompress_2d_texture_data( - f.gl_internal_format, texture_data, physical_width, - physical_height); - unsigned int tex_width = width; - unsigned int tex_height = height; - - if (s.cubemap && adjusted_width != s.width) { - // FIXME: Consider preserving the border. - // There does not seem to be a way to reference the border - // texels in a cubemap, so they are discarded. 
- glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4); - glPixelStorei(GL_UNPACK_SKIP_ROWS, 4); - tex_width = s.width; - tex_height = s.height; - if (physical_width == width) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); - } - } - - glTexImage2D(gl_target, level, GL_RGBA, tex_width, tex_height, 0, - GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, converted); - g_free(converted); - if (physical_width != width) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } - if (s.cubemap && adjusted_width != s.width) { - glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0); - glPixelStorei(GL_UNPACK_SKIP_ROWS, 0); - if (physical_width == width) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } - } - texture_data += - physical_width / 4 * physical_height / 4 * block_size; - } else { - unsigned int pitch = width * f.bytes_per_pixel; - uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch); - unswizzle_rect(texture_data, width, height, - unswizzled, pitch, f.bytes_per_pixel); - uint8_t *converted = convert_texture_data(s, unswizzled, - palette_data, - width, height, 1, - pitch, 0); - uint8_t *pixel_data = converted ? converted : unswizzled; - unsigned int tex_width = width; - unsigned int tex_height = height; - - if (s.cubemap && adjusted_width != s.width) { - // FIXME: Consider preserving the border. - // There does not seem to be a way to reference the border - // texels in a cubemap, so they are discarded. 
- glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); - tex_width = s.width; - tex_height = s.height; - pixel_data += 4 * f.bytes_per_pixel + 4 * pitch; - } - - glTexImage2D(gl_target, level, f.gl_internal_format, tex_width, - tex_height, 0, f.gl_format, f.gl_type, - pixel_data); - if (s.cubemap && s.border) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } - if (converted) { - g_free(converted); - } - g_free(unswizzled); - - texture_data += width * height * f.bytes_per_pixel; - } - - width /= 2; - height /= 2; - } - - break; - } - case GL_TEXTURE_3D: { - - unsigned int width = adjusted_width; - unsigned int height = adjusted_height; - unsigned int depth = adjusted_depth; - - assert(f.linear == false); - - int level; - for (level = 0; level < s.levels; level++) { - if (f.gl_format == 0) { /* compressed */ - assert(width % 4 == 0 && height % 4 == 0 && - "Compressed 3D texture virtual size"); - width = MAX(width, 4); - height = MAX(height, 4); - depth = MAX(depth, 1); - - unsigned int block_size; - if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { - block_size = 8; - } else { - block_size = 16; - } - - size_t texture_size = width/4 * height/4 * depth * block_size; - - uint8_t *converted = decompress_3d_texture_data(f.gl_internal_format, texture_data, width, height, depth); - - glTexImage3D(gl_target, level, GL_RGBA8, - width, height, depth, 0, - GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, - converted); - - g_free(converted); - - texture_data += texture_size; - } else { - width = MAX(width, 1); - height = MAX(height, 1); - depth = MAX(depth, 1); - - unsigned int row_pitch = width * f.bytes_per_pixel; - unsigned int slice_pitch = row_pitch * height; - uint8_t *unswizzled = (uint8_t*)g_malloc(slice_pitch * depth); - unswizzle_box(texture_data, width, height, depth, unswizzled, - row_pitch, slice_pitch, f.bytes_per_pixel); - - uint8_t *converted = convert_texture_data(s, unswizzled, - palette_data, - width, height, depth, - row_pitch, slice_pitch); - - 
glTexImage3D(gl_target, level, f.gl_internal_format, - width, height, depth, 0, - f.gl_format, f.gl_type, - converted ? converted : unswizzled); - - if (converted) { - g_free(converted); - } - g_free(unswizzled); - - texture_data += width * height * depth * f.bytes_per_pixel; - } - - width /= 2; - height /= 2; - depth /= 2; - } - break; - } - default: - assert(false); - break; - } -} - -static TextureBinding* generate_texture(const TextureShape s, - const uint8_t *texture_data, - const uint8_t *palette_data) -{ - ColorFormatInfo f = kelvin_color_format_map[s.color_format]; - - /* Create a new opengl texture */ - GLuint gl_texture; - glGenTextures(1, &gl_texture); - - GLenum gl_target; - if (s.cubemap) { - assert(f.linear == false); - assert(s.dimensionality == 2); - gl_target = GL_TEXTURE_CUBE_MAP; - } else { - if (f.linear) { - /* linear textures use unnormalised texcoords. - * GL_TEXTURE_RECTANGLE_ARB conveniently also does, but - * does not allow repeat and mirror wrap modes. - * (or mipmapping, but xbox d3d says 'Non swizzled and non - * compressed textures cannot be mip mapped.') - * Not sure if that'll be an issue. */ - - /* FIXME: GLSL 330 provides us with textureSize()! Use that? */ - gl_target = GL_TEXTURE_RECTANGLE; - assert(s.dimensionality == 2); - } else { - switch(s.dimensionality) { - case 1: gl_target = GL_TEXTURE_1D; break; - case 2: gl_target = GL_TEXTURE_2D; break; - case 3: gl_target = GL_TEXTURE_3D; break; - default: - assert(false); - break; - } - } - } - - glBindTexture(gl_target, gl_texture); - - NV2A_GL_DLABEL(GL_TEXTURE, gl_texture, - "offset: 0x%08lx, format: 0x%02X%s, %d dimensions%s, " - "width: %d, height: %d, depth: %d", - texture_data - g_nv2a->vram_ptr, - s.color_format, f.linear ? "" : " (SZ)", - s.dimensionality, s.cubemap ? 
" (Cubemap)" : "", - s.width, s.height, s.depth); - - if (gl_target == GL_TEXTURE_CUBE_MAP) { - - ColorFormatInfo f = kelvin_color_format_map[s.color_format]; - unsigned int block_size; - if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { - block_size = 8; - } else { - block_size = 16; - } - - size_t length = 0; - unsigned int w = s.width; - unsigned int h = s.height; - if (!f.linear && s.border) { - w = MAX(16, w * 2); - h = MAX(16, h * 2); - } - - int level; - for (level = 0; level < s.levels; level++) { - if (f.gl_format == 0) { - length += w/4 * h/4 * block_size; - } else { - length += w * h * f.bytes_per_pixel; - } - - w /= 2; - h /= 2; - } - - length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1); - - upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X, - s, texture_data + 0 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, - s, texture_data + 1 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, - s, texture_data + 2 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, - s, texture_data + 3 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, - s, texture_data + 4 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, - s, texture_data + 5 * length, palette_data); - } else { - upload_gl_texture(gl_target, s, texture_data, palette_data); - } - - /* Linear textures don't support mipmapping */ - if (!f.linear) { - glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL, - s.min_mipmap_level); - glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL, - s.levels - 1); - } - - if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0 - || f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) { - glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA, - (const GLint *)f.gl_swizzle_mask); - } - - TextureBinding* ret = (TextureBinding *)g_malloc(sizeof(TextureBinding)); - ret->gl_target = gl_target; - ret->gl_texture 
= gl_texture; - ret->refcnt = 1; - ret->draw_time = 0; - ret->data_hash = 0; - ret->min_filter = 0xFFFFFFFF; - ret->mag_filter = 0xFFFFFFFF; - ret->addru = 0xFFFFFFFF; - ret->addrv = 0xFFFFFFFF; - ret->addrp = 0xFFFFFFFF; - ret->border_color_set = false; - return ret; -} - -static void texture_binding_destroy(gpointer data) -{ - TextureBinding *binding = (TextureBinding *)data; - assert(binding->refcnt > 0); - binding->refcnt--; - if (binding->refcnt == 0) { - glDeleteTextures(1, &binding->gl_texture); - g_free(binding); - } -} - -/* functions for texture LRU cache */ -static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key) -{ - TextureLruNode *tnode = container_of(node, TextureLruNode, node); - memcpy(&tnode->key, key, sizeof(TextureKey)); - - tnode->binding = NULL; - tnode->possibly_dirty = false; -} - -static void texture_cache_entry_post_evict(Lru *lru, LruNode *node) -{ - TextureLruNode *tnode = container_of(node, TextureLruNode, node); - if (tnode->binding) { - texture_binding_destroy(tnode->binding); - tnode->binding = NULL; - tnode->possibly_dirty = false; - } -} - -static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key) -{ - TextureLruNode *tnode = container_of(node, TextureLruNode, node); - return memcmp(&tnode->key, key, sizeof(TextureKey)); -} - -static unsigned int kelvin_map_stencil_op(uint32_t parameter) -{ - unsigned int op; - switch (parameter) { - case NV097_SET_STENCIL_OP_V_KEEP: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP; break; - case NV097_SET_STENCIL_OP_V_ZERO: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_ZERO; break; - case NV097_SET_STENCIL_OP_V_REPLACE: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_REPLACE; break; - case NV097_SET_STENCIL_OP_V_INCRSAT: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCRSAT; break; - case NV097_SET_STENCIL_OP_V_DECRSAT: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECRSAT; break; - case NV097_SET_STENCIL_OP_V_INVERT: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INVERT; break; - case 
NV097_SET_STENCIL_OP_V_INCR: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR; break; - case NV097_SET_STENCIL_OP_V_DECR: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR; break; - default: - assert(false); - break; - } - return op; -} - -static unsigned int kelvin_map_polygon_mode(uint32_t parameter) -{ - unsigned int mode; - switch (parameter) { - case NV097_SET_FRONT_POLYGON_MODE_V_POINT: - mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT; break; - case NV097_SET_FRONT_POLYGON_MODE_V_LINE: - mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE; break; - case NV097_SET_FRONT_POLYGON_MODE_V_FILL: - mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL; break; - default: - assert(false); - break; - } - return mode; -} - -static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel) -{ - assert(channel < 4); - unsigned int texgen; - switch (parameter) { - case NV097_SET_TEXGEN_S_DISABLE: - texgen = NV_PGRAPH_CSV1_A_T0_S_DISABLE; break; - case NV097_SET_TEXGEN_S_EYE_LINEAR: - texgen = NV_PGRAPH_CSV1_A_T0_S_EYE_LINEAR; break; - case NV097_SET_TEXGEN_S_OBJECT_LINEAR: - texgen = NV_PGRAPH_CSV1_A_T0_S_OBJECT_LINEAR; break; - case NV097_SET_TEXGEN_S_SPHERE_MAP: - assert(channel < 2); - texgen = NV_PGRAPH_CSV1_A_T0_S_SPHERE_MAP; break; - case NV097_SET_TEXGEN_S_REFLECTION_MAP: - assert(channel < 3); - texgen = NV_PGRAPH_CSV1_A_T0_S_REFLECTION_MAP; break; - case NV097_SET_TEXGEN_S_NORMAL_MAP: - assert(channel < 3); - texgen = NV_PGRAPH_CSV1_A_T0_S_NORMAL_MAP; break; - default: - assert(false); - break; - } - return texgen; -} diff --git a/hw/xbox/nv2a/pgraph/debug_renderdoc.c b/hw/xbox/nv2a/pgraph/debug_renderdoc.c new file mode 100644 index 0000000000..ded339e23f --- /dev/null +++ b/hw/xbox/nv2a/pgraph/debug_renderdoc.c @@ -0,0 +1,84 @@ +/* + * Geforce NV2A PGRAPH Renderdoc Helpers + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as 
published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" + +#include +#include + +#pragma GCC diagnostic ignored "-Wstrict-prototypes" +#include "thirdparty/renderdoc_app.h" + +#include "hw/xbox/nv2a/debug.h" + +#ifdef _WIN32 +#include +#else +#include +#endif + +static RENDERDOC_API_1_6_0 *rdoc_api = NULL; + +int renderdoc_capture_frames = 0; + +void nv2a_dbg_renderdoc_init(void) +{ + if (rdoc_api) { + return; + } + +#ifdef _WIN32 + HMODULE renderdoc = GetModuleHandleA("renderdoc.dll"); + if (renderdoc) { + pRENDERDOC_GetAPI RENDERDOC_GetAPI = + (pRENDERDOC_GetAPI)GetProcAddress(renderdoc, "RENDERDOC_GetAPI"); +#else + void *renderdoc = dlopen( +#ifdef __APPLE__ + "librenderdoc.dylib", +#else + "librenderdoc.so", +#endif + RTLD_LAZY); + if (renderdoc) { + pRENDERDOC_GetAPI RENDERDOC_GetAPI = + (pRENDERDOC_GetAPI)dlsym(renderdoc, "RENDERDOC_GetAPI"); +#endif // _WIN32 + int ret = + RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void **)&rdoc_api); + assert(ret == 1 && "Failed to retrieve RenderDoc API."); + } else { + fprintf(stderr, "Error: Failed to open renderdoc library: %s\n", dlerror()); + } +} + +void *nv2a_dbg_renderdoc_get_api(void) +{ + return (void*)rdoc_api; +} + +bool nv2a_dbg_renderdoc_available(void) +{ + return rdoc_api != NULL; +} + +void nv2a_dbg_renderdoc_capture_frames(int num_frames) +{ + renderdoc_capture_frames += num_frames; +} diff --git a/hw/xbox/nv2a/pgraph/gl/blit.c b/hw/xbox/nv2a/pgraph/gl/blit.c new file mode 100644 index 0000000000..b4cce8a5ef --- /dev/null +++ 
b/hw/xbox/nv2a/pgraph/gl/blit.c @@ -0,0 +1,174 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "renderer.h" + +// TODO: Optimize. Ideally this should all be done via OpenGL. +void pgraph_gl_image_blit(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d; + ImageBlitState *image_blit = &pg->image_blit; + BetaState *beta = &pg->beta; + + pgraph_gl_surface_update(d, false, true, true); + + assert(context_surfaces->object_instance == image_blit->context_surfaces); + + unsigned int bytes_per_pixel; + switch (context_surfaces->color_format) { + case NV062_SET_COLOR_FORMAT_LE_Y8: + bytes_per_pixel = 1; + break; + case NV062_SET_COLOR_FORMAT_LE_R5G6B5: + bytes_per_pixel = 2; + break; + case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_Y32: + bytes_per_pixel = 4; + break; + default: + fprintf(stderr, "Unknown blit surface format: 0x%x\n", + context_surfaces->color_format); + assert(false); + break; + } + + hwaddr source_dma_len, dest_dma_len; + + uint8_t *source = (uint8_t *)nv_dma_map( + d, 
context_surfaces->dma_image_source, &source_dma_len); + assert(context_surfaces->source_offset < source_dma_len); + source += context_surfaces->source_offset; + + uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest, + &dest_dma_len); + assert(context_surfaces->dest_offset < dest_dma_len); + dest += context_surfaces->dest_offset; + + hwaddr source_addr = source - d->vram_ptr; + hwaddr dest_addr = dest - d->vram_ptr; + + SurfaceBinding *surf_src = pgraph_gl_surface_get(d, source_addr); + if (surf_src) { + pgraph_gl_surface_download_if_dirty(d, surf_src); + } + + SurfaceBinding *surf_dest = pgraph_gl_surface_get(d, dest_addr); + if (surf_dest) { + if (image_blit->height < surf_dest->height || + image_blit->width < surf_dest->width) { + pgraph_gl_surface_download_if_dirty(d, surf_dest); + } else { + // The blit will completely replace the surface so any pending + // download should be discarded. + surf_dest->download_pending = false; + surf_dest->draw_dirty = false; + } + surf_dest->upload_pending = true; + pg->draw_time++; + } + + hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch + + image_blit->in_x * bytes_per_pixel; + hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch + + image_blit->out_x * bytes_per_pixel; + + hwaddr source_size = + (image_blit->height - 1) * context_surfaces->source_pitch + + image_blit->width * bytes_per_pixel; + hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch + + image_blit->width * bytes_per_pixel; + + /* FIXME: What does hardware do in this case? 
*/ + assert(source_addr + source_offset + source_size <= + memory_region_size(d->vram)); + assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram)); + + uint8_t *source_row = source + source_offset; + uint8_t *dest_row = dest + dest_offset; + + if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) { + // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY"); + for (unsigned int y = 0; y < image_blit->height; y++) { + memmove(dest_row, source_row, image_blit->width * bytes_per_pixel); + source_row += context_surfaces->source_pitch; + dest_row += context_surfaces->dest_pitch; + } + } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) { + // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND"); + uint32_t max_beta_mult = 0x7f80; + uint32_t beta_mult = beta->beta >> 16; + uint32_t inv_beta_mult = max_beta_mult - beta_mult; + for (unsigned int y = 0; y < image_blit->height; y++) { + for (unsigned int x = 0; x < image_blit->width; x++) { + for (unsigned int ch = 0; ch < 3; ch++) { + uint32_t a = source_row[x * 4 + ch] * beta_mult; + uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult; + dest_row[x * 4 + ch] = (a + b) / max_beta_mult; + } + } + source_row += context_surfaces->source_pitch; + dest_row += context_surfaces->dest_pitch; + } + } else { + fprintf(stderr, "Unknown blit operation: 0x%x\n", + image_blit->operation); + assert(false && "Unknown blit operation"); + } + + NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr); + + bool needs_alpha_patching; + uint8_t alpha_override; + switch (context_surfaces->color_format) { + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: + needs_alpha_patching = true; + alpha_override = 0xff; + break; + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: + needs_alpha_patching = true; + alpha_override = 0; + break; + default: + needs_alpha_patching = false; + alpha_override = 0; + } + + if (needs_alpha_patching) { + dest_row = dest + dest_offset; + for (unsigned int y = 0; y < image_blit->height; 
y++) { + for (unsigned int x = 0; x < image_blit->width; x++) { + dest_row[x * 4 + 3] = alpha_override; + } + dest_row += context_surfaces->dest_pitch; + } + } + + dest_addr += dest_offset; + memory_region_set_client_dirty(d->vram, dest_addr, dest_size, + DIRTY_MEMORY_VGA); + memory_region_set_client_dirty(d->vram, dest_addr, dest_size, + DIRTY_MEMORY_NV2A_TEX); +} diff --git a/hw/xbox/nv2a/pgraph/gl/constants.h b/hw/xbox/nv2a/pgraph/gl/constants.h new file mode 100644 index 0000000000..d78b0054e3 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/constants.h @@ -0,0 +1,322 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GL_CONSTANTS_H +#define HW_XBOX_NV2A_PGRAPH_GL_CONSTANTS_H + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/nv2a_regs.h" +#include "gloffscreen.h" + +static const GLenum pgraph_texture_min_filter_gl_map[] = { + 0, + GL_NEAREST, + GL_LINEAR, + GL_NEAREST_MIPMAP_NEAREST, + GL_LINEAR_MIPMAP_NEAREST, + GL_NEAREST_MIPMAP_LINEAR, + GL_LINEAR_MIPMAP_LINEAR, + GL_LINEAR, +}; + +static const GLenum pgraph_texture_mag_filter_gl_map[] = { + 0, + GL_NEAREST, + GL_LINEAR, + 0, + GL_LINEAR /* TODO: Convolution filter... 
*/ +}; + +static const GLenum pgraph_texture_addr_gl_map[] = { + 0, + GL_REPEAT, + GL_MIRRORED_REPEAT, + GL_CLAMP_TO_EDGE, + GL_CLAMP_TO_BORDER, + GL_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */ +}; + +static const GLenum pgraph_blend_factor_gl_map[] = { + GL_ZERO, + GL_ONE, + GL_SRC_COLOR, + GL_ONE_MINUS_SRC_COLOR, + GL_SRC_ALPHA, + GL_ONE_MINUS_SRC_ALPHA, + GL_DST_ALPHA, + GL_ONE_MINUS_DST_ALPHA, + GL_DST_COLOR, + GL_ONE_MINUS_DST_COLOR, + GL_SRC_ALPHA_SATURATE, + 0, + GL_CONSTANT_COLOR, + GL_ONE_MINUS_CONSTANT_COLOR, + GL_CONSTANT_ALPHA, + GL_ONE_MINUS_CONSTANT_ALPHA, +}; + +static const GLenum pgraph_blend_equation_gl_map[] = { + GL_FUNC_SUBTRACT, + GL_FUNC_REVERSE_SUBTRACT, + GL_FUNC_ADD, + GL_MIN, + GL_MAX, + GL_FUNC_REVERSE_SUBTRACT, + GL_FUNC_ADD, +}; + +/* FIXME +static const GLenum pgraph_blend_logicop_map[] = { + GL_CLEAR, + GL_AND, + GL_AND_REVERSE, + GL_COPY, + GL_AND_INVERTED, + GL_NOOP, + GL_XOR, + GL_OR, + GL_NOR, + GL_EQUIV, + GL_INVERT, + GL_OR_REVERSE, + GL_COPY_INVERTED, + GL_OR_INVERTED, + GL_NAND, + GL_SET, +}; +*/ + +static const GLenum pgraph_cull_face_gl_map[] = { + 0, + GL_FRONT, + GL_BACK, + GL_FRONT_AND_BACK +}; + +static const GLenum pgraph_depth_func_gl_map[] = { + GL_NEVER, + GL_LESS, + GL_EQUAL, + GL_LEQUAL, + GL_GREATER, + GL_NOTEQUAL, + GL_GEQUAL, + GL_ALWAYS, +}; + +static const GLenum pgraph_stencil_func_gl_map[] = { + GL_NEVER, + GL_LESS, + GL_EQUAL, + GL_LEQUAL, + GL_GREATER, + GL_NOTEQUAL, + GL_GEQUAL, + GL_ALWAYS, +}; + +static const GLenum pgraph_stencil_op_gl_map[] = { + 0, + GL_KEEP, + GL_ZERO, + GL_REPLACE, + GL_INCR, + GL_DECR, + GL_INVERT, + GL_INCR_WRAP, + GL_DECR_WRAP, +}; + +typedef struct ColorFormatInfo { + unsigned int bytes_per_pixel; + bool linear; + GLint gl_internal_format; + GLenum gl_format; + GLenum gl_type; + GLenum gl_swizzle_mask[4]; + bool depth; +} ColorFormatInfo; + +static const ColorFormatInfo kelvin_color_format_gl_map[66] = { + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = + {1, false, GL_R8, GL_RED, 
GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_ONE}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = + {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_RED}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = + {2, false, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = + {2, false, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = + {2, false, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = + {2, false, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = + {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = + {4, false, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + + /* paletted texture */ + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = + {1, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = + {4, false, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, 0, GL_RGBA}, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = + {4, false, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, 0, GL_RGBA}, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = + {4, false, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, 0, GL_RGBA}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = + {2, true, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = + {2, true, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = + {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = + {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_ONE}}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = + {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_RED, GL_GREEN, GL_RED, GL_GREEN}}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = + {1, 
false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_ONE, GL_ONE, GL_ONE, GL_RED}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = + {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_GREEN}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = + {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_RED}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = + {2, true, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = + {2, true, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = + {4, true, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = + {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_ONE, GL_ONE, GL_ONE, GL_RED}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = + {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_GREEN}}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = + {2, false, GL_RGB8_SNORM, GL_RGB, GL_BYTE}, /* FIXME: This might be signed */ + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = + {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_RED, GL_GREEN, GL_RED, GL_GREEN}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = + {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_GREEN, GL_RED, GL_RED, GL_GREEN}}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = + {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = + {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, + + /* Additional information is passed to the pixel shader via the swizzle: + * RED: The depth value. 
+ * GREEN: 0 for 16-bit, 1 for 24 bit + * BLUE: 0 for fixed, 1 for float + */ + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = + {2, false, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, + {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = + {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, + {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = + /* FIXME: Uses fixed-point format to match surface format hack below. */ + {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, + {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = + {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, + {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = + {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, + {GL_RED, GL_ZERO, GL_ONE, GL_ZERO}, true}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = + {2, true, GL_R16, GL_RED, GL_UNSIGNED_SHORT, + {GL_RED, GL_RED, GL_RED, GL_ONE}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = + {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = + {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = + {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = + {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = + {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = + {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8} +}; + +typedef struct SurfaceFormatInfo { + unsigned int bytes_per_pixel; + GLint gl_internal_format; + GLenum gl_format; + GLenum gl_type; + 
GLenum gl_attachment; +} SurfaceFormatInfo; + +static const SurfaceFormatInfo kelvin_surface_color_format_gl_map[] = { + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] = + {2, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, GL_COLOR_ATTACHMENT0}, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = + {2, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, GL_COLOR_ATTACHMENT0}, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = + {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0}, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = + {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0}, + + // FIXME: Map channel color + [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = + {1, GL_R8, GL_RED, GL_UNSIGNED_BYTE, GL_COLOR_ATTACHMENT0}, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = + {2, GL_RG8, GL_RG, GL_UNSIGNED_SHORT, GL_COLOR_ATTACHMENT0}, +}; + +static const SurfaceFormatInfo kelvin_surface_zeta_float_format_gl_map[] = { + [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = + {2, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, GL_DEPTH_ATTACHMENT}, + [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = + /* FIXME: GL does not support packing floating-point Z24S8 OOTB, so for + * now just emulate this with fixed-point Z24S8. Possible compat + * improvement with custom conversion. 
+ */ + {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT}, +}; + +static const SurfaceFormatInfo kelvin_surface_zeta_fixed_format_gl_map[] = { + [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = + {2, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, GL_DEPTH_ATTACHMENT}, + [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = + {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT}, +}; + +#endif diff --git a/hw/xbox/nv2a/debug.c b/hw/xbox/nv2a/pgraph/gl/debug.c similarity index 77% rename from hw/xbox/nv2a/debug.c rename to hw/xbox/nv2a/pgraph/gl/debug.c index def94cdba1..8e7f49e47c 100644 --- a/hw/xbox/nv2a/debug.c +++ b/hw/xbox/nv2a/pgraph/gl/debug.c @@ -1,5 +1,5 @@ /* - * QEMU Geforce NV2A debug helpers + * Geforce NV2A PGRAPH OpenGL Renderer * * Copyright (c) 2015 Jannik Vogel * Copyright (c) 2012 espes @@ -18,6 +18,7 @@ * License along with this library; if not, see . */ +#include "renderer.h" #include "debug.h" #ifdef DEBUG_NV2A_GL @@ -28,15 +29,8 @@ #include #ifdef CONFIG_RENDERDOC +#pragma GCC diagnostic ignored "-Wstrict-prototypes" #include "thirdparty/renderdoc_app.h" -#ifdef _WIN32 -#include -#else -#include -#endif - -static RENDERDOC_API_1_1_2 *rdoc_api = NULL; -static int32_t renderdoc_capture_frames = 0; #endif #define CHECK_GL_ERROR() do { \ @@ -74,31 +68,7 @@ void gl_debug_initialize(void) } #ifdef CONFIG_RENDERDOC - const char *renderdoc_lib; - void* renderdoc; -#ifdef __APPLE__ - renderdoc_lib = "librenderdoc.dylib"; -#elif _WIN32 - renderdoc_lib = "renderdoc.dll"; -#else - renderdoc_lib = "librenderdoc.so"; -#endif - -#ifdef _WIN32 - renderdoc = GetModuleHandleA(renderdoc_lib); - if (renderdoc) { - pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)GetProcAddress( - renderdoc, "RENDERDOC_GetAPI"); -#else - renderdoc = dlopen(renderdoc_lib, RTLD_NOW | RTLD_NOLOAD); - if (renderdoc) { - pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)dlsym( - renderdoc, 
"RENDERDOC_GetAPI"); -#endif - int ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_1_2, - (void **)&rdoc_api); - assert(ret == 1 && "Failed to retrieve RenderDoc API."); - } + nv2a_dbg_renderdoc_init(); #endif } @@ -179,7 +149,10 @@ void gl_debug_frame_terminator(void) CHECK_GL_ERROR(); #ifdef CONFIG_RENDERDOC - if (rdoc_api) { + if (nv2a_dbg_renderdoc_available()) { + + RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api(); + if (rdoc_api->IsTargetControlConnected()) { if (rdoc_api->IsFrameCapturing()) { rdoc_api->EndFrameCapture(NULL, NULL); @@ -190,7 +163,7 @@ void gl_debug_frame_terminator(void) error); } } - if (renderdoc_capture_frames) { + if (renderdoc_capture_frames > 0) { rdoc_api->StartFrameCapture(NULL, NULL); GLenum error = glGetError(); if (error != GL_NO_ERROR) { @@ -203,22 +176,10 @@ void gl_debug_frame_terminator(void) } } #endif - if (!has_GL_GREMEDY_frame_terminator) { - return; + if (has_GL_GREMEDY_frame_terminator) { + glFrameTerminatorGREMEDY(); + CHECK_GL_ERROR(); } - - glFrameTerminatorGREMEDY(); - CHECK_GL_ERROR(); } -#ifdef CONFIG_RENDERDOC -bool nv2a_dbg_renderdoc_available(void) { - return rdoc_api != NULL; -} - -void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames) { - renderdoc_capture_frames = num_frames; -} -#endif - #endif // DEBUG_NV2A_GL diff --git a/hw/xbox/nv2a/pgraph/gl/debug.h b/hw/xbox/nv2a/pgraph/gl/debug.h new file mode 100644 index 0000000000..c242e1f384 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/debug.h @@ -0,0 +1,60 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2012 espes + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GL_DEBUG_H +#define HW_XBOX_NV2A_PGRAPH_GL_DEBUG_H + +// #define DEBUG_NV2A_GL +#ifdef DEBUG_NV2A_GL + +#include +#include "gloffscreen.h" +#include "config-host.h" + +void gl_debug_initialize(void); +void gl_debug_message(bool cc, const char *fmt, ...); +void gl_debug_group_begin(const char *fmt, ...); +void gl_debug_group_end(void); +void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...); +void gl_debug_frame_terminator(void); + +# define NV2A_GL_DPRINTF(cc, format, ...) \ + gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__) +# define NV2A_GL_DGROUP_BEGIN(format, ...) \ + gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__) +# define NV2A_GL_DGROUP_END() \ + gl_debug_group_end() +# define NV2A_GL_DLABEL(target, name, format, ...) \ + gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__) +#define NV2A_GL_DFRAME_TERMINATOR() \ + gl_debug_frame_terminator() + +#else + +# define NV2A_GL_DPRINTF(cc, format, ...) do { \ + if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \ + } while (0) +# define NV2A_GL_DGROUP_BEGIN(format, ...) do { } while (0) +# define NV2A_GL_DGROUP_END() do { } while (0) +# define NV2A_GL_DLABEL(target, name, format, ...) 
do { } while (0) +# define NV2A_GL_DFRAME_TERMINATOR() do { } while (0) +#endif + +#endif diff --git a/hw/xbox/nv2a/pgraph/gl/display.c b/hw/xbox/nv2a/pgraph/gl/display.c new file mode 100644 index 0000000000..804fec2c2d --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/display.c @@ -0,0 +1,407 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/pgraph/util.h" +#include "renderer.h" + +#include + +void pgraph_gl_init_display_renderer(NV2AState *d) +{ + struct PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + glGenTextures(1, &r->gl_display_buffer); + r->gl_display_buffer_internal_format = 0; + r->gl_display_buffer_width = 0; + r->gl_display_buffer_height = 0; + r->gl_display_buffer_format = 0; + r->gl_display_buffer_type = 0; + + const char *vs = + "#version 330\n" + "void main()\n" + "{\n" + " float x = -1.0 + float((gl_VertexID & 1) << 2);\n" + " float y = -1.0 + float((gl_VertexID & 2) << 1);\n" + " gl_Position = vec4(x, y, 0, 1);\n" + "}\n"; + /* FIXME: improve interlace handling, pvideo */ + + const char *fs = + "#version 330\n" + "uniform sampler2D tex;\n" + "uniform bool pvideo_enable;\n" + "uniform sampler2D pvideo_tex;\n" + "uniform vec2 pvideo_in_pos;\n" + "uniform vec4 pvideo_pos;\n" + "uniform vec3 pvideo_scale;\n" + "uniform bool pvideo_color_key_enable;\n" + "uniform vec4 pvideo_color_key;\n" + "uniform vec2 display_size;\n" + "uniform float line_offset;\n" + "layout(location = 0) out vec4 out_Color;\n" + "void main()\n" + "{\n" + " vec2 texCoord = gl_FragCoord.xy/display_size;\n" + " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n" + " texCoord.y = 1 + rel*(texCoord.y - 1);" + " out_Color.rgba = texture(tex, texCoord);\n" + " if (pvideo_enable) {\n" + " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n" + " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n" + " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n" + " greaterThan(screenCoord, output_region.zw));\n" + " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n" + " vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n" + " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n" + " in_st.y *= -1.0;\n" + " 
out_Color.rgba = texture(pvideo_tex, in_st);\n" + " }\n" + " }\n" + "}\n"; + + r->disp_rndr.prog = pgraph_gl_compile_shader(vs, fs); + r->disp_rndr.tex_loc = glGetUniformLocation(r->disp_rndr.prog, "tex"); + r->disp_rndr.pvideo_enable_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_enable"); + r->disp_rndr.pvideo_tex_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_tex"); + r->disp_rndr.pvideo_in_pos_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_in_pos"); + r->disp_rndr.pvideo_pos_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_pos"); + r->disp_rndr.pvideo_scale_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_scale"); + r->disp_rndr.pvideo_color_key_enable_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_color_key_enable"); + r->disp_rndr.pvideo_color_key_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_color_key"); + r->disp_rndr.display_size_loc = glGetUniformLocation(r->disp_rndr.prog, "display_size"); + r->disp_rndr.line_offset_loc = glGetUniformLocation(r->disp_rndr.prog, "line_offset"); + + glGenVertexArrays(1, &r->disp_rndr.vao); + glBindVertexArray(r->disp_rndr.vao); + glGenBuffers(1, &r->disp_rndr.vbo); + glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo); + glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); + glGenFramebuffers(1, &r->disp_rndr.fbo); + glGenTextures(1, &r->disp_rndr.pvideo_tex); + assert(glGetError() == GL_NO_ERROR); +} + +static uint8_t *convert_texture_data__CR8YB8CB8YA8(const uint8_t *data, + unsigned int width, + unsigned int height, + unsigned int pitch) +{ + uint8_t *converted_data = (uint8_t *)g_malloc(width * height * 4); + int x, y; + for (y = 0; y < height; y++) { + const uint8_t *line = &data[y * pitch]; + const uint32_t row_offset = y * width; + for (x = 0; x < width; x++) { + uint8_t *pixel = &converted_data[(row_offset + x) * 4]; + convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); + pixel[3] = 255; + } + } + return converted_data; +} + +static float 
pvideo_calculate_scale(unsigned int din_dout, + unsigned int output_size) +{ + float calculated_in = din_dout * (output_size - 1); + calculated_in = floorf(calculated_in / (1 << 20) + 0.5f); + return (calculated_in + 1.0f) / output_size; +} + +static void render_display_pvideo_overlay(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior. + // Many games seem to pass this value when initializing or tearing down + // PVIDEO. On its own, this generally does not result in the overlay being + // hidden, however there are certain games (e.g., Ultimate Beach Soccer) + // that use an unknown mechanism to hide the overlay without explicitly + // stopping it. + // Since the value seems to be set to 0xFFFFFFFF only in cases where the + // content is not valid, it is probably good enough to treat it as an + // implicit stop. + bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE) + && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF; + glUniform1ui(r->disp_rndr.pvideo_enable_loc, enabled); + if (!enabled) { + return; + } + + hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE]; + hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT]; + hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET]; + + int in_width = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH); + int in_height = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT); + + int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], + NV_PVIDEO_POINT_IN_S); + int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], + NV_PVIDEO_POINT_IN_T); + + int in_pitch = + GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH); + int in_color = + GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR); + + unsigned int out_width = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH); + unsigned int out_height = + 
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT); + + float scale_x = 1.0f; + float scale_y = 1.0f; + unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX]; + unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY]; + if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) { + scale_x = pvideo_calculate_scale(ds_dx, out_width); + } + if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) { + scale_y = pvideo_calculate_scale(dt_dy, out_height); + } + + // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results + // in them being capped to the output size, content is not scaled. This is + // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF + // during initialization or teardown. + if (in_width > out_width) { + in_width = floorf((float)out_width * scale_x + 0.5f); + } + if (in_height > out_height) { + in_height = floorf((float)out_height * scale_y + 0.5f); + } + + /* TODO: support other color formats */ + assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8); + + unsigned int out_x = + GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X); + unsigned int out_y = + GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y); + + unsigned int color_key_enabled = + GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY); + glUniform1ui(r->disp_rndr.pvideo_color_key_enable_loc, + color_key_enabled); + + // TODO: Verify that masking off the top byte is correct. + // SeaBlade sets a color key of 0x80000000 but the texture passed into the + // shader is cleared to 0 alpha. 
+ unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF; + glUniform4f(r->disp_rndr.pvideo_color_key_loc, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0); + + assert(offset + in_pitch * in_height <= limit); + hwaddr end = base + offset + in_pitch * in_height; + assert(end <= memory_region_size(d->vram)); + + pgraph_apply_scaling_factor(pg, &out_x, &out_y); + pgraph_apply_scaling_factor(pg, &out_width, &out_height); + + // Translate for the GL viewport origin. + out_y = MAX(r->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0); + + glActiveTexture(GL_TEXTURE0 + 1); + glBindTexture(GL_TEXTURE_2D, r->disp_rndr.pvideo_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8( + d->vram_ptr + base + offset, in_width, in_height, in_pitch); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA, + GL_UNSIGNED_BYTE, tex_rgba); + g_free(tex_rgba); + glUniform1i(r->disp_rndr.pvideo_tex_loc, 1); + glUniform2f(r->disp_rndr.pvideo_in_pos_loc, in_s, in_t); + glUniform4f(r->disp_rndr.pvideo_pos_loc, + out_x, out_y, out_width, out_height); + glUniform3f(r->disp_rndr.pvideo_scale_loc, + scale_x, scale_y, 1.0f / pg->surface_scale_factor); +} + +static void render_display(NV2AState *d, SurfaceBinding *surface) +{ + struct PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + unsigned int width, height; + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height); + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + int line_offset = surface->pitch / pline_offset; + + /* 
Adjust viewport height for interlaced mode, used only in 1080i */ + if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) { + height *= 2; + } + + pgraph_apply_scaling_factor(pg, &width, &height); + + glBindFramebuffer(GL_FRAMEBUFFER, r->disp_rndr.fbo); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, r->gl_display_buffer); + bool recreate = ( + surface->fmt.gl_internal_format != r->gl_display_buffer_internal_format + || width != r->gl_display_buffer_width + || height != r->gl_display_buffer_height + || surface->fmt.gl_format != r->gl_display_buffer_format + || surface->fmt.gl_type != r->gl_display_buffer_type + ); + + if (recreate) { + /* XXX: There's apparently a bug in some Intel OpenGL drivers for + * Windows that will leak this texture when its orphaned after use in + * another context, apparently regardless of which thread it's created + * or released on. + * + * Driver: 27.20.100.8729 9/11/2020 W10 x64 + * Track: https://community.intel.com/t5/Graphics/OpenGL-Windows-drivers-for-Intel-HD-630-leaking-GPU-memory-when/td-p/1274423 + */ + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + r->gl_display_buffer_internal_format = surface->fmt.gl_internal_format; + r->gl_display_buffer_width = width; + r->gl_display_buffer_height = height; + r->gl_display_buffer_format = surface->fmt.gl_format; + r->gl_display_buffer_type = surface->fmt.gl_type; + glTexImage2D(GL_TEXTURE_2D, 0, + r->gl_display_buffer_internal_format, + r->gl_display_buffer_width, + r->gl_display_buffer_height, + 0, + r->gl_display_buffer_format, + r->gl_display_buffer_type, + NULL); + } + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, r->gl_display_buffer, 0); + GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0}; + glDrawBuffers(1, DrawBuffers); + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == 
GL_FRAMEBUFFER_COMPLETE); + + glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); + glBindVertexArray(r->disp_rndr.vao); + glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo); + glUseProgram(r->disp_rndr.prog); + glProgramUniform1i(r->disp_rndr.prog, r->disp_rndr.tex_loc, 0); + glUniform2f(r->disp_rndr.display_size_loc, width, height); + glUniform1f(r->disp_rndr.line_offset_loc, line_offset); + render_display_pvideo_overlay(d); + + glViewport(0, 0, width, height); + glColorMask(true, true, true, true); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_BLEND); + glDisable(GL_STENCIL_TEST); + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, 0, 0); +} + +static void gl_fence(void) +{ + GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + int result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, + (GLuint64)(5000000000)); + assert(result == GL_CONDITION_SATISFIED || result == GL_ALREADY_SIGNALED); + glDeleteSync(fence); +} + +void pgraph_gl_sync(NV2AState *d) +{ + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + pline_offset); + if (surface == NULL) { + qemu_event_set(&d->pgraph.sync_complete); + return; + } + + /* FIXME: Sanity check surface dimensions */ + + /* Wait for queued commands to complete */ + pgraph_gl_upload_surface_data(d, surface, !tcg_enabled()); + gl_fence(); + assert(glGetError() == GL_NO_ERROR); + + /* Render framebuffer in display context */ + glo_set_current(g_nv2a_context_display); + render_display(d, surface); + gl_fence(); + assert(glGetError() == GL_NO_ERROR); + + /* Switch back to original context */ + glo_set_current(g_nv2a_context_render); + + 
qatomic_set(&d->pgraph.sync_pending, false); + qemu_event_set(&d->pgraph.sync_complete); +} + +int pgraph_gl_get_framebuffer_surface(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + qemu_mutex_lock(&d->pfifo.lock); + // FIXME: Possible race condition with pgraph, consider lock + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + pline_offset); + if (surface == NULL || !surface->color) { + qemu_mutex_unlock(&d->pfifo.lock); + return 0; + } + + assert(surface->color); + assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0); + assert(surface->fmt.gl_format == GL_RGBA + || surface->fmt.gl_format == GL_RGB + || surface->fmt.gl_format == GL_BGR + || surface->fmt.gl_format == GL_BGRA + ); + + surface->frame_time = pg->frame_time; + qemu_event_reset(&d->pgraph.sync_complete); + qatomic_set(&pg->sync_pending, true); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.sync_complete); + + return r->gl_display_buffer; +} diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c new file mode 100644 index 0000000000..94e9beb50b --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -0,0 +1,528 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/fast-hash.h" +#include "hw/xbox/nv2a/nv2a_int.h" +#include "debug.h" +#include "renderer.h" + +void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + NV2A_DPRINTF("---------PRE CLEAR ------\n"); + pg->clearing = true; + + GLbitfield gl_mask = 0; + + bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR); + bool write_zeta = + (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL)); + + if (write_zeta) { + GLint gl_clear_stencil; + GLfloat gl_clear_depth; + pgraph_get_clear_depth_stencil_value(pg, &gl_clear_depth, + &gl_clear_stencil); + + if (parameter & NV097_CLEAR_SURFACE_Z) { + gl_mask |= GL_DEPTH_BUFFER_BIT; + glDepthMask(GL_TRUE); + glClearDepth(gl_clear_depth); + } + if (parameter & NV097_CLEAR_SURFACE_STENCIL) { + gl_mask |= GL_STENCIL_BUFFER_BIT; + glStencilMask(0xff); + glClearStencil(gl_clear_stencil); + } + } + if (write_color) { + gl_mask |= GL_COLOR_BUFFER_BIT; + glColorMask((parameter & NV097_CLEAR_SURFACE_R) + ? GL_TRUE : GL_FALSE, + (parameter & NV097_CLEAR_SURFACE_G) + ? GL_TRUE : GL_FALSE, + (parameter & NV097_CLEAR_SURFACE_B) + ? GL_TRUE : GL_FALSE, + (parameter & NV097_CLEAR_SURFACE_A) + ? 
GL_TRUE : GL_FALSE); + + GLfloat rgba[4]; + pgraph_get_clear_color(pg, rgba); + glClearColor(rgba[0], rgba[1], rgba[2], rgba[3]); + } + + pgraph_gl_surface_update(d, true, write_color, write_zeta); + + /* FIXME: Needs confirmation */ + unsigned int xmin = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMIN); + unsigned int xmax = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMAX); + unsigned int ymin = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMIN); + unsigned int ymax = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMAX); + + NV2A_DPRINTF( + "------------------CLEAR 0x%x %d,%d - %d,%d %x---------------\n", + parameter, xmin, ymin, xmax, ymax, + d->pgraph.regs_[NV_PGRAPH_COLORCLEARVALUE]); + + unsigned int scissor_width = xmax - xmin + 1, + scissor_height = ymax - ymin + 1; + pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); + pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); + ymin = pg->surface_binding_dim.height - (ymin + scissor_height); + + NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin, + xmin + scissor_width - 1, ymin + scissor_height - 1); + + bool full_clear = !xmin && !ymin && + scissor_width >= pg->surface_binding_dim.width && + scissor_height >= pg->surface_binding_dim.height; + + pgraph_apply_scaling_factor(pg, &xmin, &ymin); + pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); + + /* FIXME: Respect window clip?!?! */ + glEnable(GL_SCISSOR_TEST); + glScissor(xmin, ymin, scissor_width, scissor_height); + + /* Dither */ + /* FIXME: Maybe also disable it here? 
+ GL implementation dependent */ + if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_DITHERENABLE) { + glEnable(GL_DITHER); + } else { + glDisable(GL_DITHER); + } + + glClear(gl_mask); + + glDisable(GL_SCISSOR_TEST); + + pgraph_gl_set_surface_dirty(pg, write_color, write_zeta); + + if (r->color_binding) { + r->color_binding->cleared = full_clear && write_color; + } + if (r->zeta_binding) { + r->zeta_binding->cleared = full_clear && write_zeta; + } + + pg->clearing = false; +} + +void pgraph_gl_draw_begin(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + NV2A_GL_DGROUP_BEGIN("NV097_SET_BEGIN_END: 0x%x", pg->primitive_mode); + + uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0); + bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE; + bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE; + bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE; + bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE; + bool color_write = mask_alpha || mask_red || mask_green || mask_blue; + bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; + bool stencil_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; + bool is_nop_draw = !(color_write || depth_test || stencil_test); + + pgraph_gl_surface_update(d, true, true, depth_test || stencil_test); + + if (is_nop_draw) { + return; + } + + assert(r->color_binding || r->zeta_binding); + + pgraph_gl_bind_textures(d); + pgraph_gl_bind_shaders(pg); + + glColorMask(mask_red, mask_green, mask_blue, mask_alpha); + glDepthMask(!!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE)); + glStencilMask(GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE)); + + if (pgraph_reg_r(pg, NV_PGRAPH_BLEND) & NV_PGRAPH_BLEND_EN) { + glEnable(GL_BLEND); + uint32_t sfactor = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), + NV_PGRAPH_BLEND_SFACTOR); + uint32_t dfactor 
= GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), + NV_PGRAPH_BLEND_DFACTOR); + assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_gl_map)); + assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_gl_map)); + glBlendFunc(pgraph_blend_factor_gl_map[sfactor], + pgraph_blend_factor_gl_map[dfactor]); + + uint32_t equation = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), + NV_PGRAPH_BLEND_EQN); + assert(equation < ARRAY_SIZE(pgraph_blend_equation_gl_map)); + glBlendEquation(pgraph_blend_equation_gl_map[equation]); + + uint32_t blend_color = pgraph_reg_r(pg, NV_PGRAPH_BLENDCOLOR); + float gl_blend_color[4]; + pgraph_argb_pack32_to_rgba_float(blend_color, gl_blend_color); + glBlendColor(gl_blend_color[0], gl_blend_color[1], gl_blend_color[2], + gl_blend_color[3]); + } else { + glDisable(GL_BLEND); + } + + /* Face culling */ + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) + & NV_PGRAPH_SETUPRASTER_CULLENABLE) { + uint32_t cull_face = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), + NV_PGRAPH_SETUPRASTER_CULLCTRL); + assert(cull_face < ARRAY_SIZE(pgraph_cull_face_gl_map)); + glCullFace(pgraph_cull_face_gl_map[cull_face]); + glEnable(GL_CULL_FACE); + } else { + glDisable(GL_CULL_FACE); + } + + /* Clipping */ + glEnable(GL_CLIP_DISTANCE0); + glEnable(GL_CLIP_DISTANCE1); + + /* Front-face select */ + glFrontFace(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) + & NV_PGRAPH_SETUPRASTER_FRONTFACE + ? GL_CCW : GL_CW); + + /* Polygon offset */ + /* FIXME: GL implementation-specific, maybe do this in VS? 
*/ + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) { + glEnable(GL_POLYGON_OFFSET_FILL); + } else { + glDisable(GL_POLYGON_OFFSET_FILL); + } + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) { + glEnable(GL_POLYGON_OFFSET_LINE); + } else { + glDisable(GL_POLYGON_OFFSET_LINE); + } + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) { + glEnable(GL_POLYGON_OFFSET_POINT); + } else { + glDisable(GL_POLYGON_OFFSET_POINT); + } + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { + uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR); + GLfloat zfactor = *(float*)&zfactor_u32; + uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); + GLfloat zbias = *(float*)&zbias_u32; + glPolygonOffset(zfactor, zbias); + } + + /* Depth testing */ + if (depth_test) { + glEnable(GL_DEPTH_TEST); + + uint32_t depth_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), + NV_PGRAPH_CONTROL_0_ZFUNC); + assert(depth_func < ARRAY_SIZE(pgraph_depth_func_gl_map)); + glDepthFunc(pgraph_depth_func_gl_map[depth_func]); + } else { + glDisable(GL_DEPTH_TEST); + } + + if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE), + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) { + glEnable(GL_DEPTH_CLAMP); + } else { + glDisable(GL_DEPTH_CLAMP); + } + + if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), + NV_PGRAPH_CONTROL_3_SHADEMODE) == + NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT) { + glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); + } + + if (stencil_test) { + glEnable(GL_STENCIL_TEST); + + uint32_t stencil_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_FUNC); + uint32_t stencil_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + 
NV_PGRAPH_CONTROL_1_STENCIL_REF); + uint32_t func_mask = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ); + uint32_t op_fail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2), + NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL); + uint32_t op_zfail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2), + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL); + uint32_t op_zpass = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2), + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS); + + assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_gl_map)); + assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_gl_map)); + assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_gl_map)); + assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_gl_map)); + + glStencilFunc( + pgraph_stencil_func_gl_map[stencil_func], + stencil_ref, + func_mask); + + glStencilOp( + pgraph_stencil_op_gl_map[op_fail], + pgraph_stencil_op_gl_map[op_zfail], + pgraph_stencil_op_gl_map[op_zpass]); + + } else { + glDisable(GL_STENCIL_TEST); + } + + /* Dither */ + /* FIXME: GL implementation dependent */ + if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_DITHERENABLE) { + glEnable(GL_DITHER); + } else { + glDisable(GL_DITHER); + } + + glEnable(GL_PROGRAM_POINT_SIZE); + + bool anti_aliasing = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ANTIALIASING), NV_PGRAPH_ANTIALIASING_ENABLE); + + /* Edge Antialiasing */ + if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) { + glEnable(GL_LINE_SMOOTH); + } else { + glDisable(GL_LINE_SMOOTH); + } + if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) { + glEnable(GL_POLYGON_SMOOTH); + } else { + glDisable(GL_POLYGON_SMOOTH); + } + + unsigned int vp_width = pg->surface_binding_dim.width, + vp_height = pg->surface_binding_dim.height; + pgraph_apply_scaling_factor(pg, &vp_width, &vp_height); + glViewport(0, 0, vp_width, vp_height); + + /* Surface clip */ + /* FIXME: Consider moving to 
PSH w/ window clip */ + unsigned int xmin = pg->surface_shape.clip_x - pg->surface_binding_dim.clip_x, + ymin = pg->surface_shape.clip_y - pg->surface_binding_dim.clip_y; + unsigned int xmax = xmin + pg->surface_shape.clip_width - 1, + ymax = ymin + pg->surface_shape.clip_height - 1; + + unsigned int scissor_width = xmax - xmin + 1, + scissor_height = ymax - ymin + 1; + pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); + pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); + ymin = pg->surface_binding_dim.height - (ymin + scissor_height); + pgraph_apply_scaling_factor(pg, &xmin, &ymin); + pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); + + glEnable(GL_SCISSOR_TEST); + glScissor(xmin, ymin, scissor_width, scissor_height); + + /* Visibility testing */ + if (pg->zpass_pixel_count_enable) { + r->gl_zpass_pixel_count_query_count++; + r->gl_zpass_pixel_count_queries = (GLuint*)g_realloc( + r->gl_zpass_pixel_count_queries, + sizeof(GLuint) * r->gl_zpass_pixel_count_query_count); + + GLuint gl_query; + glGenQueries(1, &gl_query); + r->gl_zpass_pixel_count_queries[ + r->gl_zpass_pixel_count_query_count - 1] = gl_query; + glBeginQuery(GL_SAMPLES_PASSED, gl_query); + } +} + +void pgraph_gl_draw_end(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0); + bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE; + bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE; + bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE; + bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE; + bool color_write = mask_alpha || mask_red || mask_green || mask_blue; + bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; + bool stencil_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; + bool is_nop_draw = !(color_write || depth_test || stencil_test); + + if 
(is_nop_draw) { + // FIXME: Check PGRAPH register 0x880. + // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit + // check that will raise an exception in the case that a draw should + // modify the color and/or zeta buffer but the target(s) are masked + // off. This check only seems to trigger during the fragment + // processing, it is legal to attempt a draw that is entirely + // clipped regardless of 0x880. See xemu#635 for context. + return; + } + + pgraph_gl_flush_draw(d); + + /* End of visibility testing */ + if (pg->zpass_pixel_count_enable) { + nv2a_profile_inc_counter(NV2A_PROF_QUERY); + glEndQuery(GL_SAMPLES_PASSED); + } + + pg->draw_time++; + if (r->color_binding && pgraph_color_write_enabled(pg)) { + r->color_binding->draw_time = pg->draw_time; + } + if (r->zeta_binding && pgraph_zeta_write_enabled(pg)) { + r->zeta_binding->draw_time = pg->draw_time; + } + + pgraph_gl_set_surface_dirty(pg, color_write, depth_test || stencil_test); + NV2A_GL_DGROUP_END(); +} + +void pgraph_gl_flush_draw(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (!(r->color_binding || r->zeta_binding)) { + return; + } + assert(r->shader_binding); + + if (pg->draw_arrays_length) { + NV2A_GL_DPRINTF(false, "Draw Arrays"); + nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS); + assert(pg->inline_elements_length == 0); + assert(pg->inline_buffer_length == 0); + assert(pg->inline_array_length == 0); + + pgraph_gl_bind_vertex_attributes(d, pg->draw_arrays_min_start, + pg->draw_arrays_max_count - 1, + false, 0, + pg->draw_arrays_max_count - 1); + glMultiDrawArrays(r->shader_binding->gl_primitive_mode, + pg->draw_arrays_start, + pg->draw_arrays_count, + pg->draw_arrays_length); + } else if (pg->inline_elements_length) { + NV2A_GL_DPRINTF(false, "Inline Elements"); + nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS); + assert(pg->inline_buffer_length == 0); + assert(pg->inline_array_length == 0); + + uint32_t min_element = 
(uint32_t)-1; + uint32_t max_element = 0; + for (int i=0; i < pg->inline_elements_length; i++) { + max_element = MAX(pg->inline_elements[i], max_element); + min_element = MIN(pg->inline_elements[i], min_element); + } + + pgraph_gl_bind_vertex_attributes( + d, min_element, max_element, false, 0, + pg->inline_elements[pg->inline_elements_length - 1]); + + VertexKey k; + memset(&k, 0, sizeof(VertexKey)); + k.count = pg->inline_elements_length; + k.gl_type = GL_UNSIGNED_INT; + k.gl_normalize = GL_FALSE; + k.stride = sizeof(uint32_t); + uint64_t h = fast_hash((uint8_t*)pg->inline_elements, + pg->inline_elements_length * 4); + + LruNode *node = lru_lookup(&r->element_cache, h, &k); + VertexLruNode *found = container_of(node, VertexLruNode, node); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, found->gl_buffer); + if (!found->initialized) { + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, + pg->inline_elements_length * 4, + pg->inline_elements, GL_STATIC_DRAW); + found->initialized = true; + } else { + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY); + } + glDrawElements(r->shader_binding->gl_primitive_mode, + pg->inline_elements_length, GL_UNSIGNED_INT, + (void *)0); + } else if (pg->inline_buffer_length) { + NV2A_GL_DPRINTF(false, "Inline Buffer"); + nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS); + assert(pg->inline_array_length == 0); + + if (pg->compressed_attrs) { + pg->compressed_attrs = 0; + pgraph_gl_bind_shaders(pg); + } + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + if (attr->inline_buffer_populated) { + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3); + glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_buffer[i]); + glBufferData(GL_ARRAY_BUFFER, + pg->inline_buffer_length * sizeof(float) * 4, + attr->inline_buffer, GL_STREAM_DRAW); + glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0); + glEnableVertexAttribArray(i); + 
attr->inline_buffer_populated = false; + memcpy(attr->inline_value, + attr->inline_buffer + (pg->inline_buffer_length - 1) * 4, + sizeof(attr->inline_value)); + } else { + glDisableVertexAttribArray(i); + glVertexAttrib4fv(i, attr->inline_value); + } + } + + glDrawArrays(r->shader_binding->gl_primitive_mode, + 0, pg->inline_buffer_length); + } else if (pg->inline_array_length) { + NV2A_GL_DPRINTF(false, "Inline Array"); + nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS); + + unsigned int index_count = pgraph_gl_bind_inline_array(d); + glDrawArrays(r->shader_binding->gl_primitive_mode, + 0, index_count); + } else { + NV2A_GL_DPRINTF(true, "EMPTY NV097_SET_BEGIN_END"); + NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END"); + } +} diff --git a/hw/xbox/nv2a/pgraph/gl/meson.build b/hw/xbox/nv2a/pgraph/gl/meson.build new file mode 100644 index 0000000000..ab25eacb7d --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/meson.build @@ -0,0 +1,12 @@ +specific_ss.add([sdl, gloffscreen, files( + 'blit.c', + 'debug.c', + 'display.c', + 'draw.c', + 'renderer.c', + 'reports.c', + 'shaders.c', + 'surface.c', + 'texture.c', + 'vertex.c', + )]) diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c new file mode 100644 index 0000000000..2114608683 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/renderer.c @@ -0,0 +1,201 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/pgraph/pgraph.h" +#include "debug.h" +#include "renderer.h" + +GloContext *g_nv2a_context_render; +GloContext *g_nv2a_context_display; + +static void nv2a_gl_context_init(void) +{ + g_nv2a_context_render = glo_context_create(); + g_nv2a_context_display = glo_context_create(); +} + +static void pgraph_gl_init_thread(NV2AState *d) +{ + glo_set_current(g_nv2a_context_render); +} + +static void pgraph_gl_deinit(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + glo_set_current(g_nv2a_context_render); + + pgraph_gl_deinit_surfaces(pg); + pgraph_gl_deinit_shader_cache(pg); + pgraph_gl_deinit_texture_cache(pg); + + glo_set_current(NULL); + glo_context_destroy(g_nv2a_context_render); + glo_context_destroy(g_nv2a_context_display); +} + +static void pgraph_gl_flip_stall(NV2AState *d) +{ + NV2A_GL_DFRAME_TERMINATOR(); + glFinish(); +} + +static void pgraph_gl_flush(NV2AState *d) +{ + pgraph_gl_surface_flush(d); + pgraph_gl_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram)); + pgraph_gl_update_entire_memory_buffer(d); + /* FIXME: Flush more? 
*/ + + qatomic_set(&d->pgraph.flush_pending, false); + qemu_event_set(&d->pgraph.flush_complete); +} + +static void pgraph_gl_process_pending(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (qatomic_read(&r->downloads_pending) || + qatomic_read(&r->download_dirty_surfaces_pending) || + qatomic_read(&d->pgraph.sync_pending) || + qatomic_read(&d->pgraph.flush_pending) || + qatomic_read(&r->shader_cache_writeback_pending)) { + qemu_mutex_unlock(&d->pfifo.lock); + qemu_mutex_lock(&d->pgraph.lock); + if (qatomic_read(&r->downloads_pending)) { + pgraph_gl_process_pending_downloads(d); + } + if (qatomic_read(&r->download_dirty_surfaces_pending)) { + pgraph_gl_download_dirty_surfaces(d); + } + if (qatomic_read(&d->pgraph.sync_pending)) { + pgraph_gl_sync(d); + } + if (qatomic_read(&d->pgraph.flush_pending)) { + pgraph_gl_flush(d); + } + if (qatomic_read(&r->shader_cache_writeback_pending)) { + pgraph_gl_shader_write_cache_reload_list(&d->pgraph); + } + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + } +} + +static void pgraph_gl_pre_savevm_trigger(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + qatomic_set(&r->download_dirty_surfaces_pending, true); + qemu_event_reset(&r->dirty_surfaces_download_complete); +} + +static void pgraph_gl_pre_savevm_wait(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + qemu_event_wait(&r->dirty_surfaces_download_complete); +} + +static void pgraph_gl_pre_shutdown_trigger(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + qatomic_set(&r->shader_cache_writeback_pending, true); + qemu_event_reset(&r->shader_cache_writeback_complete); +} + +static void pgraph_gl_pre_shutdown_wait(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + qemu_event_wait(&r->shader_cache_writeback_complete); +} + 
+static void pgraph_gl_init(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + pg->gl_renderer_state = g_malloc(sizeof(PGRAPHGLState)); + + /* fire up opengl */ + glo_set_current(g_nv2a_context_render); + +#ifdef DEBUG_NV2A_GL + gl_debug_initialize(); +#endif + + /* DXT textures */ + assert(glo_check_extension("GL_EXT_texture_compression_s3tc")); + /* Internal RGB565 texture format */ + assert(glo_check_extension("GL_ARB_ES2_compatibility")); + + pgraph_gl_init_surfaces(pg); + pgraph_gl_init_reports(d); + pgraph_gl_init_texture_cache(d); + pgraph_gl_init_vertex_cache(d); + pgraph_gl_init_shader_cache(pg); + + glo_set_current(g_nv2a_context_display); + pgraph_gl_init_display_renderer(d); + + glo_set_current(NULL); +} + +static PGRAPHRenderer pgraph_gl_renderer = { + .type = CONFIG_DISPLAY_RENDERER_OPENGL, + .name = "OpenGL", + .ops = { + .init = pgraph_gl_init, + .early_context_init = nv2a_gl_context_init, + .init_thread = pgraph_gl_init_thread, + .finalize = pgraph_gl_deinit, + .clear_report_value = pgraph_gl_clear_report_value, + .clear_surface = pgraph_gl_clear_surface, + .draw_begin = pgraph_gl_draw_begin, + .draw_end = pgraph_gl_draw_end, + .flip_stall = pgraph_gl_flip_stall, + .flush_draw = pgraph_gl_flush_draw, + .get_report = pgraph_gl_get_report, + .image_blit = pgraph_gl_image_blit, + .pre_savevm_trigger = pgraph_gl_pre_savevm_trigger, + .pre_savevm_wait = pgraph_gl_pre_savevm_wait, + .pre_shutdown_trigger = pgraph_gl_pre_shutdown_trigger, + .pre_shutdown_wait = pgraph_gl_pre_shutdown_wait, + .process_pending = pgraph_gl_process_pending, + .process_pending_reports = pgraph_gl_process_pending_reports, + .surface_update = pgraph_gl_surface_update, + .set_surface_scale_factor = pgraph_gl_set_surface_scale_factor, + .get_surface_scale_factor = pgraph_gl_get_surface_scale_factor, + .get_framebuffer_surface = pgraph_gl_get_framebuffer_surface, + } +}; + +static void __attribute__((constructor)) register_renderer(void) +{ + 
pgraph_renderer_register(&pgraph_gl_renderer); +} diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h new file mode 100644 index 0000000000..fff4ac7d53 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -0,0 +1,283 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GL_RENDERER_H +#define HW_XBOX_NV2A_PGRAPH_GL_RENDERER_H + +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "qemu/queue.h" +#include "qemu/lru.h" + +#include "hw/hw.h" + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/nv2a_regs.h" +#include "hw/xbox/nv2a/pgraph/surface.h" +#include "hw/xbox/nv2a/pgraph/texture.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +#include "gloffscreen.h" +#include "constants.h" + +typedef struct SurfaceBinding { + QTAILQ_ENTRY(SurfaceBinding) entry; + MemAccessCallback *access_cb; + + hwaddr vram_addr; + + SurfaceShape shape; + uintptr_t dma_addr; + uintptr_t dma_len; + bool color; + bool swizzle; + + unsigned int width; + unsigned int height; + unsigned int pitch; + size_t size; + + bool cleared; + int frame_time; + int draw_time; + bool draw_dirty; + bool download_pending; + bool upload_pending; + + GLuint gl_buffer; + SurfaceFormatInfo fmt; +} SurfaceBinding; + +typedef struct TextureBinding { + unsigned int refcnt; + int draw_time; + uint64_t data_hash; + unsigned int scale; + unsigned int min_filter; + unsigned int mag_filter; + unsigned int addru; + unsigned int addrv; + unsigned int addrp; + uint32_t border_color; + bool border_color_set; + GLenum gl_target; + GLuint gl_texture; +} TextureBinding; + +typedef struct ShaderBinding { + GLuint gl_program; + GLenum gl_primitive_mode; + + GLint psh_constant_loc[9][2]; + GLint alpha_ref_loc; + + GLint bump_mat_loc[NV2A_MAX_TEXTURES]; + GLint bump_scale_loc[NV2A_MAX_TEXTURES]; + GLint bump_offset_loc[NV2A_MAX_TEXTURES]; + GLint tex_scale_loc[NV2A_MAX_TEXTURES]; + + GLint surface_size_loc; + GLint clip_range_loc; + + GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS]; + uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; + + GLint inv_viewport_loc; + GLint ltctxa_loc[NV2A_LTCTXA_COUNT]; + GLint ltctxb_loc[NV2A_LTCTXB_COUNT]; + GLint ltc1_loc[NV2A_LTC1_COUNT]; + + GLint fog_color_loc; + GLint fog_param_loc; + GLint 
light_infinite_half_vector_loc[NV2A_MAX_LIGHTS]; + GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS]; + GLint light_local_position_loc[NV2A_MAX_LIGHTS]; + GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS]; + + GLint clip_region_loc[8]; + + GLint material_alpha_loc; +} ShaderBinding; + +typedef struct ShaderLruNode { + LruNode node; + bool cached; + void *program; + size_t program_size; + GLenum program_format; + ShaderState state; + ShaderBinding *binding; + QemuThread *save_thread; +} ShaderLruNode; + +typedef struct VertexKey { + size_t count; + size_t stride; + hwaddr addr; + + GLboolean gl_normalize; + GLuint gl_type; +} VertexKey; + +typedef struct VertexLruNode { + LruNode node; + VertexKey key; + bool initialized; + + GLuint gl_buffer; +} VertexLruNode; + +typedef struct TextureKey { + TextureShape state; + hwaddr texture_vram_offset; + hwaddr texture_length; + hwaddr palette_vram_offset; + hwaddr palette_length; +} TextureKey; + +typedef struct TextureLruNode { + LruNode node; + TextureKey key; + TextureBinding *binding; + bool possibly_dirty; +} TextureLruNode; + +typedef struct QueryReport { + QSIMPLEQ_ENTRY(QueryReport) entry; + bool clear; + uint32_t parameter; + unsigned int query_count; + GLuint *queries; +} QueryReport; + +typedef struct PGRAPHGLState { + GLuint gl_framebuffer; + GLuint gl_display_buffer; + GLint gl_display_buffer_internal_format; + GLsizei gl_display_buffer_width; + GLsizei gl_display_buffer_height; + GLenum gl_display_buffer_format; + GLenum gl_display_buffer_type; + + Lru element_cache; + VertexLruNode *element_cache_entries; + GLuint gl_inline_array_buffer; + GLuint gl_memory_buffer; + GLuint gl_vertex_array; + GLuint gl_inline_buffer[NV2A_VERTEXSHADER_ATTRIBUTES]; + + QTAILQ_HEAD(, SurfaceBinding) surfaces; + SurfaceBinding *color_binding, *zeta_binding; + bool downloads_pending; + QemuEvent downloads_complete; + bool download_dirty_surfaces_pending; + QemuEvent dirty_surfaces_download_complete; // common + + TextureBinding 
*texture_binding[NV2A_MAX_TEXTURES]; + Lru texture_cache; + TextureLruNode *texture_cache_entries; + + Lru shader_cache; + ShaderLruNode *shader_cache_entries; + ShaderBinding *shader_binding; + QemuMutex shader_cache_lock; + QemuThread shader_disk_thread; + + unsigned int zpass_pixel_count_result; + unsigned int gl_zpass_pixel_count_query_count; + GLuint *gl_zpass_pixel_count_queries; + QSIMPLEQ_HEAD(, QueryReport) report_queue; + + bool shader_cache_writeback_pending; + QemuEvent shader_cache_writeback_complete; + + struct s2t_rndr { + GLuint fbo, vao, vbo, prog; + GLuint tex_loc, surface_size_loc; + } s2t_rndr; + + struct disp_rndr { + GLuint fbo, vao, vbo, prog; + GLuint display_size_loc; + GLuint line_offset_loc; + GLuint tex_loc; + GLuint pvideo_tex; + GLint pvideo_enable_loc; + GLint pvideo_tex_loc; + GLint pvideo_in_pos_loc; + GLint pvideo_pos_loc; + GLint pvideo_scale_loc; + GLint pvideo_color_key_enable_loc; + GLint pvideo_color_key_loc; + GLint palette_loc[256]; + } disp_rndr; +} PGRAPHGLState; + +extern GloContext *g_nv2a_context_render; +extern GloContext *g_nv2a_context_display; + +unsigned int pgraph_gl_bind_inline_array(NV2AState *d); +void pgraph_gl_bind_shaders(PGRAPHState *pg); +void pgraph_gl_bind_textures(NV2AState *d); +void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element, unsigned int max_element, bool inline_data, unsigned int inline_stride, unsigned int provoking_element); +bool pgraph_gl_check_surface_to_texture_compatibility(const SurfaceBinding *surface, const TextureShape *shape); +GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src); +void pgraph_gl_deinit_shader_cache(PGRAPHState *pg); +void pgraph_gl_deinit_surfaces(PGRAPHState *pg); +void pgraph_gl_deinit_texture_cache(PGRAPHState *pg); +void pgraph_gl_download_dirty_surfaces(NV2AState *d); +void pgraph_gl_clear_report_value(NV2AState *d); +void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter); +void pgraph_gl_draw_begin(NV2AState 
*d); +void pgraph_gl_draw_end(NV2AState *d); +void pgraph_gl_flush_draw(NV2AState *d); +void pgraph_gl_get_report(NV2AState *d, uint32_t parameter); +void pgraph_gl_image_blit(NV2AState *d); +void pgraph_gl_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size); +void pgraph_gl_process_pending_reports(NV2AState *d); +void pgraph_gl_surface_flush(NV2AState *d); +void pgraph_gl_surface_update(NV2AState *d, bool upload, bool color_write, bool zeta_write); +void pgraph_gl_sync(NV2AState *d); +void pgraph_gl_update_entire_memory_buffer(NV2AState *d); +void pgraph_gl_init_display_renderer(NV2AState *d); +void pgraph_gl_init_reports(NV2AState *d); +void pgraph_gl_init_shader_cache(PGRAPHState *pg); +void pgraph_gl_init_surfaces(PGRAPHState *pg); +void pgraph_gl_init_texture_cache(NV2AState *d); +void pgraph_gl_init_vertex_cache(NV2AState *d); +void pgraph_gl_process_pending_downloads(NV2AState *d); +void pgraph_gl_reload_surface_scale_factor(PGRAPHState *pg); +void pgraph_gl_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, TextureShape *texture_shape, int texture_unit); +void pgraph_gl_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta); +void pgraph_gl_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface); +SurfaceBinding *pgraph_gl_surface_get(NV2AState *d, hwaddr addr); +SurfaceBinding *pgraph_gl_surface_get_within(NV2AState *d, hwaddr addr); +void pgraph_gl_surface_invalidate(NV2AState *d, SurfaceBinding *e); +void pgraph_gl_unbind_surface(NV2AState *d, bool color); +void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force); +void pgraph_gl_shader_cache_to_disk(ShaderLruNode *snode); +bool pgraph_gl_shader_load_from_memory(ShaderLruNode *snode); +void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg); +void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale); +unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d); +int 
pgraph_gl_get_framebuffer_surface(NV2AState *d); + +#endif diff --git a/hw/xbox/nv2a/pgraph/gl/reports.c b/hw/xbox/nv2a/pgraph/gl/reports.c new file mode 100644 index 0000000000..0673c37e0c --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/reports.c @@ -0,0 +1,111 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include +#include "renderer.h" + +static void process_pending_report(NV2AState *d, QueryReport *report) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (report->clear) { + r->zpass_pixel_count_result = 0; + return; + } + + uint8_t type = GET_MASK(report->parameter, NV097_GET_REPORT_TYPE); + assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); + + /* FIXME: Multisampling affects this (both: OGL and Xbox GPU), + * not sure if CLEARs also count + */ + /* FIXME: What about clipping regions etc? 
*/ + for (int i = 0; i < report->query_count; i++) { + GLuint gl_query_result = 0; + glGetQueryObjectuiv(report->queries[i], GL_QUERY_RESULT, &gl_query_result); + gl_query_result /= pg->surface_scale_factor * pg->surface_scale_factor; + r->zpass_pixel_count_result += gl_query_result; + } + + if (report->query_count) { + glDeleteQueries(report->query_count, report->queries); + g_free(report->queries); + } + + pgraph_write_zpass_pixel_cnt_report(d, report->parameter, r->zpass_pixel_count_result); +} + +void pgraph_gl_process_pending_reports(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + QueryReport *report, *next; + + QSIMPLEQ_FOREACH_SAFE(report, &r->report_queue, entry, next) { + process_pending_report(d, report); + QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry); + g_free(report); + } +} + +void pgraph_gl_clear_report_value(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + /* FIXME: Does this have a value in parameter? Also does this (also?) modify + * the report memory block? 
+ */ + if (r->gl_zpass_pixel_count_query_count) { + glDeleteQueries(r->gl_zpass_pixel_count_query_count, + r->gl_zpass_pixel_count_queries); + r->gl_zpass_pixel_count_query_count = 0; + } + + QueryReport *report = g_malloc(sizeof(QueryReport)); + report->clear = true; + QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry); +} + +void pgraph_gl_init_reports(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + QSIMPLEQ_INIT(&r->report_queue); +} + +void pgraph_gl_get_report(NV2AState *d, uint32_t parameter) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + QueryReport *report = g_malloc(sizeof(QueryReport)); + report->clear = false; + report->parameter = parameter; + report->query_count = r->gl_zpass_pixel_count_query_count; + report->queries = r->gl_zpass_pixel_count_queries; + QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry); + + r->gl_zpass_pixel_count_query_count = 0; + r->gl_zpass_pixel_count_queries = NULL; +} diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c new file mode 100644 index 0000000000..0bb4eaa598 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -0,0 +1,1102 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/fast-hash.h" +#include "qemu/mstring.h" +#include + +#include "xemu-version.h" +#include "ui/xemu-settings.h" +#include "hw/xbox/nv2a/pgraph/glsl/geom.h" +#include "hw/xbox/nv2a/pgraph/glsl/vsh.h" +#include "hw/xbox/nv2a/pgraph/glsl/psh.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "hw/xbox/nv2a/pgraph/util.h" +#include "debug.h" +#include "renderer.h" + +static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, bool binding_changed, bool vertex_program, bool fixed_function); + +static GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode) +{ + if (polygon_mode == POLY_MODE_POINT) { + return GL_POINTS; + } + + switch (primitive_mode) { + case PRIM_TYPE_POINTS: return GL_POINTS; + case PRIM_TYPE_LINES: return GL_LINES; + case PRIM_TYPE_LINE_LOOP: return GL_LINE_LOOP; + case PRIM_TYPE_LINE_STRIP: return GL_LINE_STRIP; + case PRIM_TYPE_TRIANGLES: return GL_TRIANGLES; + case PRIM_TYPE_TRIANGLE_STRIP: return GL_TRIANGLE_STRIP; + case PRIM_TYPE_TRIANGLE_FAN: return GL_TRIANGLE_FAN; + case PRIM_TYPE_QUADS: return GL_LINES_ADJACENCY; + case PRIM_TYPE_QUAD_STRIP: return GL_LINE_STRIP_ADJACENCY; + case PRIM_TYPE_POLYGON: + if (polygon_mode == POLY_MODE_LINE) { + return GL_LINE_LOOP; + } else if (polygon_mode == POLY_MODE_FILL) { + return GL_TRIANGLE_FAN; + } + + assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode"); + return 0; + default: + assert(!"Invalid primitive_mode"); + return 0; + } +} + +static GLuint create_gl_shader(GLenum gl_shader_type, + const char *code, + const char *name) +{ + GLint compiled = 0; + + NV2A_GL_DGROUP_BEGIN("Creating new %s", name); + + NV2A_DPRINTF("compile new %s, code:\n%s\n", name, code); + + GLuint shader = glCreateShader(gl_shader_type); + glShaderSource(shader, 1, &code, 0); + 
glCompileShader(shader); + + /* Check it compiled */ + compiled = 0; + glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled); + if (!compiled) { + GLchar* log; + GLint log_length; + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + log = g_malloc(log_length * sizeof(GLchar)); + glGetShaderInfoLog(shader, log_length, NULL, log); + fprintf(stderr, "%s\n\n" "nv2a: %s compilation failed: %s\n", code, name, log); + g_free(log); + + NV2A_GL_DGROUP_END(); + abort(); + } + + NV2A_GL_DGROUP_END(); + + return shader; +} + +static void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state) +{ + int i, j; + char tmp[64]; + + /* set texture samplers */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + char samplerName[16]; + snprintf(samplerName, sizeof(samplerName), "texSamp%d", i); + GLint texSampLoc = glGetUniformLocation(binding->gl_program, samplerName); + if (texSampLoc >= 0) { + glUniform1i(texSampLoc, i); + } + } + + /* validate the program */ + glValidateProgram(binding->gl_program); + GLint valid = 0; + glGetProgramiv(binding->gl_program, GL_VALIDATE_STATUS, &valid); + if (!valid) { + GLchar log[1024]; + glGetProgramInfoLog(binding->gl_program, 1024, NULL, log); + fprintf(stderr, "nv2a: shader validation failed: %s\n", log); + abort(); + } + + /* lookup fragment shader uniforms */ + for (i = 0; i < 9; i++) { + for (j = 0; j < 2; j++) { + snprintf(tmp, sizeof(tmp), "c%d_%d", j, i); + binding->psh_constant_loc[i][j] = glGetUniformLocation(binding->gl_program, tmp); + } + } + binding->alpha_ref_loc = glGetUniformLocation(binding->gl_program, "alphaRef"); + for (i = 1; i < NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "bumpMat%d", i); + binding->bump_mat_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "bumpScale%d", i); + binding->bump_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "bumpOffset%d", i); + binding->bump_offset_loc[i] = 
glGetUniformLocation(binding->gl_program, tmp); + } + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "texScale%d", i); + binding->tex_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + + /* lookup vertex shader uniforms */ + for(i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) { + snprintf(tmp, sizeof(tmp), "c[%d]", i); + binding->vsh_constant_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + binding->surface_size_loc = glGetUniformLocation(binding->gl_program, "surfaceSize"); + binding->clip_range_loc = glGetUniformLocation(binding->gl_program, "clipRange"); + binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor"); + binding->fog_param_loc = glGetUniformLocation(binding->gl_program, "fogParam"); + + binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport"); + for (i = 0; i < NV2A_LTCTXA_COUNT; i++) { + snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i); + binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < NV2A_LTCTXB_COUNT; i++) { + snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i); + binding->ltctxb_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < NV2A_LTC1_COUNT; i++) { + snprintf(tmp, sizeof(tmp), "ltc1[%d]", i); + binding->ltc1_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i); + binding->light_infinite_half_vector_loc[i] = + glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i); + binding->light_infinite_direction_loc[i] = + glGetUniformLocation(binding->gl_program, tmp); + + snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i); + binding->light_local_position_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i); + binding->light_local_attenuation_loc[i] = + 
glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < 8; i++) { + snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i); + binding->clip_region_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + + if (state->fixed_function) { + binding->material_alpha_loc = + glGetUniformLocation(binding->gl_program, "material_alpha"); + } else { + binding->material_alpha_loc = -1; + } +} + +static ShaderBinding *generate_shaders(const ShaderState *state) +{ + char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL); + if (previous_numeric_locale) { + previous_numeric_locale = g_strdup(previous_numeric_locale); + } + + /* Ensure numeric values are printed with '.' radix, no grouping */ + setlocale(LC_NUMERIC, "C"); + GLuint program = glCreateProgram(); + + /* Create an optional geometry shader and find primitive type */ + GLenum gl_primitive_mode = + get_gl_primitive_mode(state->polygon_front_mode, state->primitive_mode); + MString* geometry_shader_code = + pgraph_gen_geom_glsl(state->polygon_front_mode, + state->polygon_back_mode, + state->primitive_mode, + state->smooth_shading, + false); + if (geometry_shader_code) { + const char* geometry_shader_code_str = + mstring_get_str(geometry_shader_code); + GLuint geometry_shader = create_gl_shader(GL_GEOMETRY_SHADER, + geometry_shader_code_str, + "geometry shader"); + glAttachShader(program, geometry_shader); + mstring_unref(geometry_shader_code); + } + + /* create the vertex shader */ + MString *vertex_shader_code = + pgraph_gen_vsh_glsl(state, geometry_shader_code != NULL); + GLuint vertex_shader = create_gl_shader(GL_VERTEX_SHADER, + mstring_get_str(vertex_shader_code), + "vertex shader"); + glAttachShader(program, vertex_shader); + mstring_unref(vertex_shader_code); + + /* generate a fragment shader from register combiners */ + MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh); + const char *fragment_shader_code_str = + mstring_get_str(fragment_shader_code); + GLuint fragment_shader = 
create_gl_shader(GL_FRAGMENT_SHADER, + fragment_shader_code_str, + "fragment shader"); + glAttachShader(program, fragment_shader); + mstring_unref(fragment_shader_code); + + /* link the program */ + glLinkProgram(program); + GLint linked = 0; + glGetProgramiv(program, GL_LINK_STATUS, &linked); + if(!linked) { + GLchar log[2048]; + glGetProgramInfoLog(program, 2048, NULL, log); + fprintf(stderr, "nv2a: shader linking failed: %s\n", log); + abort(); + } + + glUseProgram(program); + + ShaderBinding* ret = g_malloc0(sizeof(ShaderBinding)); + ret->gl_program = program; + ret->gl_primitive_mode = gl_primitive_mode; + + update_shader_constant_locations(ret, state); + + if (previous_numeric_locale) { + setlocale(LC_NUMERIC, previous_numeric_locale); + g_free(previous_numeric_locale); + } + + return ret; +} + +static const char *shader_gl_vendor = NULL; + +static void shader_create_cache_folder(void) +{ + char *shader_path = g_strdup_printf("%sshaders", xemu_settings_get_base_path()); + qemu_mkdir(shader_path); + g_free(shader_path); +} + +static char *shader_get_lru_cache_path(void) +{ + return g_strdup_printf("%s/shader_cache_list", xemu_settings_get_base_path()); +} + +static void shader_write_lru_list_entry_to_disk(Lru *lru, LruNode *node, void *opaque) +{ + FILE *lru_list_file = (FILE*) opaque; + size_t written = fwrite(&node->hash, sizeof(uint64_t), 1, lru_list_file); + if (written != 1) { + fprintf(stderr, "nv2a: Failed to write shader list entry %llx to disk\n", + (unsigned long long) node->hash); + } +} + +void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + if (!g_config.perf.cache_shaders) { + qatomic_set(&r->shader_cache_writeback_pending, false); + qemu_event_set(&r->shader_cache_writeback_complete); + return; + } + + char *shader_lru_path = shader_get_lru_cache_path(); + qemu_thread_join(&r->shader_disk_thread); + + FILE *lru_list = qemu_fopen(shader_lru_path, "wb"); + g_free(shader_lru_path); + if 
(!lru_list) { + fprintf(stderr, "nv2a: Failed to open shader LRU cache for writing\n"); + return; + } + + lru_visit_active(&r->shader_cache, shader_write_lru_list_entry_to_disk, lru_list); + fclose(lru_list); + + lru_flush(&r->shader_cache); + + qatomic_set(&r->shader_cache_writeback_pending, false); + qemu_event_set(&r->shader_cache_writeback_complete); +} + +bool pgraph_gl_shader_load_from_memory(ShaderLruNode *snode) +{ + assert(glGetError() == GL_NO_ERROR); + + if (!snode->program) { + return false; + } + + GLuint gl_program = glCreateProgram(); + glProgramBinary(gl_program, snode->program_format, snode->program, snode->program_size); + GLint gl_error = glGetError(); + if (gl_error != GL_NO_ERROR) { + NV2A_DPRINTF("failed to load shader binary from disk: GL error code %d\n", gl_error); + glDeleteProgram(gl_program); + return false; + } + + glValidateProgram(gl_program); + GLint valid = 0; + glGetProgramiv(gl_program, GL_VALIDATE_STATUS, &valid); + if (!valid) { + GLchar log[1024]; + glGetProgramInfoLog(gl_program, 1024, NULL, log); + NV2A_DPRINTF("failed to load shader binary from disk: %s\n", log); + glDeleteProgram(gl_program); + return false; + } + + glUseProgram(gl_program); + + ShaderBinding* binding = g_malloc0(sizeof(ShaderBinding)); + binding->gl_program = gl_program; + binding->gl_primitive_mode = get_gl_primitive_mode(snode->state.polygon_front_mode, + snode->state.primitive_mode); + snode->binding = binding; + + g_free(snode->program); + snode->program = NULL; + + update_shader_constant_locations(binding, &snode->state); + + return true; +} + +static char *shader_get_bin_directory(uint64_t hash) +{ + const char *cfg_dir = xemu_settings_get_base_path(); + uint64_t bin_mask = 0xffffUL << 48; + char *shader_bin_dir = g_strdup_printf("%s/shaders/%04lx", + cfg_dir, (hash & bin_mask) >> 48); + return shader_bin_dir; +} + +static char *shader_get_binary_path(const char *shader_bin_dir, uint64_t hash) +{ + uint64_t bin_mask = 0xffffUL << 48; + return 
g_strdup_printf("%s/%012lx", shader_bin_dir, + hash & (~bin_mask)); +} + +static void shader_load_from_disk(PGRAPHState *pg, uint64_t hash) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + char *shader_bin_dir = shader_get_bin_directory(hash); + char *shader_path = shader_get_binary_path(shader_bin_dir, hash); + char *cached_xemu_version = NULL; + char *cached_gl_vendor = NULL; + void *program_buffer = NULL; + + uint64_t cached_xemu_version_len; + uint64_t gl_vendor_len; + GLenum program_binary_format; + ShaderState state; + size_t shader_size; + + g_free(shader_bin_dir); + + qemu_mutex_lock(&r->shader_cache_lock); + if (lru_contains_hash(&r->shader_cache, hash)) { + qemu_mutex_unlock(&r->shader_cache_lock); + return; + } + qemu_mutex_unlock(&r->shader_cache_lock); + + FILE *shader_file = qemu_fopen(shader_path, "rb"); + if (!shader_file) { + goto error; + } + + size_t nread; + #define READ_OR_ERR(data, data_len) \ + do { \ + nread = fread(data, data_len, 1, shader_file); \ + if (nread != 1) { \ + fclose(shader_file); \ + goto error; \ + } \ + } while (0) + + READ_OR_ERR(&cached_xemu_version_len, sizeof(cached_xemu_version_len)); + + cached_xemu_version = g_malloc(cached_xemu_version_len +1); + READ_OR_ERR(cached_xemu_version, cached_xemu_version_len); + if (strcmp(cached_xemu_version, xemu_version) != 0) { + fclose(shader_file); + goto error; + } + + READ_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); + + cached_gl_vendor = g_malloc(gl_vendor_len); + READ_OR_ERR(cached_gl_vendor, gl_vendor_len); + if (strcmp(cached_gl_vendor, shader_gl_vendor) != 0) { + fclose(shader_file); + goto error; + } + + READ_OR_ERR(&program_binary_format, sizeof(program_binary_format)); + READ_OR_ERR(&state, sizeof(state)); + READ_OR_ERR(&shader_size, sizeof(shader_size)); + + program_buffer = g_malloc(shader_size); + READ_OR_ERR(program_buffer, shader_size); + + #undef READ_OR_ERR + + fclose(shader_file); + g_free(shader_path); + g_free(cached_xemu_version); + g_free(cached_gl_vendor); 
+ + qemu_mutex_lock(&r->shader_cache_lock); + LruNode *node = lru_lookup(&r->shader_cache, hash, &state); + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + + /* If we happened to regenerate this shader already, then we may as well use the new one */ + if (snode->binding) { + qemu_mutex_unlock(&r->shader_cache_lock); + return; + } + + snode->program_format = program_binary_format; + snode->program_size = shader_size; + snode->program = program_buffer; + snode->cached = true; + qemu_mutex_unlock(&r->shader_cache_lock); + return; + +error: + /* Delete the shader so it won't be loaded again */ + qemu_unlink(shader_path); + g_free(shader_path); + g_free(program_buffer); + g_free(cached_xemu_version); + g_free(cached_gl_vendor); +} + +static void *shader_reload_lru_from_disk(void *arg) +{ + if (!g_config.perf.cache_shaders) { + return NULL; + } + + PGRAPHState *pg = (PGRAPHState*) arg; + char *shader_lru_path = shader_get_lru_cache_path(); + + FILE *lru_shaders_list = qemu_fopen(shader_lru_path, "rb"); + g_free(shader_lru_path); + if (!lru_shaders_list) { + return NULL; + } + + uint64_t hash; + while (fread(&hash, sizeof(uint64_t), 1, lru_shaders_list) == 1) { + shader_load_from_disk(pg, hash); + } + + return NULL; +} + +static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + memcpy(&snode->state, state, sizeof(ShaderState)); + snode->cached = false; + snode->binding = NULL; + snode->program = NULL; + snode->save_thread = NULL; +} + +static void shader_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + + if (snode->save_thread) { + qemu_thread_join(snode->save_thread); + g_free(snode->save_thread); + } + + if (snode->binding) { + glDeleteProgram(snode->binding->gl_program); + g_free(snode->binding); + } + + if (snode->program) { + g_free(snode->program); + } + + snode->cached = false; + 
snode->save_thread = NULL; + snode->binding = NULL; + snode->program = NULL; + memset(&snode->state, 0, sizeof(ShaderState)); +} + +static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + return memcmp(&snode->state, key, sizeof(ShaderState)); +} + +void pgraph_gl_init_shader_cache(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + qemu_mutex_init(&r->shader_cache_lock); + qemu_event_init(&r->shader_cache_writeback_complete, false); + + if (!shader_gl_vendor) { + shader_gl_vendor = (const char *) glGetString(GL_VENDOR); + } + + shader_create_cache_folder(); + + /* FIXME: Make this configurable */ + const size_t shader_cache_size = 50*1024; + lru_init(&r->shader_cache); + r->shader_cache_entries = malloc(shader_cache_size * sizeof(ShaderLruNode)); + assert(r->shader_cache_entries != NULL); + for (int i = 0; i < shader_cache_size; i++) { + lru_add_free(&r->shader_cache, &r->shader_cache_entries[i].node); + } + + r->shader_cache.init_node = shader_cache_entry_init; + r->shader_cache.compare_nodes = shader_cache_entry_compare; + r->shader_cache.post_node_evict = shader_cache_entry_post_evict; + + qemu_thread_create(&r->shader_disk_thread, "pgraph.renderer_state->shader_cache", + shader_reload_lru_from_disk, pg, QEMU_THREAD_JOINABLE); +} + +void pgraph_gl_deinit_shader_cache(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + // Clear out shader cache + pgraph_gl_shader_write_cache_reload_list(pg); + free(r->shader_cache_entries); + qemu_mutex_destroy(&r->shader_cache_lock); +} + +static void *shader_write_to_disk(void *arg) +{ + ShaderLruNode *snode = (ShaderLruNode*) arg; + + char *shader_bin = shader_get_bin_directory(snode->node.hash); + char *shader_path = shader_get_binary_path(shader_bin, snode->node.hash); + + static uint64_t gl_vendor_len; + if (gl_vendor_len == 0) { + gl_vendor_len = (uint64_t) (strlen(shader_gl_vendor) + 1); + } + + 
static uint64_t xemu_version_len = 0; + if (xemu_version_len == 0) { + xemu_version_len = (uint64_t) (strlen(xemu_version) + 1); + } + + qemu_mkdir(shader_bin); + g_free(shader_bin); + + FILE *shader_file = qemu_fopen(shader_path, "wb"); + if (!shader_file) { + goto error; + } + + size_t written; + #define WRITE_OR_ERR(data, data_size) \ + do { \ + written = fwrite(data, data_size, 1, shader_file); \ + if (written != 1) { \ + fclose(shader_file); \ + goto error; \ + } \ + } while (0) + + WRITE_OR_ERR(&xemu_version_len, sizeof(xemu_version_len)); + WRITE_OR_ERR(xemu_version, xemu_version_len); + + WRITE_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); + WRITE_OR_ERR(shader_gl_vendor, gl_vendor_len); + + WRITE_OR_ERR(&snode->program_format, sizeof(snode->program_format)); + WRITE_OR_ERR(&snode->state, sizeof(snode->state)); + + WRITE_OR_ERR(&snode->program_size, sizeof(snode->program_size)); + WRITE_OR_ERR(snode->program, snode->program_size); + + #undef WRITE_OR_ERR + + fclose(shader_file); + + g_free(shader_path); + g_free(snode->program); + snode->program = NULL; + + return NULL; + +error: + fprintf(stderr, "nv2a: Failed to write shader binary file to %s\n", shader_path); + qemu_unlink(shader_path); + g_free(shader_path); + g_free(snode->program); + snode->program = NULL; + return NULL; +} + +void pgraph_gl_shader_cache_to_disk(ShaderLruNode *snode) +{ + if (!snode->binding || snode->cached) { + return; + } + + GLint program_size; + glGetProgramiv(snode->binding->gl_program, GL_PROGRAM_BINARY_LENGTH, &program_size); + + if (snode->program) { + g_free(snode->program); + snode->program = NULL; + } + + /* program_size might be zero on some systems, if no binary formats are supported */ + if (program_size == 0) { + return; + } + + snode->program = g_malloc(program_size); + GLsizei program_size_copied; + glGetProgramBinary(snode->binding->gl_program, program_size, &program_size_copied, + &snode->program_format, snode->program); + assert(glGetError() == GL_NO_ERROR); + + 
snode->program_size = program_size_copied; + snode->cached = true; + + char name[24]; + snprintf(name, sizeof(name), "scache-%llx", (unsigned long long) snode->node.hash); + snode->save_thread = g_malloc0(sizeof(QemuThread)); + qemu_thread_create(snode->save_thread, name, shader_write_to_disk, snode, QEMU_THREAD_JOINABLE); +} + +static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, + bool binding_changed, + + // FIXME: Remove these... We already know it from binding.state + bool vertex_program, + bool fixed_function) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + int i, j; + + /* update combiner constants */ + for (i = 0; i < 9; i++) { + uint32_t constant[2]; + if (i == 8) { + /* final combiner */ + constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0); + constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1); + } else { + constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4); + constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4); + } + + for (j = 0; j < 2; j++) { + GLint loc = binding->psh_constant_loc[i][j]; + if (loc != -1) { + float value[4]; + pgraph_argb_pack32_to_rgba_float(constant[j], value); + glUniform4fv(loc, 1, value); + } + } + } + if (binding->alpha_ref_loc != -1) { + float alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), + NV_PGRAPH_CONTROL_0_ALPHAREF) / 255.0; + glUniform1f(binding->alpha_ref_loc, alpha_ref); + } + + + /* For each texture stage */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + GLint loc; + + /* Bump luminance only during stages 1 - 3 */ + if (i > 0) { + loc = binding->bump_mat_loc[i]; + if (loc != -1) { + uint32_t m_u32[4]; + m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1)); + m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1)); + m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1)); + m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1)); + float m[4]; + m[0] = *(float*)&m_u32[0]; + m[1] = *(float*)&m_u32[1]; + m[2] = 
*(float*)&m_u32[2]; + m[3] = *(float*)&m_u32[3]; + glUniformMatrix2fv(loc, 1, GL_FALSE, m); + } + loc = binding->bump_scale_loc[i]; + if (loc != -1) { + uint32_t v = + pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4); + glUniform1f(loc, *(float*)&v); + } + loc = binding->bump_offset_loc[i]; + if (loc != -1) { + uint32_t v = + pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4); + glUniform1f(loc, *(float*)&v); + } + } + + loc = r->shader_binding->tex_scale_loc[i]; + if (loc != -1) { + assert(r->texture_binding[i] != NULL); + glUniform1f(loc, (float)r->texture_binding[i]->scale); + } + } + + if (binding->fog_color_loc != -1) { + uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR); + glUniform4f(binding->fog_color_loc, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0); + } + if (binding->fog_param_loc != -1) { + uint32_t v[2]; + v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0); + v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1); + glUniform2f(binding->fog_param_loc, *(float *)&v[0], *(float *)&v[1]); + } + + float zmax; + switch (pg->surface_shape.zeta_format) { + case NV097_SET_SURFACE_FORMAT_ZETA_Z16: + zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF; + break; + case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: + zmax = pg->surface_shape.z_format ? 
f24_max : (float)0xFFFFFF; + break; + default: + assert(0); + } + + if (fixed_function) { + /* update lighting constants */ + struct { + uint32_t* v; + bool* dirty; + GLint* locs; + size_t len; + } lighting_arrays[] = { + {&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT}, + {&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT}, + {&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT}, + }; + + for (i=0; ilight_infinite_half_vector_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]); + } + loc = binding->light_infinite_direction_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_infinite_direction[i]); + } + + loc = binding->light_local_position_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_local_position[i]); + } + loc = binding->light_local_attenuation_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_local_attenuation[i]); + } + } + + /* estimate the viewport by assuming it matches the surface ... 
*/ + unsigned int aa_width = 1, aa_height = 1; + pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); + + float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width); + float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height); + float m33 = zmax; + float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0]; + float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1]; + + float invViewport[16] = { + 1.0/m11, 0, 0, 0, + 0, 1.0/m22, 0, 0, + 0, 0, 1.0/m33, 0, + -1.0+m41/m11, 1.0+m42/m22, 0, 1.0 + }; + + if (binding->inv_viewport_loc != -1) { + glUniformMatrix4fv(binding->inv_viewport_loc, + 1, GL_FALSE, &invViewport[0]); + } + } + + /* update vertex program constants */ + for (i=0; ivsh_constants_dirty[i] && !binding_changed) continue; + + GLint loc = binding->vsh_constant_loc[i]; + if ((loc != -1) && + memcmp(binding->vsh_constants[i], pg->vsh_constants[i], + sizeof(pg->vsh_constants[1]))) { + glUniform4fv(loc, 1, (const GLfloat *)pg->vsh_constants[i]); + memcpy(binding->vsh_constants[i], pg->vsh_constants[i], + sizeof(pg->vsh_constants[i])); + } + + pg->vsh_constants_dirty[i] = false; + } + + if (binding->surface_size_loc != -1) { + unsigned int aa_width = 1, aa_height = 1; + pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); + glUniform2f(binding->surface_size_loc, + pg->surface_binding_dim.width / aa_width, + pg->surface_binding_dim.height / aa_height); + } + + if (binding->clip_range_loc != -1) { + uint32_t v[2]; + v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN); + v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX); + float zclip_min = *(float*)&v[0] / zmax * 2.0 - 1.0; + float zclip_max = *(float*)&v[1] / zmax * 2.0 - 1.0; + glUniform4f(binding->clip_range_loc, 0, zmax, zclip_min, zclip_max); + } + + /* Clipping regions */ + unsigned int max_gl_width = pg->surface_binding_dim.width; + unsigned int max_gl_height = pg->surface_binding_dim.height; + pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height); + + for (i = 0; i < 
8; i++) { + uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4); + unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN); + unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1; + uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4); + unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN); + unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1; + pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min); + pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max); + + pgraph_apply_scaling_factor(pg, &x_min, &y_min); + pgraph_apply_scaling_factor(pg, &x_max, &y_max); + + /* Translate for the GL viewport origin */ + int y_min_xlat = MAX((int)max_gl_height - (int)y_max, 0); + int y_max_xlat = MIN((int)max_gl_height - (int)y_min, max_gl_height); + + glUniform4i(r->shader_binding->clip_region_loc[i], + x_min, y_min_xlat, x_max, y_max_xlat); + } + + if (binding->material_alpha_loc != -1) { + glUniform1f(binding->material_alpha_loc, pg->material_alpha); + } +} + +static bool test_shaders_dirty(PGRAPHState *pg) +{ + #define CR_1(reg) CR_x(reg, 1) + #define CR_4(reg) CR_x(reg, 4) + #define CR_8(reg) CR_x(reg, 8) + #define CF(src, name) CF_x(typeof(src), (&src), name, 1) + #define CFA(src, name) CF_x(typeof(src[0]), src, name, ARRAY_SIZE(src)) + #define CNAME(name) reg_check__ ## name + #define CX_x__define(type, name, x) static type CNAME(name)[x]; + #define CR_x__define(reg, x) CX_x__define(uint32_t, reg, x) + #define CF_x__define(type, src, name, x) CX_x__define(type, name, x) + #define CR_x__check(reg, x) \ + for (int i = 0; i < x; i++) { if (pgraph_reg_r(pg, reg+i*4) != CNAME(reg)[i]) goto dirty; } + #define CF_x__check(type, src, name, x) \ + for (int i = 0; i < x; i++) { if (src[i] != CNAME(name)[i]) goto dirty; } + #define CR_x__update(reg, x) \ + for (int i = 0; i < x; i++) { CNAME(reg)[i] = pgraph_reg_r(pg, reg+i*4); } + #define CF_x__update(type, src, name, x) \ + for (int i = 0; i < x; i++) { CNAME(name)[i] = 
src[i]; } + + #define DIRTY_REGS \ + CR_1(NV_PGRAPH_COMBINECTL) \ + CR_1(NV_PGRAPH_SHADERCTL) \ + CR_1(NV_PGRAPH_SHADOWCTL) \ + CR_1(NV_PGRAPH_COMBINESPECFOG0) \ + CR_1(NV_PGRAPH_COMBINESPECFOG1) \ + CR_1(NV_PGRAPH_CONTROL_0) \ + CR_1(NV_PGRAPH_CONTROL_3) \ + CR_1(NV_PGRAPH_CSV0_C) \ + CR_1(NV_PGRAPH_CSV0_D) \ + CR_1(NV_PGRAPH_CSV1_A) \ + CR_1(NV_PGRAPH_CSV1_B) \ + CR_1(NV_PGRAPH_SETUPRASTER) \ + CR_1(NV_PGRAPH_SHADERPROG) \ + CR_8(NV_PGRAPH_COMBINECOLORI0) \ + CR_8(NV_PGRAPH_COMBINECOLORO0) \ + CR_8(NV_PGRAPH_COMBINEALPHAI0) \ + CR_8(NV_PGRAPH_COMBINEALPHAO0) \ + CR_8(NV_PGRAPH_COMBINEFACTOR0) \ + CR_8(NV_PGRAPH_COMBINEFACTOR1) \ + CR_1(NV_PGRAPH_SHADERCLIPMODE) \ + CR_4(NV_PGRAPH_TEXCTL0_0) \ + CR_4(NV_PGRAPH_TEXFMT0) \ + CR_4(NV_PGRAPH_TEXFILTER0) \ + CR_8(NV_PGRAPH_WINDOWCLIPX0) \ + CR_8(NV_PGRAPH_WINDOWCLIPY0) \ + CF(pg->primitive_mode, primitive_mode) \ + CF(pg->surface_scale_factor, surface_scale_factor) \ + CF(pg->compressed_attrs, compressed_attrs) \ + CFA(pg->texture_matrix_enable, texture_matrix_enable) + + #define CR_x(reg, x) CR_x__define(reg, x) + #define CF_x(type, src, name, x) CF_x__define(type, src, name, x) + DIRTY_REGS + #undef CR_x + #undef CF_x + + #define CR_x(reg, x) CR_x__check(reg, x) + #define CF_x(type, src, name, x) CF_x__check(type, src, name, x) + DIRTY_REGS + #undef CR_x + #undef CF_x + return false; + +dirty: + #define CR_x(reg, x) CR_x__update(reg, x) + #define CF_x(type, src, name, x) CF_x__update(type, src, name, x) + DIRTY_REGS + #undef CR_x + #undef CF_x + return true; +} + +void pgraph_gl_bind_shaders(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__, + vertex_program ? "yes" : "no", + fixed_function ? 
"yes" : "no"); + + bool binding_changed = false; + if (!test_shaders_dirty(pg) && !pg->program_data_dirty) { + nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY); + goto update_constants; + } + + pg->program_data_dirty = false; + + ShaderBinding* old_binding = r->shader_binding; + + ShaderState state = pgraph_get_shader_state(pg); + + uint64_t shader_state_hash = fast_hash((uint8_t*) &state, sizeof(ShaderState)); + qemu_mutex_lock(&r->shader_cache_lock); + LruNode *node = lru_lookup(&r->shader_cache, shader_state_hash, &state); + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + if (snode->binding || pgraph_gl_shader_load_from_memory(snode)) { + r->shader_binding = snode->binding; + } else { + r->shader_binding = generate_shaders(&state); + nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN); + + /* cache it */ + snode->binding = r->shader_binding; + if (g_config.perf.cache_shaders) { + pgraph_gl_shader_cache_to_disk(snode); + } + } + + qemu_mutex_unlock(&r->shader_cache_lock); + + binding_changed = (r->shader_binding != old_binding); + if (binding_changed) { + nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND); + glUseProgram(r->shader_binding->gl_program); + } + +update_constants: + shader_update_constants(pg, r->shader_binding, binding_changed, + state.vertex_program, state.fixed_function); + + NV2A_GL_DGROUP_END(); +} + +GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src) +{ + GLint status; + char err_buf[512]; + + // Compile vertex shader + GLuint vs = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vs, 1, &vs_src, NULL); + glCompileShader(vs); + glGetShaderiv(vs, GL_COMPILE_STATUS, &status); + if (status != GL_TRUE) { + glGetShaderInfoLog(vs, sizeof(err_buf), NULL, err_buf); + err_buf[sizeof(err_buf)-1] = '\0'; + fprintf(stderr, "Vertex shader compilation failed: %s\n", err_buf); + exit(1); + } + + // Compile fragment shader + GLuint fs = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(fs, 1, &fs_src, NULL); + 
glCompileShader(fs); + glGetShaderiv(fs, GL_COMPILE_STATUS, &status); + if (status != GL_TRUE) { + glGetShaderInfoLog(fs, sizeof(err_buf), NULL, err_buf); + err_buf[sizeof(err_buf)-1] = '\0'; + fprintf(stderr, "Fragment shader compilation failed: %s\n", err_buf); + exit(1); + } + + // Link vertex and fragment shaders + GLuint prog = glCreateProgram(); + glAttachShader(prog, vs); + glAttachShader(prog, fs); + glLinkProgram(prog); + glUseProgram(prog); + + // Flag shaders for deletion (will still be retained for lifetime of prog) + glDeleteShader(vs); + glDeleteShader(fs); + + return prog; +} diff --git a/hw/xbox/nv2a/pgraph/gl/surface.c b/hw/xbox/nv2a/pgraph/gl/surface.c new file mode 100644 index 0000000000..332ca7199e --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/surface.c @@ -0,0 +1,1400 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/pgraph/pgraph.h" +#include "ui/xemu-settings.h" +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/pgraph/swizzle.h" +#include "debug.h" +#include "renderer.h" + +static void surface_download(NV2AState *d, SurfaceBinding *surface, bool force); +static void surface_download_to_buffer(NV2AState *d, SurfaceBinding *surface, + bool swizzle, bool flip, bool downscale, + uint8_t *pixels); +static void surface_get_dimensions(PGRAPHState *pg, unsigned int *width, unsigned int *height); + +void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + g_config.display.quality.surface_scale = scale < 1 ? 1 : scale; + + qemu_mutex_unlock_iothread(); + + qemu_mutex_lock(&d->pfifo.lock); + qatomic_set(&d->pfifo.halt, true); + qemu_mutex_unlock(&d->pfifo.lock); + + qemu_mutex_lock(&d->pgraph.lock); + qemu_event_reset(&r->dirty_surfaces_download_complete); + qatomic_set(&r->download_dirty_surfaces_pending, true); + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&r->dirty_surfaces_download_complete); + + qemu_mutex_lock(&d->pgraph.lock); + qemu_event_reset(&d->pgraph.flush_complete); + qatomic_set(&d->pgraph.flush_pending, true); + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.flush_complete); + + qemu_mutex_lock(&d->pfifo.lock); + qatomic_set(&d->pfifo.halt, false); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + + qemu_mutex_lock_iothread(); +} + +unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d) +{ + return d->pgraph.surface_scale_factor; +} + +void pgraph_gl_reload_surface_scale_factor(PGRAPHState *pg) +{ + int factor = g_config.display.quality.surface_scale; + pg->surface_scale_factor = factor < 1 ? 
1 : factor; +} + +// FIXME: Move to common +static bool framebuffer_dirty(PGRAPHState *pg) +{ + bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape, + sizeof(SurfaceShape)) != 0; + if (!shape_changed || (!pg->surface_shape.color_format + && !pg->surface_shape.zeta_format)) { + return false; + } + return true; +} + +void pgraph_gl_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", + color, zeta, + pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg)); + /* FIXME: Does this apply to CLEARs too? */ + color = color && pgraph_color_write_enabled(pg); + zeta = zeta && pgraph_zeta_write_enabled(pg); + pg->surface_color.draw_dirty |= color; + pg->surface_zeta.draw_dirty |= zeta; + + if (r->color_binding) { + r->color_binding->draw_dirty |= color; + r->color_binding->frame_time = pg->frame_time; + r->color_binding->cleared = false; + + } + + if (r->zeta_binding) { + r->zeta_binding->draw_dirty |= zeta; + r->zeta_binding->frame_time = pg->frame_time; + r->zeta_binding->cleared = false; + + } +} + +static void init_render_to_texture(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + const char *vs = + "#version 330\n" + "void main()\n" + "{\n" + " float x = -1.0 + float((gl_VertexID & 1) << 2);\n" + " float y = -1.0 + float((gl_VertexID & 2) << 1);\n" + " gl_Position = vec4(x, y, 0, 1);\n" + "}\n"; + const char *fs = + "#version 330\n" + "uniform sampler2D tex;\n" + "uniform vec2 surface_size;\n" + "layout(location = 0) out vec4 out_Color;\n" + "void main()\n" + "{\n" + " vec2 texCoord;\n" + " texCoord.x = gl_FragCoord.x;\n" + " texCoord.y = (surface_size.y - gl_FragCoord.y)\n" + " + (textureSize(tex,0).y - surface_size.y);\n" + " texCoord /= textureSize(tex,0).xy;\n" + " out_Color.rgba = texture(tex, texCoord);\n" + "}\n"; + + r->s2t_rndr.prog = pgraph_gl_compile_shader(vs, fs); + r->s2t_rndr.tex_loc = 
glGetUniformLocation(r->s2t_rndr.prog, "tex"); + r->s2t_rndr.surface_size_loc = glGetUniformLocation(r->s2t_rndr.prog, + "surface_size"); + + glGenVertexArrays(1, &r->s2t_rndr.vao); + glBindVertexArray(r->s2t_rndr.vao); + glGenBuffers(1, &r->s2t_rndr.vbo); + glBindBuffer(GL_ARRAY_BUFFER, r->s2t_rndr.vbo); + glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); + glGenFramebuffers(1, &r->s2t_rndr.fbo); +} + +static bool surface_to_texture_can_fastpath(SurfaceBinding *surface, + TextureShape *shape) +{ + // FIXME: Better checks/handling on formats and surface-texture compat + + int surface_fmt = surface->shape.color_format; + int texture_fmt = shape->color_format; + + if (!surface->color) { + // FIXME: Support zeta to color + return false; + } + + switch (surface_fmt) { + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; + default: break; + } + break; + default: break; + } + + trace_nv2a_pgraph_surface_texture_compat_failed( + surface_fmt, texture_fmt); + return false; +} + +static void render_surface_to(NV2AState *d, 
SurfaceBinding *surface, + int texture_unit, GLuint gl_target, + GLuint gl_texture, unsigned int width, + unsigned int height) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + glActiveTexture(GL_TEXTURE0 + texture_unit); + glBindFramebuffer(GL_FRAMEBUFFER, r->s2t_rndr.fbo); + + GLenum draw_buffers[1] = { GL_COLOR_ATTACHMENT0 }; + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, + gl_texture, 0); + glDrawBuffers(1, draw_buffers); + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + assert(glGetError() == GL_NO_ERROR); + + float color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER); + glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, color); + + glBindVertexArray(r->s2t_rndr.vao); + glBindBuffer(GL_ARRAY_BUFFER, r->s2t_rndr.vbo); + glUseProgram(r->s2t_rndr.prog); + glProgramUniform1i(r->s2t_rndr.prog, r->s2t_rndr.tex_loc, + texture_unit); + glProgramUniform2f(r->s2t_rndr.prog, + r->s2t_rndr.surface_size_loc, width, height); + + glViewport(0, 0, width, height); + glColorMask(true, true, true, true); + glDisable(GL_DITHER); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_BLEND); + glDisable(GL_STENCIL_TEST); + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glClearColor(0.0f, 0.0f, 1.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, 0, + 0); + glBindFramebuffer(GL_FRAMEBUFFER, r->gl_framebuffer); + glBindVertexArray(r->gl_vertex_array); + glBindTexture(gl_target, gl_texture); + glUseProgram( + r->shader_binding ? 
r->shader_binding->gl_program : 0); +} + +static void render_surface_to_texture_slow(NV2AState *d, + SurfaceBinding *surface, + TextureBinding *texture, + TextureShape *texture_shape, + int texture_unit) +{ + PGRAPHState *pg = &d->pgraph; + + const ColorFormatInfo *f = &kelvin_color_format_gl_map[texture_shape->color_format]; + assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_gl_map)); + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX_FALLBACK); + + glActiveTexture(GL_TEXTURE0 + texture_unit); + glBindTexture(texture->gl_target, texture->gl_texture); + + unsigned int width = surface->width, + height = surface->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + size_t bufsize = width * height * surface->fmt.bytes_per_pixel; + + uint8_t *buf = g_malloc(bufsize); + surface_download_to_buffer(d, surface, false, true, false, buf); + + width = texture_shape->width; + height = texture_shape->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0, + f->gl_format, f->gl_type, buf); + g_free(buf); + glBindTexture(texture->gl_target, texture->gl_texture); +} + +/* Note: This function is intended to be called before PGRAPH configures GL + * state for rendering; it will configure GL state here but only restore a + * couple of items. 
+ */ +void pgraph_gl_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, + TextureBinding *texture, + TextureShape *texture_shape, + int texture_unit) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + const ColorFormatInfo *f = + &kelvin_color_format_gl_map[texture_shape->color_format]; + assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_gl_map)); + + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); + + if (!surface_to_texture_can_fastpath(surface, texture_shape)) { + render_surface_to_texture_slow(d, surface, texture, + texture_shape, texture_unit); + return; + } + + unsigned int width = texture_shape->width, height = texture_shape->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + glActiveTexture(GL_TEXTURE0 + texture_unit); + glBindTexture(texture->gl_target, texture->gl_texture); + glTexParameteri(texture->gl_target, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(texture->gl_target, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(texture->gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0, + f->gl_format, f->gl_type, NULL); + glBindTexture(texture->gl_target, 0); + render_surface_to(d, surface, texture_unit, texture->gl_target, + texture->gl_texture, width, height); + glBindTexture(texture->gl_target, texture->gl_texture); + glUseProgram( + r->shader_binding ? 
r->shader_binding->gl_program : 0); +} + +bool pgraph_gl_check_surface_to_texture_compatibility( + const SurfaceBinding *surface, + const TextureShape *shape) +{ + // FIXME: Better checks/handling on formats and surface-texture compat + + if ((!surface->swizzle && surface->pitch != shape->pitch) || + surface->width != shape->width || + surface->height != shape->height) { + return false; + } + + int surface_fmt = surface->shape.color_format; + int texture_fmt = shape->color_format; + + if (!surface->color) { + // FIXME: Support zeta to color + return false; + } + + if (shape->cubemap) { + // FIXME: Support rendering surface to cubemap face + return false; + } + + if (shape->levels > 1) { + // FIXME: Support rendering surface to mip levels + return false; + } + + switch (surface_fmt) { + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; + default: break; + } + break; + default: + break; + } + + trace_nv2a_pgraph_surface_texture_compat_failed( + surface_fmt, texture_fmt); + return false; +} + +static void 
wait_for_surface_download(SurfaceBinding *e) +{ + NV2AState *d = g_nv2a; + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (qatomic_read(&e->draw_dirty)) { + qemu_mutex_lock(&d->pfifo.lock); + qemu_event_reset(&r->downloads_complete); + qatomic_set(&e->download_pending, true); + qatomic_set(&r->downloads_pending, true); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&r->downloads_complete); + } +} + +static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr, + hwaddr len, bool write) +{ + SurfaceBinding *e = opaque; + assert(addr >= e->vram_addr); + hwaddr offset = addr - e->vram_addr; + assert(offset < e->size); + + if (qatomic_read(&e->draw_dirty)) { + trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); + wait_for_surface_download(e); + } + + if (write && !qatomic_read(&e->upload_pending)) { + trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); + qatomic_set(&e->upload_pending, true); + } +} + +static SurfaceBinding *surface_put(NV2AState *d, hwaddr addr, + SurfaceBinding *surface_in) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + assert(pgraph_gl_surface_get(d, addr) == NULL); + + SurfaceBinding *surface, *next; + uintptr_t e_end = surface_in->vram_addr + surface_in->size - 1; + QTAILQ_FOREACH_SAFE(surface, &r->surfaces, entry, next) { + uintptr_t s_end = surface->vram_addr + surface->size - 1; + bool overlapping = !(surface->vram_addr > e_end + || surface_in->vram_addr > s_end); + if (overlapping) { + trace_nv2a_pgraph_surface_evict_overlapping( + surface->vram_addr, surface->width, surface->height, + surface->pitch); + pgraph_gl_surface_download_if_dirty(d, surface); + pgraph_gl_surface_invalidate(d, surface); + } + } + + SurfaceBinding *surface_out = g_malloc(sizeof(SurfaceBinding)); + assert(surface_out != NULL); + *surface_out = *surface_in; + + if (tcg_enabled()) { + qemu_mutex_unlock(&d->pgraph.lock); + 
qemu_mutex_lock_iothread(); + mem_access_callback_insert(qemu_get_cpu(0), + d->vram, surface_out->vram_addr, surface_out->size, + &surface_out->access_cb, &surface_access_callback, + surface_out); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&d->pgraph.lock); + } + + QTAILQ_INSERT_TAIL(&r->surfaces, surface_out, entry); + + return surface_out; +} + +SurfaceBinding *pgraph_gl_surface_get(NV2AState *d, hwaddr addr) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH (surface, &r->surfaces, entry) { + if (surface->vram_addr == addr) { + return surface; + } + } + + return NULL; +} + +SurfaceBinding *pgraph_gl_surface_get_within(NV2AState *d, hwaddr addr) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH (surface, &r->surfaces, entry) { + if (addr >= surface->vram_addr && + addr < (surface->vram_addr + surface->size)) { + return surface; + } + } + + return NULL; +} + +void pgraph_gl_surface_invalidate(NV2AState *d, SurfaceBinding *surface) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + trace_nv2a_pgraph_surface_invalidated(surface->vram_addr); + + if (surface == r->color_binding) { + assert(d->pgraph.surface_color.buffer_dirty); + pgraph_gl_unbind_surface(d, true); + } + if (surface == r->zeta_binding) { + assert(d->pgraph.surface_zeta.buffer_dirty); + pgraph_gl_unbind_surface(d, false); + } + + if (tcg_enabled()) { + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock_iothread(); + mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&d->pgraph.lock); + } + + glDeleteTextures(1, &surface->gl_buffer); + + QTAILQ_REMOVE(&r->surfaces, surface, entry); + g_free(surface); +} + +static void surface_evict_old(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + const int 
surface_age_limit = 5; + + SurfaceBinding *s, *next; + QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + int last_used = d->pgraph.frame_time - s->frame_time; + if (last_used >= surface_age_limit) { + trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr); + pgraph_gl_surface_download_if_dirty(d, s); + pgraph_gl_surface_invalidate(d, s); + } + } +} + +static bool check_surface_compatibility(SurfaceBinding *s1, SurfaceBinding *s2, + bool strict) +{ + bool format_compatible = + (s1->color == s2->color) && + (s1->fmt.gl_attachment == s2->fmt.gl_attachment) && + (s1->fmt.gl_internal_format == s2->fmt.gl_internal_format) && + (s1->pitch == s2->pitch) && + (s1->shape.clip_x <= s2->shape.clip_x) && + (s1->shape.clip_y <= s2->shape.clip_y); + if (!format_compatible) { + return false; + } + + if (!strict) { + return (s1->width >= s2->width) && (s1->height >= s2->height); + } else { + return (s1->width == s2->width) && (s1->height == s2->height); + } +} + +void pgraph_gl_surface_download_if_dirty(NV2AState *d, + SurfaceBinding *surface) +{ + if (surface->draw_dirty) { + surface_download(d, surface, true); + } +} + +static void bind_current_surface(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (r->color_binding) { + glFramebufferTexture2D(GL_FRAMEBUFFER, r->color_binding->fmt.gl_attachment, + GL_TEXTURE_2D, r->color_binding->gl_buffer, 0); + } + + if (r->zeta_binding) { + glFramebufferTexture2D(GL_FRAMEBUFFER, r->zeta_binding->fmt.gl_attachment, + GL_TEXTURE_2D, r->zeta_binding->gl_buffer, 0); + } + + if (r->color_binding || r->zeta_binding) { + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == + GL_FRAMEBUFFER_COMPLETE); + } +} + +static void surface_copy_shrink_row(uint8_t *out, uint8_t *in, + unsigned int width, + unsigned int bytes_per_pixel, + unsigned int factor) +{ + if (bytes_per_pixel == 4) { + for (unsigned int x = 0; x < width; x++) { + *(uint32_t *)out = *(uint32_t *)in; + out += 4; + in += 4 * 
factor; + } + } else if (bytes_per_pixel == 2) { + for (unsigned int x = 0; x < width; x++) { + *(uint16_t *)out = *(uint16_t *)in; + out += 2; + in += 2 * factor; + } + } else { + for (unsigned int x = 0; x < width; x++) { + memcpy(out, in, bytes_per_pixel); + out += bytes_per_pixel; + in += bytes_per_pixel * factor; + } + } +} + +static void surface_download_to_buffer(NV2AState *d, SurfaceBinding *surface, + bool swizzle, bool flip, bool downscale, + uint8_t *pixels) +{ + PGRAPHState *pg = &d->pgraph; + + swizzle &= surface->swizzle; + downscale &= (pg->surface_scale_factor != 1); + + trace_nv2a_pgraph_surface_download( + surface->color ? "COLOR" : "ZETA", + surface->swizzle ? "sz" : "lin", surface->vram_addr, + surface->width, surface->height, surface->pitch, + surface->fmt.bytes_per_pixel); + + /* Bind destination surface to framebuffer */ + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, + GL_TEXTURE_2D, surface->gl_buffer, 0); + + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + + /* Read surface into memory */ + uint8_t *gl_read_buf = pixels; + + uint8_t *swizzle_buf = pixels; + if (swizzle) { + /* FIXME: Allocate big buffer up front and re-alloc if necessary. 
+ * FIXME: Consider swizzle in shader + */ + assert(pg->surface_scale_factor == 1 || downscale); + swizzle_buf = (uint8_t *)g_malloc(surface->size); + gl_read_buf = swizzle_buf; + } + + if (downscale) { + pg->scale_buf = (uint8_t *)g_realloc( + pg->scale_buf, pg->surface_scale_factor * pg->surface_scale_factor * + surface->size); + gl_read_buf = pg->scale_buf; + } + + glo_readpixels( + surface->fmt.gl_format, surface->fmt.gl_type, surface->fmt.bytes_per_pixel, + pg->surface_scale_factor * surface->pitch, + pg->surface_scale_factor * surface->width, + pg->surface_scale_factor * surface->height, flip, gl_read_buf); + + /* FIXME: Replace this with a hw accelerated version */ + if (downscale) { + assert(surface->pitch >= (surface->width * surface->fmt.bytes_per_pixel)); + uint8_t *out = swizzle_buf, *in = pg->scale_buf; + for (unsigned int y = 0; y < surface->height; y++) { + surface_copy_shrink_row(out, in, surface->width, + surface->fmt.bytes_per_pixel, + pg->surface_scale_factor); + in += surface->pitch * pg->surface_scale_factor * + pg->surface_scale_factor; + out += surface->pitch; + } + } + + if (swizzle) { + swizzle_rect(swizzle_buf, surface->width, surface->height, pixels, + surface->pitch, surface->fmt.bytes_per_pixel); + g_free(swizzle_buf); + } + + /* Re-bind original framebuffer target */ + glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, + GL_TEXTURE_2D, 0, 0); + bind_current_surface(d); +} + +static void surface_download(NV2AState *d, SurfaceBinding *surface, bool force) +{ + if (!(surface->download_pending || force)) { + return; + } + + /* FIXME: Respect write enable at last TOU? 
*/ + + nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD); + + surface_download_to_buffer(d, surface, true, true, true, + d->vram_ptr + surface->vram_addr); + + memory_region_set_client_dirty(d->vram, surface->vram_addr, + surface->pitch * surface->height, + DIRTY_MEMORY_VGA); + memory_region_set_client_dirty(d->vram, surface->vram_addr, + surface->pitch * surface->height, + DIRTY_MEMORY_NV2A_TEX); + + surface->download_pending = false; + surface->draw_dirty = false; +} + +void pgraph_gl_process_pending_downloads(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + surface_download(d, surface, false); + } + + qatomic_set(&r->downloads_pending, false); + qemu_event_set(&r->downloads_complete); +} + +void pgraph_gl_download_dirty_surfaces(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + pgraph_gl_surface_download_if_dirty(d, surface); + } + + qatomic_set(&r->download_dirty_surfaces_pending, false); + qemu_event_set(&r->dirty_surfaces_download_complete); +} + +static void surface_copy_expand_row(uint8_t *out, uint8_t *in, + unsigned int width, + unsigned int bytes_per_pixel, + unsigned int factor) +{ + if (bytes_per_pixel == 4) { + for (unsigned int x = 0; x < width; x++) { + for (unsigned int i = 0; i < factor; i++) { + *(uint32_t *)out = *(uint32_t *)in; + out += bytes_per_pixel; + } + in += bytes_per_pixel; + } + } else if (bytes_per_pixel == 2) { + for (unsigned int x = 0; x < width; x++) { + for (unsigned int i = 0; i < factor; i++) { + *(uint16_t *)out = *(uint16_t *)in; + out += bytes_per_pixel; + } + in += bytes_per_pixel; + } + } else { + for (unsigned int x = 0; x < width; x++) { + for (unsigned int i = 0; i < factor; i++) { + memcpy(out, in, bytes_per_pixel); + out += bytes_per_pixel; + } + in += bytes_per_pixel; + } + 
} +} + +static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width, + unsigned int height, + unsigned int bytes_per_pixel, + unsigned int factor) +{ + size_t out_pitch = width * bytes_per_pixel * factor; + + for (unsigned int y = 0; y < height; y++) { + surface_copy_expand_row(out, in, width, bytes_per_pixel, factor); + uint8_t *row_in = out; + for (unsigned int i = 1; i < factor; i++) { + out += out_pitch; + memcpy(out, row_in, out_pitch); + } + in += width * bytes_per_pixel; + out += out_pitch; + } +} + +void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, + bool force) +{ + if (!(surface->upload_pending || force)) { + return; + } + + nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD); + + trace_nv2a_pgraph_surface_upload( + surface->color ? "COLOR" : "ZETA", + surface->swizzle ? "sz" : "lin", surface->vram_addr, + surface->width, surface->height, surface->pitch, + surface->fmt.bytes_per_pixel); + + PGRAPHState *pg = &d->pgraph; + + surface->upload_pending = false; + surface->draw_time = pg->draw_time; + + // FIXME: Don't query GL for texture binding + GLint last_texture_binding; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &last_texture_binding); + + // FIXME: Replace with FBO to not disturb current state + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + + uint8_t *data = d->vram_ptr; + uint8_t *buf = data + surface->vram_addr; + + if (surface->swizzle) { + buf = (uint8_t*)g_malloc(surface->size); + unswizzle_rect(data + surface->vram_addr, + surface->width, surface->height, + buf, + surface->pitch, + surface->fmt.bytes_per_pixel); + } + + /* FIXME: Replace this flip/scaling */ + + // This is VRAM so we can't do this inplace! 
+ uint8_t *flipped_buf = (uint8_t *)g_malloc( + surface->height * surface->width * surface->fmt.bytes_per_pixel); + unsigned int irow; + for (irow = 0; irow < surface->height; irow++) { + memcpy(&flipped_buf[surface->width * (surface->height - irow - 1) + * surface->fmt.bytes_per_pixel], + &buf[surface->pitch * irow], + surface->width * surface->fmt.bytes_per_pixel); + } + + uint8_t *gl_read_buf = flipped_buf; + unsigned int width = surface->width, height = surface->height; + + if (pg->surface_scale_factor > 1) { + pgraph_apply_scaling_factor(pg, &width, &height); + pg->scale_buf = (uint8_t *)g_realloc( + pg->scale_buf, width * height * surface->fmt.bytes_per_pixel); + gl_read_buf = pg->scale_buf; + uint8_t *out = gl_read_buf, *in = flipped_buf; + surface_copy_expand(out, in, surface->width, surface->height, + surface->fmt.bytes_per_pixel, + d->pgraph.surface_scale_factor); + } + + int prev_unpack_alignment; + glGetIntegerv(GL_UNPACK_ALIGNMENT, &prev_unpack_alignment); + if (unlikely((width * surface->fmt.bytes_per_pixel) % 4 != 0)) { + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + } else { + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + } + + glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); + glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, width, + height, 0, surface->fmt.gl_format, surface->fmt.gl_type, + gl_read_buf); + glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment); + g_free(flipped_buf); + if (surface->swizzle) { + g_free(buf); + } + + // Rebind previous framebuffer binding + glBindTexture(GL_TEXTURE_2D, last_texture_binding); + + bind_current_surface(d); +} + +static void compare_surfaces(SurfaceBinding *s1, SurfaceBinding *s2) +{ + #define DO_CMP(fld) \ + if (s1->fld != s2->fld) \ + trace_nv2a_pgraph_surface_compare_mismatch( \ + #fld, (long int)s1->fld, (long int)s2->fld); + DO_CMP(shape.clip_x) + DO_CMP(shape.clip_width) + DO_CMP(shape.clip_y) + DO_CMP(shape.clip_height) + DO_CMP(gl_buffer) + DO_CMP(fmt.bytes_per_pixel) + 
    DO_CMP(fmt.gl_attachment)
    DO_CMP(fmt.gl_internal_format)
    DO_CMP(fmt.gl_format)
    DO_CMP(fmt.gl_type)
    DO_CMP(color)
    DO_CMP(swizzle)
    DO_CMP(vram_addr)
    DO_CMP(width)
    DO_CMP(height)
    DO_CMP(pitch)
    DO_CMP(size)
    DO_CMP(dma_addr)
    DO_CMP(dma_len)
    DO_CMP(frame_time)
    DO_CMP(draw_time)
    #undef DO_CMP
}

/* Fill in a SurfaceBinding describing the currently configured color or
 * zeta render target, using caller-supplied dimensions. Resolves the
 * surface format from the PGRAPH surface shape registers and the VRAM
 * location from the associated DMA object. The entry is marked
 * upload_pending so its contents get fetched from VRAM before first use.
 * Aborts on unimplemented color formats. */
static void populate_surface_binding_entry_sized(NV2AState *d, bool color,
                                                 unsigned int width,
                                                 unsigned int height,
                                                 SurfaceBinding *entry)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHGLState *r = pg->gl_renderer_state;

    Surface *surface;
    hwaddr dma_address;
    SurfaceFormatInfo fmt;

    if (color) {
        surface = &pg->surface_color;
        dma_address = pg->dma_color;
        assert(pg->surface_shape.color_format != 0);
        assert(pg->surface_shape.color_format <
               ARRAY_SIZE(kelvin_surface_color_format_gl_map));
        fmt = kelvin_surface_color_format_gl_map[pg->surface_shape.color_format];
        if (fmt.bytes_per_pixel == 0) {
            fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n",
                    pg->surface_shape.color_format);
            abort();
        }
    } else {
        surface = &pg->surface_zeta;
        dma_address = pg->dma_zeta;
        assert(pg->surface_shape.zeta_format != 0);
        assert(pg->surface_shape.zeta_format <
               ARRAY_SIZE(kelvin_surface_zeta_float_format_gl_map));
        /* z_format selects float vs fixed-point depth encoding. */
        const SurfaceFormatInfo *map =
            pg->surface_shape.z_format ? kelvin_surface_zeta_float_format_gl_map :
                                         kelvin_surface_zeta_fixed_format_gl_map;
        fmt = map[pg->surface_shape.zeta_format];
    }

    DMAObject dma = nv_dma_load(d, dma_address);
    /* There's a bunch of bugs that could cause us to hit this function
     * at the wrong time and get an invalid dma object.
     * Check that it's sane.
     */
    assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS);
    // assert(dma.address + surface->offset != 0);
    assert(surface->offset <= dma.limit);
    assert(surface->offset + surface->pitch * height <= dma.limit + 1);
    assert(surface->pitch % fmt.bytes_per_pixel == 0);
    assert((dma.address & ~0x07FFFFFF) == 0);

    /* A zeta-only binding inherits the shape of the bound color surface. */
    entry->shape = (color || !r->color_binding) ? pg->surface_shape :
                                                  r->color_binding->shape;
    entry->gl_buffer = 0;
    entry->fmt = fmt;
    entry->color = color;
    entry->swizzle =
        (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
    entry->vram_addr = dma.address + surface->offset;
    entry->width = width;
    entry->height = height;
    entry->pitch = surface->pitch;
    entry->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel);
    entry->upload_pending = true;
    entry->download_pending = false;
    entry->draw_dirty = false;
    entry->dma_addr = dma.address;
    entry->dma_len = dma.limit;
    entry->frame_time = pg->frame_time;
    entry->draw_time = pg->draw_time;
    entry->cleared = false;
}

/* As populate_surface_binding_entry_sized(), but derives the surface
 * dimensions from the current PGRAPH state (or from the bound color
 * surface when populating a zeta entry alongside one). */
static void populate_surface_binding_entry(NV2AState *d, bool color,
                                           SurfaceBinding *entry)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHGLState *r = pg->gl_renderer_state;

    unsigned int width, height;

    if (color || !r->color_binding) {
        surface_get_dimensions(pg, &width, &height);
        pgraph_apply_anti_aliasing_factor(pg, &width, &height);

        /* Since we determine surface dimensions based on the clipping
         * rectangle, make sure to include the surface offset as well.
         */
        if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) {
            width += pg->surface_shape.clip_x;
            height += pg->surface_shape.clip_y;
        }
    } else {
        width = r->color_binding->width;
        height = r->color_binding->height;
    }

    populate_surface_binding_entry_sized(d, color, width, height, entry);
}

/* Synchronize one render target (color or zeta) with current PGRAPH state.
 * With upload=true: (re)bind a surface for the current target address --
 * reusing a compatible cached surface if one exists, otherwise evicting the
 * stale one and creating a fresh GL texture -- and attach it to the
 * framebuffer. With upload=false: flush a draw-dirty surface back to VRAM
 * when writes cannot be tracked (non-TCG). */
static void update_surface_part(NV2AState *d, bool upload, bool color)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHGLState *r = pg->gl_renderer_state;

    SurfaceBinding entry;
    populate_surface_binding_entry(d, color, &entry);

    Surface *surface = color ? &pg->surface_color : &pg->surface_zeta;

    /* Without TCG dirty tracking, poll the dirty bitmap directly. */
    bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty(
                                           d->vram, entry.vram_addr, entry.size,
                                           DIRTY_MEMORY_NV2A);

    if (upload && (surface->buffer_dirty || mem_dirty)) {
        pgraph_gl_unbind_surface(d, color);

        SurfaceBinding *found = pgraph_gl_surface_get(d, entry.vram_addr);
        if (found != NULL) {
            /* FIXME: Support same color/zeta surface target? In the mean time,
             * if the surface we just found is currently bound, just unbind it.
             */
            SurfaceBinding *other = (color ? r->zeta_binding
                                           : r->color_binding);
            if (found == other) {
                NV2A_UNIMPLEMENTED("Same color & zeta surface offset");
                pgraph_gl_unbind_surface(d, !color);
            }
        }

        trace_nv2a_pgraph_surface_target(
            color ? "COLOR" : "ZETA", entry.vram_addr,
            entry.swizzle ? "sz" : "ln",
            pg->surface_shape.anti_aliasing,
            pg->surface_shape.clip_x,
            pg->surface_shape.clip_width, pg->surface_shape.clip_y,
            pg->surface_shape.clip_height);

        bool should_create = true;

        if (found != NULL) {
            bool is_compatible =
                check_surface_compatibility(found, &entry, false);

#define TRACE_ARGS found->vram_addr, found->width, found->height, \
                   found->swizzle ? "sz" : "ln", \
                   found->shape.anti_aliasing, found->shape.clip_x, \
                   found->shape.clip_width, found->shape.clip_y, \
                   found->shape.clip_height, found->pitch
            if (found->color) {
                trace_nv2a_pgraph_surface_match_color(TRACE_ARGS);
            } else {
                trace_nv2a_pgraph_surface_match_zeta(TRACE_ARGS);
            }
#undef TRACE_ARGS

            assert(!(entry.swizzle && pg->clearing));

            if (found->swizzle != entry.swizzle) {
                /* Clears should only be done on linear surfaces. Avoid
                 * synchronization by allowing (1) a surface marked swizzled to
                 * be cleared under the assumption the entire surface is
                 * destined to be cleared and (2) a fully cleared linear surface
                 * to be marked swizzled. Strictly match size to avoid
                 * pathological cases.
                 */
                is_compatible &= (pg->clearing || found->cleared) &&
                    check_surface_compatibility(found, &entry, true);
                if (is_compatible) {
                    trace_nv2a_pgraph_surface_migrate_type(
                        entry.swizzle ? "swizzled" : "linear");
                }
            }

            /* A resized color surface must not overlap the zeta surface
             * that would pair with it. */
            if (is_compatible && color &&
                !check_surface_compatibility(found, &entry, true)) {
                SurfaceBinding zeta_entry;
                populate_surface_binding_entry_sized(
                    d, !color, found->width, found->height, &zeta_entry);
                hwaddr color_end = found->vram_addr + found->size;
                hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size;
                is_compatible &= found->vram_addr >= zeta_end ||
                                 zeta_entry.vram_addr >= color_end;
            }

            /* Zeta must match the bound color surface's dimensions. */
            if (is_compatible && !color && r->color_binding) {
                is_compatible &= (found->width == r->color_binding->width) &&
                                 (found->height == r->color_binding->height);
            }

            if (is_compatible) {
                /* FIXME: Refactor */
                pg->surface_binding_dim.width = found->width;
                pg->surface_binding_dim.clip_x = found->shape.clip_x;
                pg->surface_binding_dim.clip_width = found->shape.clip_width;
                pg->surface_binding_dim.height = found->height;
                pg->surface_binding_dim.clip_y = found->shape.clip_y;
                pg->surface_binding_dim.clip_height = found->shape.clip_height;
                found->upload_pending |= mem_dirty;
                pg->surface_zeta.buffer_dirty |= color;
                should_create = false;
            } else {
                trace_nv2a_pgraph_surface_evict_reason(
                    "incompatible", found->vram_addr);
                compare_surfaces(found, &entry);
                pgraph_gl_surface_download_if_dirty(d, found);
                pgraph_gl_surface_invalidate(d, found);
            }
        }

        if (should_create) {
            glGenTextures(1, &entry.gl_buffer);
            glBindTexture(GL_TEXTURE_2D, entry.gl_buffer);
            NV2A_GL_DLABEL(GL_TEXTURE, entry.gl_buffer,
                           "%s format: %0X, width: %d, height: %d "
                           "(addr %" HWADDR_PRIx ")",
                           color ? "color" : "zeta",
                           color ? pg->surface_shape.color_format
                                 : pg->surface_shape.zeta_format,
                           entry.width, entry.height, surface->offset);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
            /* GL texture is allocated at the scaled resolution. */
            unsigned int width = entry.width, height = entry.height;
            pgraph_apply_scaling_factor(pg, &width, &height);
            glTexImage2D(GL_TEXTURE_2D, 0, entry.fmt.gl_internal_format, width,
                         height, 0, entry.fmt.gl_format, entry.fmt.gl_type,
                         NULL);
            found = surface_put(d, entry.vram_addr, &entry);

            /* FIXME: Refactor */
            pg->surface_binding_dim.width = entry.width;
            pg->surface_binding_dim.clip_x = entry.shape.clip_x;
            pg->surface_binding_dim.clip_width = entry.shape.clip_width;
            pg->surface_binding_dim.height = entry.height;
            pg->surface_binding_dim.clip_y = entry.shape.clip_y;
            pg->surface_binding_dim.clip_height = entry.shape.clip_height;

            /* Force the zeta surface to be rebuilt if it no longer matches
             * the new color surface's dimensions. */
            if (color && r->zeta_binding && (r->zeta_binding->width != entry.width || r->zeta_binding->height != entry.height)) {
                pg->surface_zeta.buffer_dirty = true;
            }
        }

#define TRACE_ARGS found->vram_addr, found->width, found->height, \
                   found->swizzle ? "sz" : "ln", found->shape.anti_aliasing, \
                   found->shape.clip_x, found->shape.clip_width, \
                   found->shape.clip_y, found->shape.clip_height, found->pitch

        if (color) {
            if (should_create) {
                trace_nv2a_pgraph_surface_create_color(TRACE_ARGS);
            } else {
                trace_nv2a_pgraph_surface_hit_color(TRACE_ARGS);
            }

            r->color_binding = found;
        } else {
            if (should_create) {
                trace_nv2a_pgraph_surface_create_zeta(TRACE_ARGS);
            } else {
                trace_nv2a_pgraph_surface_hit_zeta(TRACE_ARGS);
            }
            r->zeta_binding = found;
        }
#undef TRACE_ARGS

        glFramebufferTexture2D(GL_FRAMEBUFFER, entry.fmt.gl_attachment,
                               GL_TEXTURE_2D, found->gl_buffer, 0);
        assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) ==
               GL_FRAMEBUFFER_COMPLETE);

        surface->buffer_dirty = false;
    }

    if (!upload && surface->draw_dirty) {
        if (!tcg_enabled()) {
            /* FIXME: Cannot monitor for reads/writes; flush now */
            surface_download(d,
                             color ? r->color_binding :
                                     r->zeta_binding,
                             true);
        }

        surface->write_enabled_cache = false;
        surface->draw_dirty = false;
    }
}

/* Detach the color or zeta surface from the framebuffer and clear the
 * renderer's binding pointer. Safe to call when nothing is bound. */
void pgraph_gl_unbind_surface(NV2AState *d, bool color)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHGLState *r = pg->gl_renderer_state;

    if (color) {
        if (r->color_binding) {
            glFramebufferTexture2D(GL_FRAMEBUFFER,
                                   GL_COLOR_ATTACHMENT0,
                                   GL_TEXTURE_2D, 0, 0);
            r->color_binding = NULL;
        }
    } else {
        if (r->zeta_binding) {
            /* Detach both depth-only and depth-stencil attachment points;
             * which one is in use depends on the zeta format. */
            glFramebufferTexture2D(GL_FRAMEBUFFER,
                                   GL_DEPTH_ATTACHMENT,
                                   GL_TEXTURE_2D, 0, 0);
            glFramebufferTexture2D(GL_FRAMEBUFFER,
                                   GL_DEPTH_STENCIL_ATTACHMENT,
                                   GL_TEXTURE_2D, 0, 0);
            r->zeta_binding = NULL;
        }
    }
}

/* Top-level surface synchronization, called around draws and clears.
 * upload=true prepares render targets before drawing (rebinding and
 * uploading VRAM data as needed, and bumping draw_time); upload=false
 * flushes dirty targets after drawing. color_write/zeta_write gate which
 * targets are touched, further masked by the current write-enable state
 * (ignored while clearing). */
void pgraph_gl_surface_update(NV2AState *d, bool upload, bool color_write,
                              bool zeta_write)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHGLState *r = pg->gl_renderer_state;

    pg->surface_shape.z_format =
        GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER),
                 NV_PGRAPH_SETUPRASTER_Z_FORMAT);

    color_write = color_write &&
        (pg->clearing || pgraph_color_write_enabled(pg));
    zeta_write = zeta_write && (pg->clearing || pgraph_zeta_write_enabled(pg));

    if (upload) {
        /* A change in surface shape invalidates both targets. */
        bool fb_dirty = framebuffer_dirty(pg);
        if (fb_dirty) {
            memcpy(&pg->last_surface_shape, &pg->surface_shape,
                   sizeof(SurfaceShape));
            pg->surface_color.buffer_dirty = true;
            pg->surface_zeta.buffer_dirty = true;
        }

        if (pg->surface_color.buffer_dirty) {
            pgraph_gl_unbind_surface(d, true);
        }

        if (color_write) {
            update_surface_part(d, true, true);
        }

        if (pg->surface_zeta.buffer_dirty) {
            pgraph_gl_unbind_surface(d, false);
        }

        if (zeta_write) {
            update_surface_part(d, true, false);
        }
    } else {
        if ((color_write || pg->surface_color.write_enabled_cache)
            && pg->surface_color.draw_dirty) {
            update_surface_part(d, false, true);
        }
        if ((zeta_write || pg->surface_zeta.write_enabled_cache)
            && pg->surface_zeta.draw_dirty) {
            update_surface_part(d, false, false);
        }
    }

    if (upload) {
        pg->draw_time++;
    }

    bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);

    if (r->color_binding) {
        r->color_binding->frame_time = pg->frame_time;
        if (upload) {
            pgraph_gl_upload_surface_data(d, r->color_binding, false);
            r->color_binding->draw_time = pg->draw_time;
            r->color_binding->swizzle = swizzle;
        }
    }

    if (r->zeta_binding) {
        r->zeta_binding->frame_time = pg->frame_time;
        if (upload) {
            pgraph_gl_upload_surface_data(d, r->zeta_binding, false);
            r->zeta_binding->draw_time = pg->draw_time;
            r->zeta_binding->swizzle = swizzle;
        }
    }

    // Sanity check color and zeta dimensions match
    if (r->color_binding && r->zeta_binding) {
        assert((r->color_binding->width == r->zeta_binding->width)
               && (r->color_binding->height == r->zeta_binding->height));
    }

    surface_evict_old(d);
}

// FIXME: Move to common
/* Report the current render target dimensions: power-of-two log sizes for
 * swizzled surfaces, clip rectangle dimensions for linear ones. */
static void surface_get_dimensions(PGRAPHState *pg, unsigned int *width,
                                   unsigned int *height)
{
    bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
    if (swizzle) {
        *width = 1 << pg->surface_shape.log_width;
        *height = 1 << pg->surface_shape.log_height;
    } else {
        *width = pg->surface_shape.clip_width;
        *height = pg->surface_shape.clip_height;
    }
}

/* One-time GL-renderer surface setup: create and bind the shared
 * framebuffer object, initialize the surface list and download events,
 * and prepare render-to-texture resources. */
void pgraph_gl_init_surfaces(PGRAPHState *pg)
{
    PGRAPHGLState *r = pg->gl_renderer_state;

    pgraph_gl_reload_surface_scale_factor(pg);
    glGenFramebuffers(1, &r->gl_framebuffer);
    glBindFramebuffer(GL_FRAMEBUFFER, r->gl_framebuffer);
    QTAILQ_INIT(&r->surfaces);
    r->downloads_pending = false;
    qemu_event_init(&r->downloads_complete, false);
    qemu_event_init(&r->dirty_surfaces_download_complete, false);

    init_render_to_texture(pg);
}

/* Tear down GL-renderer surface state created by pgraph_gl_init_surfaces().
 * NOTE(review): cached surface textures are not released here yet (see
 * TODO below). */
void pgraph_gl_deinit_surfaces(PGRAPHState *pg)
{
    PGRAPHGLState *r = pg->gl_renderer_state;

    glDeleteFramebuffers(1, &r->gl_framebuffer);
    // TODO: clear out surfaces
}

/* Invalidate the entire surface cache (e.g. after a scale-factor change),
 * then rebind the previously active render targets from scratch. */
void pgraph_gl_surface_flush(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHGLState *r = pg->gl_renderer_state;

    bool update_surface = (r->color_binding || r->zeta_binding);

    /* Clear last surface shape to force recreation of buffers at next draw */
    pg->surface_color.draw_dirty = false;
    pg->surface_zeta.draw_dirty = false;
    memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape));
    pgraph_gl_unbind_surface(d, true);
    pgraph_gl_unbind_surface(d, false);

    SurfaceBinding *s, *next;
    QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) {
        pgraph_gl_surface_invalidate(d, s);
    }

    pgraph_gl_reload_surface_scale_factor(pg);

    if (update_surface) {
        pgraph_gl_surface_update(d, true, true, true);
    }
}
diff --git a/hw/xbox/nv2a/pgraph/gl/texture.c b/hw/xbox/nv2a/pgraph/gl/texture.c
new file mode 100644
index 0000000000..bf072f44d6
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/texture.c
@@ -0,0 +1,819 @@
/*
 * Geforce NV2A PGRAPH OpenGL Renderer
 *
 * Copyright (c) 2012 espes
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2018-2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it
 and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/fast-hash.h"
#include "hw/xbox/nv2a/nv2a_int.h"
#include "hw/xbox/nv2a/pgraph/swizzle.h"
#include "hw/xbox/nv2a/pgraph/s3tc.h"
#include "hw/xbox/nv2a/pgraph/texture.h"
#include "debug.h"
#include "renderer.h"

static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data);
static void texture_binding_destroy(gpointer data);

/* VRAM range [addr, end] being tested for overlap by the LRU visitor. */
struct pgraph_texture_possibly_dirty_struct {
    hwaddr addr, end;
};

/* LRU visitor: flag a cached texture as possibly dirty if its texture data
 * or palette data overlaps the VRAM range in `opaque`. */
static void mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque)
{
    struct pgraph_texture_possibly_dirty_struct *test =
        (struct pgraph_texture_possibly_dirty_struct *)opaque;

    struct TextureLruNode *tnode = container_of(node, TextureLruNode, node);
    if (tnode->binding == NULL || tnode->possibly_dirty) {
        return;
    }

    /* Inclusive-end interval overlap tests. */
    uintptr_t k_tex_addr = tnode->key.texture_vram_offset;
    uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1;
    bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end);

    if (tnode->key.palette_length > 0) {
        uintptr_t k_pal_addr = tnode->key.palette_vram_offset;
        uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1;
        overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end);
    }

    tnode->possibly_dirty |= overlapping;
}

/* Mark every cached texture whose data overlaps the (page-aligned) VRAM
 * range [addr, addr+size) as possibly dirty, forcing revalidation on next
 * bind. */
void pgraph_gl_mark_textures_possibly_dirty(NV2AState *d,
                                            hwaddr addr, hwaddr size)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHGLState *r = pg->gl_renderer_state;

    hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1;
    addr &= TARGET_PAGE_MASK;
    assert(end <= memory_region_size(d->vram));

    struct pgraph_texture_possibly_dirty_struct test = {
        .addr = addr,
        .end = end,
    };

    lru_visit_active(&r->texture_cache,
                     mark_textures_possibly_dirty_visitor,
                     &test);
}

/* Test-and-clear the NV2A_TEX dirty log for the pages covering
 * [addr, addr+size). Returns true if any covered page was written. */
static bool check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size)
{
    hwaddr end = TARGET_PAGE_ALIGN(addr + size);
    /* NOTE(review): this uses `end < size` while the sibling function above
     * asserts `end <= size` (with an inclusive end) -- confirm whether the
     * strict comparison here is intentional. */
    assert(end < memory_region_size(d->vram));
    return memory_region_test_and_clear_dirty(d->vram, addr, end - addr,
                                              DIRTY_MEMORY_NV2A_TEX);
}

// Check if any of the pages spanned by a texture are dirty.
static bool check_texture_possibly_dirty(NV2AState *d,
                                         hwaddr texture_vram_offset,
                                         unsigned int length,
                                         hwaddr palette_vram_offset,
                                         unsigned int palette_length)
{
    bool possibly_dirty = false;
    if (check_texture_dirty(d, texture_vram_offset, length)) {
        possibly_dirty = true;
        pgraph_gl_mark_textures_possibly_dirty(d, texture_vram_offset, length);
    }
    /* palette_length == 0 means the texture has no palette to check. */
    if (palette_length && check_texture_dirty(d, palette_vram_offset,
                                              palette_length)) {
        possibly_dirty = true;
        pgraph_gl_mark_textures_possibly_dirty(d, palette_vram_offset,
                                               palette_length);
    }
    return possibly_dirty;
}

/* Apply NV2A filter/wrap/border-color register state to a GL texture
 * binding, issuing glTexParameter calls only for values that changed since
 * the last application (cached on the binding). `filter` and `address` are
 * the raw NV_PGRAPH_TEXFILTER0/TEXADDRESS0 register values for this stage. */
static void apply_texture_parameters(TextureBinding *binding,
                                     const BasicColorFormatInfo *f,
                                     unsigned int dimensionality,
                                     unsigned int filter,
                                     unsigned int address,
                                     bool is_bordered,
                                     uint32_t border_color)
{
    unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN);
    unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG);
    unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU);
    unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV);
    unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP);

    if (f->linear) {
        /* sometimes games try to set mipmap min filters on linear textures.
         * this could indicate a bug... */
        switch (min_filter) {
        case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD:
        case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD:
            min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0;
            break;
        case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD:
        case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD:
            min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0;
            break;
        }
    }

    if (min_filter != binding->min_filter) {
        glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER,
                        pgraph_texture_min_filter_gl_map[min_filter]);
        binding->min_filter = min_filter;
    }
    if (mag_filter != binding->mag_filter) {
        glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER,
                        pgraph_texture_mag_filter_gl_map[mag_filter]);
        binding->mag_filter = mag_filter;
    }

    /* Texture wrapping */
    assert(addru < ARRAY_SIZE(pgraph_texture_addr_gl_map));
    if (addru != binding->addru) {
        glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S,
                        pgraph_texture_addr_gl_map[addru]);
        binding->addru = addru;
    }
    bool needs_border_color = binding->addru == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
    if (dimensionality > 1) {
        if (addrv != binding->addrv) {
            assert(addrv < ARRAY_SIZE(pgraph_texture_addr_gl_map));
            glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T,
                            pgraph_texture_addr_gl_map[addrv]);
            binding->addrv = addrv;
        }
        needs_border_color = needs_border_color || binding->addrv == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
    }
    if (dimensionality > 2) {
        if (addrp != binding->addrp) {
            assert(addrp < ARRAY_SIZE(pgraph_texture_addr_gl_map));
            glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R,
                            pgraph_texture_addr_gl_map[addrp]);
            binding->addrp = addrp;
        }
        needs_border_color = needs_border_color || binding->addrp == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
    }

    /* Bordered textures carry their own border texels; otherwise emulate
     * the border with GL's border color. */
    if (!is_bordered && needs_border_color) {
        if (!binding->border_color_set || binding->border_color != border_color) {
            /* FIXME: Color channels might be wrong order */
GLfloat gl_border_color[4]; + pgraph_argb_pack32_to_rgba_float(border_color, gl_border_color); + glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR, + gl_border_color); + + binding->border_color_set = true; + binding->border_color = border_color; + } + } +} + +void pgraph_gl_bind_textures(NV2AState *d) +{ + int i; + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + NV2A_GL_DGROUP_BEGIN("%s", __func__); + + for (i=0; ivram)); + assert((palette_vram_offset + palette_length) + < memory_region_size(d->vram)); + bool is_indexed = (state.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8); + bool possibly_dirty = false; + bool possibly_dirty_checked = false; + + SurfaceBinding *surface = pgraph_gl_surface_get(d, texture_vram_offset); + TextureBinding *tbind = r->texture_binding[i]; + if (!pg->texture_dirty[i] && tbind) { + bool reusable = false; + if (surface && tbind->draw_time == surface->draw_time) { + reusable = true; + } else if (!surface) { + possibly_dirty = check_texture_possibly_dirty( + d, + texture_vram_offset, + length, + palette_vram_offset, + is_indexed ? 
palette_length : 0); + possibly_dirty_checked = true; + reusable = !possibly_dirty; + } + + if (reusable) { + glBindTexture(r->texture_binding[i]->gl_target, + r->texture_binding[i]->gl_texture); + apply_texture_parameters(r->texture_binding[i], + &kelvin_color_format_info_map[state.color_format], + state.dimensionality, + filter, + address, + state.border, + border_color); + continue; + } + } + + /* + * Check active surfaces to see if this texture was a render target + */ + bool surf_to_tex = false; + if (surface != NULL) { + surf_to_tex = pgraph_gl_check_surface_to_texture_compatibility( + surface, &state); + + if (surf_to_tex && surface->upload_pending) { + pgraph_gl_upload_surface_data(d, surface, false); + } + } + + if (!surf_to_tex) { + // FIXME: Restructure to support rendering surfaces to cubemap faces + + // Writeback any surfaces which this texture may index + hwaddr tex_vram_end = texture_vram_offset + length - 1; + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + hwaddr surf_vram_end = surface->vram_addr + surface->size - 1; + bool overlapping = !(surface->vram_addr >= tex_vram_end + || texture_vram_offset >= surf_vram_end); + if (overlapping) { + pgraph_gl_surface_download_if_dirty(d, surface); + } + } + } + + TextureKey key; + memset(&key, 0, sizeof(TextureKey)); + key.state = state; + key.texture_vram_offset = texture_vram_offset; + key.texture_length = length; + if (is_indexed) { + key.palette_vram_offset = palette_vram_offset; + key.palette_length = palette_length; + } + + // Search for existing texture binding in cache + uint64_t tex_binding_hash = fast_hash((uint8_t*)&key, sizeof(key)); + LruNode *found = lru_lookup(&r->texture_cache, + tex_binding_hash, &key); + TextureLruNode *key_out = container_of(found, TextureLruNode, node); + possibly_dirty |= (key_out->binding == NULL) || key_out->possibly_dirty; + + if (!surf_to_tex && !possibly_dirty_checked) { + possibly_dirty |= check_texture_possibly_dirty( + d, + texture_vram_offset, + length, + 
palette_vram_offset, + is_indexed ? palette_length : 0); + } + + // Calculate hash of texture data, if necessary + void *texture_data = (char*)d->vram_ptr + texture_vram_offset; + void *palette_data = (char*)d->vram_ptr + palette_vram_offset; + + uint64_t tex_data_hash = 0; + if (!surf_to_tex && possibly_dirty) { + tex_data_hash = fast_hash(texture_data, length); + if (is_indexed) { + tex_data_hash ^= fast_hash(palette_data, palette_length); + } + } + + // Free existing binding, if texture data has changed + bool must_destroy = (key_out->binding != NULL) + && possibly_dirty + && (key_out->binding->data_hash != tex_data_hash); + if (must_destroy) { + texture_binding_destroy(key_out->binding); + key_out->binding = NULL; + } + + if (key_out->binding == NULL) { + // Must create the texture + key_out->binding = generate_texture(state, texture_data, palette_data); + key_out->binding->data_hash = tex_data_hash; + key_out->binding->scale = 1; + } else { + // Saved an upload! Reuse existing texture in graphics memory. 
            glBindTexture(key_out->binding->gl_target,
                          key_out->binding->gl_texture);
        }

        key_out->possibly_dirty = false;
        TextureBinding *binding = key_out->binding;
        binding->refcnt++;

        /* If this texture aliases a surface that was drawn since we last
         * copied it, re-render the surface into the texture. */
        if (surf_to_tex && binding->draw_time < surface->draw_time) {

            trace_nv2a_pgraph_surface_render_to_texture(
                surface->vram_addr, surface->width, surface->height);
            pgraph_gl_render_surface_to_texture(d, surface, binding, &state, i);
            binding->draw_time = surface->draw_time;
            if (binding->gl_target == GL_TEXTURE_RECTANGLE) {
                binding->scale = pg->surface_scale_factor;
            } else {
                binding->scale = 1;
            }
        }

        apply_texture_parameters(binding,
                                 &kelvin_color_format_info_map[state.color_format],
                                 state.dimensionality,
                                 filter,
                                 address,
                                 state.border,
                                 border_color);

        /* Release the previous binding for this stage. */
        if (r->texture_binding[i]) {
            if (r->texture_binding[i]->gl_target != binding->gl_target) {
                glBindTexture(r->texture_binding[i]->gl_target, 0);
            }
            texture_binding_destroy(r->texture_binding[i]);
        }
        r->texture_binding[i] = binding;
        pg->texture_dirty[i] = false;
    }
    NV2A_GL_DGROUP_END();
}

/* Map a compressed GL internal format to the software S3TC decompressor's
 * format enum. Asserts on any non-S3TC format. */
static enum S3TC_DECOMPRESS_FORMAT
gl_internal_format_to_s3tc_enum(GLint gl_internal_format)
{
    switch (gl_internal_format) {
    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
        return S3TC_DECOMPRESS_FORMAT_DXT1;
    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
        return S3TC_DECOMPRESS_FORMAT_DXT3;
    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
        return S3TC_DECOMPRESS_FORMAT_DXT5;
    default:
        assert(!"Invalid format");
    }
}

/* Upload one texture image (all mip levels for 2D/cube-face/3D targets)
 * from guest memory to the currently bound GL texture. Handles linear
 * (rectangle) textures, swizzled textures, palette conversion, software
 * S3TC decompression, and the doubled-size layout of bordered textures. */
static void upload_gl_texture(GLenum gl_target,
                              const TextureShape s,
                              const uint8_t *texture_data,
                              const uint8_t *palette_data)
{
    ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format];
    nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD);

    /* Bordered (non-linear) textures are stored at doubled dimensions
     * (minimum 16) to make room for the border texels. */
    unsigned int adjusted_width = s.width;
    unsigned int adjusted_height = s.height;
    unsigned int adjusted_pitch = s.pitch;
    unsigned int adjusted_depth = s.depth;
    if (!f.linear && s.border) {
        adjusted_width = MAX(16, adjusted_width * 2);
        adjusted_height = MAX(16, adjusted_height * 2);
        adjusted_pitch = adjusted_width * (s.pitch / s.width);
        adjusted_depth = MAX(16, s.depth * 2);
    }

    switch(gl_target) {
    case GL_TEXTURE_1D:
        assert(false);
        break;
    case GL_TEXTURE_RECTANGLE: {
        /* Can't handle strides unaligned to pixels */
        assert(s.pitch % f.bytes_per_pixel == 0);

        uint8_t *converted = pgraph_convert_texture_data(
            s, texture_data, palette_data, adjusted_width, adjusted_height, 1,
            adjusted_pitch, 0, NULL);
        glPixelStorei(GL_UNPACK_ROW_LENGTH,
                      converted ? 0 : adjusted_pitch / f.bytes_per_pixel);
        glTexImage2D(gl_target, 0, f.gl_internal_format,
                     adjusted_width, adjusted_height, 0,
                     f.gl_format, f.gl_type,
                     converted ? converted : texture_data);

        if (converted) {
            g_free(converted);
        }

        glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
        break;
    }
    case GL_TEXTURE_2D:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: {

        unsigned int width = adjusted_width, height = adjusted_height;

        int level;
        for (level = 0; level < s.levels; level++) {
            width = MAX(width, 1);
            height = MAX(height, 1);

            if (f.gl_format == 0) { /* compressed */
                // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size
                unsigned int block_size =
                    f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ?
                        8 : 16;
                /* Block-compressed data is stored in 4x4 blocks; round the
                 * mip dimensions up to the physical (stored) size. */
                unsigned int physical_width = (width + 3) & ~3,
                             physical_height = (height + 3) & ~3;
                if (physical_width != width) {
                    glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width);
                }
                uint8_t *converted = s3tc_decompress_2d(
                    gl_internal_format_to_s3tc_enum(f.gl_internal_format),
                    texture_data, physical_width, physical_height);
                unsigned int tex_width = width;
                unsigned int tex_height = height;

                if (s.cubemap && adjusted_width != s.width) {
                    // FIXME: Consider preserving the border.
                    // There does not seem to be a way to reference the border
                    // texels in a cubemap, so they are discarded.
                    glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4);
                    glPixelStorei(GL_UNPACK_SKIP_ROWS, 4);
                    tex_width = s.width;
                    tex_height = s.height;
                    if (physical_width == width) {
                        glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width);
                    }
                }

                glTexImage2D(gl_target, level, GL_RGBA, tex_width, tex_height, 0,
                             GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, converted);
                g_free(converted);
                if (physical_width != width) {
                    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
                }
                if (s.cubemap && adjusted_width != s.width) {
                    glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
                    glPixelStorei(GL_UNPACK_SKIP_ROWS, 0);
                    if (physical_width == width) {
                        glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
                    }
                }
                texture_data +=
                    physical_width / 4 * physical_height / 4 * block_size;
            } else {
                /* Uncompressed swizzled: deswizzle, then (optionally)
                 * convert format before upload. */
                unsigned int pitch = width * f.bytes_per_pixel;
                uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch);
                unswizzle_rect(texture_data, width, height,
                               unswizzled, pitch, f.bytes_per_pixel);
                uint8_t *converted = pgraph_convert_texture_data(
                    s, unswizzled, palette_data, width, height, 1, pitch, 0,
                    NULL);
                uint8_t *pixel_data = converted ? converted : unswizzled;
                unsigned int tex_width = width;
                unsigned int tex_height = height;

                if (s.cubemap && adjusted_width != s.width) {
                    // FIXME: Consider preserving the border.
                    // There does not seem to be a way to reference the border
                    // texels in a cubemap, so they are discarded.
                    glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width);
                    tex_width = s.width;
                    tex_height = s.height;
                    /* Skip the 4-texel border on the top and left edges. */
                    pixel_data += 4 * f.bytes_per_pixel + 4 * pitch;
                }

                glTexImage2D(gl_target, level, f.gl_internal_format, tex_width,
                             tex_height, 0, f.gl_format, f.gl_type,
                             pixel_data);
                if (s.cubemap && s.border) {
                    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
                }
                if (converted) {
                    g_free(converted);
                }
                g_free(unswizzled);

                texture_data += width * height * f.bytes_per_pixel;
            }

            width /= 2;
            height /= 2;
        }

        break;
    }
    case GL_TEXTURE_3D: {

        unsigned int width = adjusted_width;
        unsigned int height = adjusted_height;
        unsigned int depth = adjusted_depth;

        assert(f.linear == false);

        int level;
        for (level = 0; level < s.levels; level++) {
            if (f.gl_format == 0) { /* compressed */
                assert(width % 4 == 0 && height % 4 == 0 &&
                       "Compressed 3D texture virtual size");
                width = MAX(width, 4);
                height = MAX(height, 4);
                depth = MAX(depth, 1);

                unsigned int block_size;
                if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
                    block_size = 8;
                } else {
                    block_size = 16;
                }

                size_t texture_size = width/4 * height/4 * depth * block_size;

                uint8_t *converted = s3tc_decompress_3d(
                    gl_internal_format_to_s3tc_enum(f.gl_internal_format),
                    texture_data, width, height, depth);

                glTexImage3D(gl_target, level, GL_RGBA8,
                             width, height, depth, 0,
                             GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
                             converted);

                g_free(converted);

                texture_data += texture_size;
            } else {
                width = MAX(width, 1);
                height = MAX(height, 1);
                depth = MAX(depth, 1);

                unsigned int row_pitch = width * f.bytes_per_pixel;
                unsigned int slice_pitch = row_pitch * height;
                uint8_t *unswizzled = (uint8_t*)g_malloc(slice_pitch * depth);
                unswizzle_box(texture_data, width, height, depth, unswizzled,
                              row_pitch, slice_pitch, f.bytes_per_pixel);

                uint8_t *converted = pgraph_convert_texture_data(
                    s, unswizzled, palette_data, width, height, depth,
                    row_pitch, slice_pitch, NULL);

                glTexImage3D(gl_target, level, f.gl_internal_format,
                             width, height, depth, 0,
                             f.gl_format, f.gl_type,
                             converted ? converted : unswizzled);

                if (converted) {
                    g_free(converted);
                }
                g_free(unswizzled);

                texture_data += width * height * depth * f.bytes_per_pixel;
            }

            width /= 2;
            height /= 2;
            depth /= 2;
        }
        break;
    }
    default:
        assert(false);
        break;
    }
}

/* Create a GL texture object for the given texture shape, select the
 * appropriate GL target (rectangle for linear, cube map, 1D/2D/3D), upload
 * all faces/levels, and configure mip range and channel swizzle. Returns a
 * newly allocated TextureBinding. */
static TextureBinding* generate_texture(const TextureShape s,
                                        const uint8_t *texture_data,
                                        const uint8_t *palette_data)
{
    ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format];

    /* Create a new opengl texture */
    GLuint gl_texture;
    glGenTextures(1, &gl_texture);

    GLenum gl_target;
    if (s.cubemap) {
        assert(f.linear == false);
        assert(s.dimensionality == 2);
        gl_target = GL_TEXTURE_CUBE_MAP;
    } else {
        if (f.linear) {
            /* linear textures use unnormalised texcoords.
             * GL_TEXTURE_RECTANGLE_ARB conveniently also does, but
             * does not allow repeat and mirror wrap modes.
             *  (or mipmapping, but xbox d3d says 'Non swizzled and non
             *   compressed textures cannot be mip mapped.')
             * Not sure if that'll be an issue. */

            /* FIXME: GLSL 330 provides us with textureSize()! Use that? */
            gl_target = GL_TEXTURE_RECTANGLE;
            assert(s.dimensionality == 2);
        } else {
            switch(s.dimensionality) {
            case 1: gl_target = GL_TEXTURE_1D; break;
            case 2: gl_target = GL_TEXTURE_2D; break;
            case 3: gl_target = GL_TEXTURE_3D; break;
            default:
                assert(false);
                break;
            }
        }
    }

    glBindTexture(gl_target, gl_texture);

    NV2A_GL_DLABEL(GL_TEXTURE, gl_texture,
                   "offset: 0x%08lx, format: 0x%02X%s, %d dimensions%s, "
                   "width: %d, height: %d, depth: %d",
                   texture_data - g_nv2a->vram_ptr,
                   s.color_format, f.linear ? "" : " (SZ)",
                   s.dimensionality, s.cubemap ?
" (Cubemap)" : "", + s.width, s.height, s.depth); + + if (gl_target == GL_TEXTURE_CUBE_MAP) { + + ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format]; + unsigned int block_size; + if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + block_size = 8; + } else { + block_size = 16; + } + + size_t length = 0; + unsigned int w = s.width; + unsigned int h = s.height; + if (!f.linear && s.border) { + w = MAX(16, w * 2); + h = MAX(16, h * 2); + } + + int level; + for (level = 0; level < s.levels; level++) { + if (f.gl_format == 0) { + length += w/4 * h/4 * block_size; + } else { + length += w * h * f.bytes_per_pixel; + } + + w /= 2; + h /= 2; + } + + length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1); + + upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X, + s, texture_data + 0 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + s, texture_data + 1 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, + s, texture_data + 2 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + s, texture_data + 3 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + s, texture_data + 4 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, + s, texture_data + 5 * length, palette_data); + } else { + upload_gl_texture(gl_target, s, texture_data, palette_data); + } + + /* Linear textures don't support mipmapping */ + if (!f.linear) { + glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL, + s.min_mipmap_level); + glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL, + s.levels - 1); + } + + if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0 + || f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) { + glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA, + (const GLint *)f.gl_swizzle_mask); + } + + TextureBinding* ret = (TextureBinding *)g_malloc(sizeof(TextureBinding)); + ret->gl_target = gl_target; + 
ret->gl_texture = gl_texture; + ret->refcnt = 1; + ret->draw_time = 0; + ret->data_hash = 0; + ret->min_filter = 0xFFFFFFFF; + ret->mag_filter = 0xFFFFFFFF; + ret->addru = 0xFFFFFFFF; + ret->addrv = 0xFFFFFFFF; + ret->addrp = 0xFFFFFFFF; + ret->border_color_set = false; + return ret; +} + +static void texture_binding_destroy(gpointer data) +{ + TextureBinding *binding = (TextureBinding *)data; + assert(binding->refcnt > 0); + binding->refcnt--; + if (binding->refcnt == 0) { + glDeleteTextures(1, &binding->gl_texture); + g_free(binding); + } +} + +/* functions for texture LRU cache */ +static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key) +{ + TextureLruNode *tnode = container_of(node, TextureLruNode, node); + memcpy(&tnode->key, key, sizeof(TextureKey)); + + tnode->binding = NULL; + tnode->possibly_dirty = false; +} + +static void texture_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + TextureLruNode *tnode = container_of(node, TextureLruNode, node); + if (tnode->binding) { + texture_binding_destroy(tnode->binding); + tnode->binding = NULL; + tnode->possibly_dirty = false; + } +} + +static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + TextureLruNode *tnode = container_of(node, TextureLruNode, node); + return memcmp(&tnode->key, key, sizeof(TextureKey)); +} + +void pgraph_gl_init_texture_cache(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + const size_t texture_cache_size = 512; + lru_init(&r->texture_cache); + r->texture_cache_entries = malloc(texture_cache_size * sizeof(TextureLruNode)); + assert(r->texture_cache_entries != NULL); + for (int i = 0; i < texture_cache_size; i++) { + lru_add_free(&r->texture_cache, &r->texture_cache_entries[i].node); + } + + r->texture_cache.init_node = texture_cache_entry_init; + r->texture_cache.compare_nodes = texture_cache_entry_compare; + r->texture_cache.post_node_evict = texture_cache_entry_post_evict; +} + +void 
pgraph_gl_deinit_texture_cache(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + // Clear out texture cache + lru_flush(&r->texture_cache); + free(r->texture_cache_entries); +} diff --git a/hw/xbox/nv2a/pgraph/gl/vertex.c b/hw/xbox/nv2a/pgraph/gl/vertex.c new file mode 100644 index 0000000000..21f42b647c --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/vertex.c @@ -0,0 +1,283 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/nv2a_regs.h" +#include +#include "debug.h" +#include "renderer.h" + +static void update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size, + bool quick) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer); + + hwaddr end = TARGET_PAGE_ALIGN(addr + size); + addr &= TARGET_PAGE_MASK; + assert(end < memory_region_size(d->vram)); + + static hwaddr last_addr, last_end; + if (quick && (addr >= last_addr) && (end <= last_end)) { + return; + } + last_addr = addr; + last_end = end; + + size = end - addr; + if (memory_region_test_and_clear_dirty(d->vram, addr, size, + DIRTY_MEMORY_NV2A)) { + glBufferSubData(GL_ARRAY_BUFFER, addr, size, + d->vram_ptr + addr); + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1); + } +} + +void pgraph_gl_update_entire_memory_buffer(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer); + glBufferSubData(GL_ARRAY_BUFFER, 0, memory_region_size(d->vram), d->vram_ptr); +} + +void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element, + unsigned int max_element, bool inline_data, + unsigned int inline_stride, + unsigned int provoking_element) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + bool updated_memory_buffer = false; + unsigned int num_elements = max_element - min_element + 1; + + if (inline_data) { + NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)", + __func__, num_elements, inline_stride); + } else { + NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements); + } + + pg->compressed_attrs = 0; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + + if (!attr->count) { + glDisableVertexAttribArray(i); + glVertexAttrib4fv(i, attr->inline_value); + continue; + } + + NV2A_DPRINTF("vertex data 
array format=%d, count=%d, stride=%d\n", + attr->format, attr->count, attr->stride); + + GLint gl_count = attr->count; + GLenum gl_type; + GLboolean gl_normalize; + bool needs_conversion = false; + + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: + gl_type = GL_UNSIGNED_BYTE; + gl_normalize = GL_TRUE; + // http://www.opengl.org/registry/specs/ARB/vertex_array_bgra.txt + gl_count = GL_BGRA; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: + gl_type = GL_UNSIGNED_BYTE; + gl_normalize = GL_TRUE; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: + gl_type = GL_SHORT; + gl_normalize = GL_TRUE; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + gl_type = GL_FLOAT; + gl_normalize = GL_FALSE; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: + gl_type = GL_SHORT; + gl_normalize = GL_FALSE; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: + /* 3 signed, normalized components packed in 32-bits. (11,11,10) */ + gl_type = GL_INT; + assert(attr->count == 1); + needs_conversion = true; + break; + default: + fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format); + assert(false); + break; + } + + nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND); + hwaddr attrib_data_addr; + size_t stride; + + if (needs_conversion) { + pg->compressed_attrs |= (1 << i); + } + + hwaddr start = 0; + if (inline_data) { + glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer); + attrib_data_addr = attr->inline_array_offset; + stride = inline_stride; + } else { + hwaddr dma_len; + uint8_t *attr_data = (uint8_t *)nv_dma_map( + d, attr->dma_select ? 
pg->dma_vertex_b : pg->dma_vertex_a, + &dma_len); + assert(attr->offset < dma_len); + attrib_data_addr = attr_data + attr->offset - d->vram_ptr; + stride = attr->stride; + start = attrib_data_addr + min_element * stride; + update_memory_buffer(d, start, num_elements * stride, + updated_memory_buffer); + updated_memory_buffer = true; + } + + uint32_t provoking_element_index = provoking_element - min_element; + size_t element_size = attr->size * attr->count; + assert(element_size <= sizeof(attr->inline_value)); + const uint8_t *last_entry; + + if (inline_data) { + last_entry = (uint8_t*)pg->inline_array + attr->inline_array_offset; + } else { + last_entry = d->vram_ptr + start; + } + if (!stride) { + // Stride of 0 indicates that only the first element should be + // used. + pgraph_update_inline_value(attr, last_entry); + glDisableVertexAttribArray(i); + glVertexAttrib4fv(i, attr->inline_value); + continue; + } + + if (needs_conversion) { + glVertexAttribIPointer(i, gl_count, gl_type, stride, + (void *)attrib_data_addr); + } else { + glVertexAttribPointer(i, gl_count, gl_type, gl_normalize, stride, + (void *)attrib_data_addr); + } + + glEnableVertexAttribArray(i); + last_entry += stride * provoking_element_index; + pgraph_update_inline_value(attr, last_entry); + } + + NV2A_GL_DGROUP_END(); +} + +unsigned int pgraph_gl_bind_inline_array(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + unsigned int offset = 0; + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + if (attr->count == 0) { + continue; + } + + /* FIXME: Double check */ + offset = ROUND_UP(offset, attr->size); + attr->inline_array_offset = offset; + NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", + i, attr->size, attr->count); + offset += attr->size * attr->count; + offset = ROUND_UP(offset, attr->size); + } + + unsigned int vertex_size = offset; + unsigned int index_count = 
pg->inline_array_length*4 / vertex_size; + + NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count); + + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2); + glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer); + glBufferData(GL_ARRAY_BUFFER, NV2A_MAX_BATCH_LENGTH * sizeof(uint32_t), + NULL, GL_STREAM_DRAW); + glBufferSubData(GL_ARRAY_BUFFER, 0, index_count * vertex_size, pg->inline_array); + pgraph_gl_bind_vertex_attributes(d, 0, index_count-1, true, vertex_size, + index_count-1); + + return index_count; +} + +static void vertex_cache_entry_init(Lru *lru, LruNode *node, void *key) +{ + VertexLruNode *vnode = container_of(node, VertexLruNode, node); + memcpy(&vnode->key, key, sizeof(struct VertexKey)); + vnode->initialized = false; +} + +static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + VertexLruNode *vnode = container_of(node, VertexLruNode, node); + return memcmp(&vnode->key, key, sizeof(VertexKey)); +} + +void pgraph_gl_init_vertex_cache(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + const size_t element_cache_size = 50*1024; + lru_init(&r->element_cache); + r->element_cache_entries = malloc(element_cache_size * sizeof(VertexLruNode)); + assert(r->element_cache_entries != NULL); + GLuint element_cache_buffers[element_cache_size]; + glGenBuffers(element_cache_size, element_cache_buffers); + for (int i = 0; i < element_cache_size; i++) { + r->element_cache_entries[i].gl_buffer = element_cache_buffers[i]; + lru_add_free(&r->element_cache, &r->element_cache_entries[i].node); + } + + r->element_cache.init_node = vertex_cache_entry_init; + r->element_cache.compare_nodes = vertex_cache_entry_compare; + + GLint max_vertex_attributes; + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes); + assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES); + + glGenBuffers(NV2A_VERTEXSHADER_ATTRIBUTES, r->gl_inline_buffer); + glGenBuffers(1, 
&r->gl_inline_array_buffer); + + glGenBuffers(1, &r->gl_memory_buffer); + glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer); + glBufferData(GL_ARRAY_BUFFER, memory_region_size(d->vram), + NULL, GL_DYNAMIC_DRAW); + + glGenVertexArrays(1, &r->gl_vertex_array); + glBindVertexArray(r->gl_vertex_array); + + assert(glGetError() == GL_NO_ERROR); +} diff --git a/hw/xbox/nv2a/pgraph/glsl/common.c b/hw/xbox/nv2a/pgraph/glsl/common.c new file mode 100644 index 0000000000..7059880373 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/common.c @@ -0,0 +1,58 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + + +#include "common.h" + + +MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array) +{ + const char *flat_s = "flat"; + const char *noperspective_s = "noperspective"; + const char *qualifier_s = smooth ? noperspective_s : flat_s; + const char *qualifiers[11] = { + noperspective_s, flat_s, qualifier_s, qualifier_s, + qualifier_s, qualifier_s, noperspective_s, noperspective_s, + noperspective_s, noperspective_s, noperspective_s + }; + + const char *in_out_s = in ? 
"in" : "out"; + + const char *float_s = "float"; + const char *vec4_s = "vec4"; + const char *types[11] = { float_s, float_s, vec4_s, vec4_s, vec4_s, vec4_s, + float_s, vec4_s, vec4_s, vec4_s, vec4_s }; + + const char *prefix_s = prefix ? "v_" : ""; + const char *names[11] = { + "vtx_inv_w", "vtx_inv_w_flat", "vtxD0", "vtxD1", "vtxB0", "vtxB1", + "vtxFog", "vtxT0", "vtxT1", "vtxT2", "vtxT3", + }; + const char *suffix_s = array ? "[]" : ""; + + for (int i = 0; i < 11; i++) { + if (location) { + mstring_append_fmt(out, "layout(location = %d) ", i); + } + mstring_append_fmt(out, "%s %s %s %s%s%s;\n", + qualifiers[i], in_out_s, types[i], prefix_s, names[i], suffix_s); + } + + return out; +} diff --git a/hw/xbox/nv2a/pgraph/glsl/common.h b/hw/xbox/nv2a/pgraph/glsl/common.h new file mode 100644 index 0000000000..6820a1dcb1 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/common.h @@ -0,0 +1,38 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_NV2A_SHADERS_COMMON_H +#define HW_NV2A_SHADERS_COMMON_H + +#include "qemu/mstring.h" +#include + +#define GLSL_C(idx) "c[" stringify(idx) "]" +#define GLSL_LTCTXA(idx) "ltctxa[" stringify(idx) "]" + +#define GLSL_C_MAT4(idx) \ + "mat4(" GLSL_C(idx) ", " GLSL_C(idx+1) ", " \ + GLSL_C(idx+2) ", " GLSL_C(idx+3) ")" + +#define GLSL_DEFINE(a, b) "#define " stringify(a) " " b "\n" + +MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array); + +#endif diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c new file mode 100644 index 0000000000..0e738f0280 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -0,0 +1,228 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "common.h" +#include "geom.h" + +MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode, + enum ShaderPolygonMode polygon_back_mode, + enum ShaderPrimitiveMode primitive_mode, + bool smooth_shading, + bool vulkan) +{ + /* FIXME: Missing support for 2-sided-poly mode */ + assert(polygon_front_mode == polygon_back_mode); + enum ShaderPolygonMode polygon_mode = polygon_front_mode; + + /* POINT mode shouldn't require any special work */ + if (polygon_mode == POLY_MODE_POINT) { + return NULL; + } + + /* Handle LINE and FILL mode */ + const char *layout_in = NULL; + const char *layout_out = NULL; + const char *body = NULL; + switch (primitive_mode) { + case PRIM_TYPE_POINTS: return NULL; + case PRIM_TYPE_LINES: return NULL; + case PRIM_TYPE_LINE_LOOP: return NULL; + case PRIM_TYPE_LINE_STRIP: return NULL; + case PRIM_TYPE_TRIANGLES: + if (polygon_mode == POLY_MODE_FILL) { return NULL; } + assert(polygon_mode == POLY_MODE_LINE); + layout_in = "layout(triangles) in;\n"; + layout_out = "layout(line_strip, max_vertices = 4) out;\n"; + body = " emit_vertex(0, 0);\n" + " emit_vertex(1, 0);\n" + " emit_vertex(2, 0);\n" + " emit_vertex(0, 0);\n" + " EndPrimitive();\n"; + break; + case PRIM_TYPE_TRIANGLE_STRIP: + if (polygon_mode == POLY_MODE_FILL) { return NULL; } + assert(polygon_mode == POLY_MODE_LINE); + layout_in = "layout(triangles) in;\n"; + layout_out = "layout(line_strip, max_vertices = 4) out;\n"; + /* Imagine a quad made of a tristrip, the comments tell you which + * vertex we are using */ + body = " if ((gl_PrimitiveIDIn & 1) == 0) {\n" + " if (gl_PrimitiveIDIn == 0) {\n" + " emit_vertex(0, 0);\n" /* bottom right */ + " }\n" + " emit_vertex(1, 0);\n" /* top right */ + " emit_vertex(2, 0);\n" /* bottom left */ + " emit_vertex(0, 0);\n" /* bottom right */ + " } else {\n" + " emit_vertex(2, 0);\n" /* bottom left */ + " emit_vertex(1, 0);\n" /* top left */ + " emit_vertex(0, 0);\n" /* top right */ 
+ " }\n" + " EndPrimitive();\n"; + break; + case PRIM_TYPE_TRIANGLE_FAN: + if (polygon_mode == POLY_MODE_FILL) { return NULL; } + assert(polygon_mode == POLY_MODE_LINE); + layout_in = "layout(triangles) in;\n"; + layout_out = "layout(line_strip, max_vertices = 4) out;\n"; + body = " if (gl_PrimitiveIDIn == 0) {\n" + " emit_vertex(0, 0);\n" + " }\n" + " emit_vertex(1, 0);\n" + " emit_vertex(2, 0);\n" + " emit_vertex(0, 0);\n" + " EndPrimitive();\n"; + break; + case PRIM_TYPE_QUADS: + layout_in = "layout(lines_adjacency) in;\n"; + if (polygon_mode == POLY_MODE_LINE) { + layout_out = "layout(line_strip, max_vertices = 5) out;\n"; + body = " emit_vertex(0, 3);\n" + " emit_vertex(1, 3);\n" + " emit_vertex(2, 3);\n" + " emit_vertex(3, 3);\n" + " emit_vertex(0, 3);\n" + " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; + body = " emit_vertex(3, 3);\n" + " emit_vertex(0, 3);\n" + " emit_vertex(2, 3);\n" + " emit_vertex(1, 3);\n" + " EndPrimitive();\n"; + } else { + assert(false); + return NULL; + } + break; + case PRIM_TYPE_QUAD_STRIP: + layout_in = "layout(lines_adjacency) in;\n"; + if (polygon_mode == POLY_MODE_LINE) { + layout_out = "layout(line_strip, max_vertices = 5) out;\n"; + body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" + " if (gl_PrimitiveIDIn == 0) {\n" + " emit_vertex(0, 3);\n" + " }\n" + " emit_vertex(1, 3);\n" + " emit_vertex(3, 3);\n" + " emit_vertex(2, 3);\n" + " emit_vertex(0, 3);\n" + " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; + body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" + " emit_vertex(0, 3);\n" + " emit_vertex(1, 3);\n" + " emit_vertex(2, 3);\n" + " emit_vertex(3, 3);\n" + " EndPrimitive();\n"; + } else { + assert(false); + return NULL; + } + break; + case PRIM_TYPE_POLYGON: + if (polygon_mode == POLY_MODE_LINE) { + return NULL; + } + if (polygon_mode == 
POLY_MODE_FILL) { + if (smooth_shading) { + return NULL; + } + layout_in = "layout(triangles) in;\n"; + layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; + body = " emit_vertex(0, 2);\n" + " emit_vertex(1, 2);\n" + " emit_vertex(2, 2);\n" + " EndPrimitive();\n"; + } else { + assert(false); + return NULL; + } + break; + + default: + assert(false); + return NULL; + } + + /* generate a geometry shader to support deprecated primitive types */ + assert(layout_in); + assert(layout_out); + assert(body); + MString *s = mstring_new(); + mstring_append_fmt(s, "#version %d\n\n", vulkan ? 450 : 400); + mstring_append(s, layout_in); + mstring_append(s, layout_out); + mstring_append(s, "\n"); + pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, true, true, true); + pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, false, false, false); + + if (smooth_shading) { + mstring_append(s, + "void emit_vertex(int index, int _unused) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " gl_PointSize = gl_in[index].gl_PointSize;\n" + // " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" + // " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" + " vtx_inv_w = v_vtx_inv_w[index];\n" + " vtx_inv_w_flat = v_vtx_inv_w[index];\n" + " vtxD0 = v_vtxD0[index];\n" + " vtxD1 = v_vtxD1[index];\n" + " vtxB0 = v_vtxB0[index];\n" + " vtxB1 = v_vtxB1[index];\n" + " vtxFog = v_vtxFog[index];\n" + " vtxT0 = v_vtxT0[index];\n" + " vtxT1 = v_vtxT1[index];\n" + " vtxT2 = v_vtxT2[index];\n" + " vtxT3 = v_vtxT3[index];\n" + " EmitVertex();\n" + "}\n"); + } else { + mstring_append(s, + "void emit_vertex(int index, int provoking_index) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " gl_PointSize = gl_in[index].gl_PointSize;\n" + // " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" + // " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" + " vtx_inv_w = v_vtx_inv_w[index];\n" + " vtx_inv_w_flat = v_vtx_inv_w[provoking_index];\n" + " vtxD0 = 
v_vtxD0[provoking_index];\n" + " vtxD1 = v_vtxD1[provoking_index];\n" + " vtxB0 = v_vtxB0[provoking_index];\n" + " vtxB1 = v_vtxB1[provoking_index];\n" + " vtxFog = v_vtxFog[index];\n" + " vtxT0 = v_vtxT0[index];\n" + " vtxT1 = v_vtxT1[index];\n" + " vtxT2 = v_vtxT2[index];\n" + " vtxT3 = v_vtxT3[index];\n" + " EmitVertex();\n" + "}\n"); + } + + mstring_append(s, "\n" + "void main() {\n"); + mstring_append(s, body); + mstring_append(s, "}\n"); + + return s; +} diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.h b/hw/xbox/nv2a/pgraph/glsl/geom.h new file mode 100644 index 0000000000..9ca605be71 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/geom.h @@ -0,0 +1,34 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H + +#include "qemu/mstring.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode, + enum ShaderPolygonMode polygon_back_mode, + enum ShaderPrimitiveMode primitive_mode, + bool smooth_shading, + bool vulkan); + +#endif diff --git a/hw/xbox/nv2a/pgraph/glsl/meson.build b/hw/xbox/nv2a/pgraph/glsl/meson.build new file mode 100644 index 0000000000..82df3f7ede --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/meson.build @@ -0,0 +1,8 @@ +specific_ss.add([files( + 'common.c', + 'geom.c', + 'psh.c', + 'vsh.c', + 'vsh-ff.c', + 'vsh-prog.c', + )]) diff --git a/hw/xbox/nv2a/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c similarity index 90% rename from hw/xbox/nv2a/psh.c rename to hw/xbox/nv2a/pgraph/glsl/psh.c index ca9bffe79d..58ad5cf7ac 100644 --- a/hw/xbox/nv2a/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -3,7 +3,7 @@ * * Copyright (c) 2013 espes * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2020-2021 Matt Borgerson + * Copyright (c) 2020-2024 Matt Borgerson * * Based on: * Cxbx, PixelShader.cpp @@ -34,9 +34,9 @@ #include #include -#include "qapi/qmp/qstring.h" - -#include "shaders_common.h" +#include "common.h" +#include "hw/xbox/nv2a/debug.h" +#include "hw/xbox/nv2a/pgraph/psh.h" #include "psh.h" /* @@ -575,7 +575,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s return NULL; case PS_TEXTUREMODES_PROJECT2D: - return state->rect_tex[i] ? sampler2DRect : sampler2D; + return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D; case PS_TEXTUREMODES_BUMPENVMAP: case PS_TEXTUREMODES_BUMPENVMAP_LUM: @@ -584,12 +584,15 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode); assert(!"Shadow map support not implemented for this mode"); } - return state->rect_tex[i] ? 
sampler2DRect : sampler2D; + return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D; case PS_TEXTUREMODES_PROJECT3D: case PS_TEXTUREMODES_DOT_STR_3D: + if (state->tex_x8y24[i] && state->vulkan) { + return "usampler2D"; + } if (state->shadow_map[i]) { - return state->rect_tex[i] ? sampler2DRect : sampler2D; + return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D; } return sampler3D; @@ -634,12 +637,28 @@ static void psh_append_shadowmap(const struct PixelShader *ps, int i, bool compa return; } - mstring_append_fmt(vars, - "pT%d.xy *= texScale%d;\n" - "vec4 t%d_depth = textureProj(texSamp%d, pT%d.xyw);\n", - i, i, i, i, i); - + mstring_append_fmt(vars, "pT%d.xy *= texScale%d;\n", i, i); const char *comparison = shadow_comparison_map[ps->state.shadow_depth_func]; + if (ps->state.rect_tex[i] && ps->state.vulkan) { + if (ps->state.tex_x8y24[i]) { + mstring_append_fmt( + vars, + "uvec4 t%d_depth_raw = texture(texSamp%d, pT%d.xy/pT%d.w);\n", i, i, i, i); + mstring_append_fmt( + vars, + "vec4 t%d_depth = vec4(float(t%d_depth_raw.x & 0xFFFFFF), 1.0, 0.0, 0.0);", + i, i); + } else { + mstring_append_fmt( + vars, + "vec4 t%d_depth = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n", i, + i, i, i); + } + } else { + mstring_append_fmt( + vars, "vec4 t%d_depth = textureProj(texSamp%d, pT%d.xyw);\n", i, i, + i); + } // Depth.y != 0 indicates 24 bit; depth.z != 0 indicates float. 
if (compare_z) { @@ -685,18 +704,69 @@ static void apply_border_adjustment(const struct PixelShader *ps, MString *vars, var_name, var_name, i, ps->state.border_inv_real_size[i][0], ps->state.border_inv_real_size[i][1], ps->state.border_inv_real_size[i][2]); } +static void apply_convolution_filter(const struct PixelShader *ps, MString *vars, int tex) +{ + // FIXME: Convolution for 2D textures + // FIXME: Quincunx + assert(ps->state.rect_tex[tex]); + + if (ps->state.vulkan) { + mstring_append_fmt(vars, + "vec4 t%d = vec4(0.0);\n" + "for (int i = 0; i < 9; i++) {\n" + " vec2 texCoord = pT%d.xy/pT%d.w + convolution3x3[i];\n" + " t%d += textureLod(texSamp%d, texCoord, 0) * gaussian3x3[i];\n" + "}\n", tex, tex, tex, tex, tex); + } else { + mstring_append_fmt(vars, + "vec4 t%d = vec4(0.0);\n" + "for (int i = 0; i < 9; i++) {\n" + " vec3 texCoord = pT%d.xyw + vec3(convolution3x3[i], 0);\n" + " t%d += textureProj(texSamp%d, texCoord) * gaussian3x3[i];\n" + "}\n", tex, tex, tex, tex, tex); + + } +} + static MString* psh_convert(struct PixelShader *ps) { int i; + const char *u = ps->state.vulkan ? "" : "uniform "; // FIXME: Remove + MString *preflight = mstring_new(); - mstring_append(preflight, ps->state.smooth_shading ? 
- STRUCT_VERTEX_DATA_IN_SMOOTH : - STRUCT_VERTEX_DATA_IN_FLAT); - mstring_append(preflight, "\n"); - mstring_append(preflight, "out vec4 fragColor;\n"); - mstring_append(preflight, "\n"); - mstring_append(preflight, "uniform vec4 fogColor;\n"); + pgraph_get_glsl_vtx_header(preflight, ps->state.vulkan, + ps->state.smooth_shading, true, false, false); + + if (ps->state.vulkan) { + mstring_append_fmt(preflight, + "layout(location = 0) out vec4 fragColor;\n" + "layout(binding = %d, std140) uniform PshUniforms {\n", PSH_UBO_BINDING); + } else { + mstring_append_fmt(preflight, + "layout(location = 0) out vec4 fragColor;\n"); + } + + mstring_append_fmt(preflight, "%sfloat alphaRef;\n" + "%svec4 fogColor;\n" + "%sivec4 clipRegion[8];\n", + u, u, u); + for (int i = 0; i < 4; i++) { + mstring_append_fmt(preflight, "%smat2 bumpMat%d;\n" + "%sfloat bumpScale%d;\n" + "%sfloat bumpOffset%d;\n" + "%sfloat texScale%d;\n", + u, i, u, i, u, i, u, i); + } + for (int i = 0; i < 9; i++) { + for (int j = 0; j < 2; j++) { + mstring_append_fmt(preflight, "%svec4 c%d_%d;\n", u, j, i); + } + } + + if (ps->state.vulkan) { + mstring_append(preflight, "};\n"); + } const char *dotmap_funcs[] = { "dotmap_zero_to_one", @@ -766,22 +836,12 @@ static MString* psh_convert(struct PixelShader *ps) " vec2(-1.0,-1.0),vec2(0.0,-1.0),vec2(1.0,-1.0),\n" " vec2(-1.0, 0.0),vec2(0.0, 0.0),vec2(1.0, 0.0),\n" " vec2(-1.0, 1.0),vec2(0.0, 1.0),vec2(1.0, 1.0));\n" - "vec4 gaussianFilter2DRectProj(sampler2DRect sampler, vec3 texCoord) {\n" - " vec4 sum = vec4(0.0);\n" - " for (int i = 0; i < 9; i++) {\n" - " sum += gaussian3x3[i]*textureProj(sampler,\n" - " texCoord + vec3(convolution3x3[i], 0.0));\n" - " }\n" - " return sum;\n" - "}\n" ); /* Window Clipping */ MString *clip = mstring_new(); - mstring_append(preflight, "uniform ivec4 clipRegion[8];\n"); - mstring_append_fmt(clip, "/* Window-clip (%s) */\n", - ps->state.window_clip_exclusive ? 
- "Exclusive" : "Inclusive"); + mstring_append_fmt(clip, "/* Window-clip (%slusive) */\n", + ps->state.window_clip_exclusive ? "Exc" : "Inc"); if (!ps->state.window_clip_exclusive) { mstring_append(clip, "bool clipContained = false;\n"); } @@ -856,23 +916,27 @@ static MString* psh_convert(struct PixelShader *ps) if (ps->state.shadow_map[i]) { psh_append_shadowmap(ps, i, false, vars); } else { - const char *lookup = "textureProj"; - if ((ps->state.conv_tex[i] == CONVOLUTION_FILTER_GAUSSIAN) - || (ps->state.conv_tex[i] == CONVOLUTION_FILTER_QUINCUNX)) { - /* FIXME: Quincunx looks better than Linear and costs less than - * Gaussian, but Gaussian should be plenty fast so use it for - * now. - */ - if (ps->state.rect_tex[i]) { - lookup = "gaussianFilter2DRectProj"; - } else { - NV2A_UNIMPLEMENTED("Convolution for 2D textures"); - } - } apply_border_adjustment(ps, vars, i, "pT%d"); mstring_append_fmt(vars, "pT%d.xy = texScale%d * pT%d.xy;\n", i, i, i); - mstring_append_fmt(vars, "vec4 t%d = %s(texSamp%d, pT%d.xyw);\n", - i, lookup, i, i); + if (ps->state.rect_tex[i]) { + if ((ps->state.conv_tex[i] == + CONVOLUTION_FILTER_GAUSSIAN) || + (ps->state.conv_tex[i] == + CONVOLUTION_FILTER_QUINCUNX)) { + apply_convolution_filter(ps, vars, i); + } else { + if (ps->state.vulkan) { + mstring_append_fmt(vars, "vec4 t%d = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n", + i, i, i, i); + } else { + mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n", + i, i, i); + } + } + } else { + mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n", + i, i, i); + } } break; } @@ -880,6 +944,7 @@ static MString* psh_convert(struct PixelShader *ps) if (ps->state.shadow_map[i]) { psh_append_shadowmap(ps, i, true, vars); } else { + assert(!ps->state.rect_tex[i]); apply_border_adjustment(ps, vars, i, "pT%d"); mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyzw);\n", i, i, i); @@ -906,7 +971,6 @@ static MString* psh_convert(struct PixelShader *ps) 
} case PS_TEXTUREMODES_BUMPENVMAP: assert(i >= 1); - mstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i); if (ps->state.snorm_tex[ps->input_tex[i]]) { /* Input color channels already signed (FIXME: May not always want signed textures in this case) */ @@ -925,9 +989,6 @@ static MString* psh_convert(struct PixelShader *ps) break; case PS_TEXTUREMODES_BUMPENVMAP_LUM: assert(i >= 1); - mstring_append_fmt(preflight, "uniform float bumpScale%d;\n", i); - mstring_append_fmt(preflight, "uniform float bumpOffset%d;\n", i); - mstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i); if (ps->state.snorm_tex[ps->input_tex[i]]) { /* Input color channels already signed (FIXME: May not always want signed textures in this case) */ @@ -1060,8 +1121,10 @@ static MString* psh_convert(struct PixelShader *ps) break; } - mstring_append_fmt(preflight, "uniform float texScale%d;\n", i); if (sampler_type != NULL) { + if (ps->state.vulkan) { + mstring_append_fmt(preflight, "layout(binding = %d) ", PSH_TEX_BINDING + i); + } mstring_append_fmt(preflight, "uniform %s texSamp%d;\n", sampler_type, i); /* As this means a texture fetch does happen, do alphakill */ @@ -1091,7 +1154,6 @@ static MString* psh_convert(struct PixelShader *ps) } if (ps->state.alpha_test && ps->state.alpha_func != ALPHA_FUNC_ALWAYS) { - mstring_append_fmt(preflight, "uniform float alphaRef;\n"); if (ps->state.alpha_func == ALPHA_FUNC_NEVER) { mstring_append(ps->code, "discard;\n"); } else { @@ -1112,10 +1174,6 @@ static MString* psh_convert(struct PixelShader *ps) } } - for (i = 0; i < ps->num_const_refs; i++) { - mstring_append_fmt(preflight, "uniform vec4 %s;\n", ps->const_refs[i]); - } - for (i = 0; i < ps->num_var_refs; i++) { mstring_append_fmt(vars, "vec4 %s;\n", ps->var_refs[i]); if (strcmp(ps->var_refs[i], "r0") == 0) { @@ -1128,7 +1186,7 @@ static MString* psh_convert(struct PixelShader *ps) } MString *final = mstring_new(); - mstring_append(final, "#version 330\n\n"); + mstring_append_fmt(final, 
"#version %d\n\n", ps->state.vulkan ? 450 : 400); mstring_append(final, mstring_get_str(preflight)); mstring_append(final, "void main() {\n"); mstring_append(final, mstring_get_str(clip)); @@ -1175,7 +1233,7 @@ static void parse_combiner_output(uint32_t value, struct OutputInfo *out) out->cd_alphablue = flags & 0x40; } -MString *psh_translate(const PshState state) +MString *pgraph_gen_psh_glsl(const PshState state) { int i; struct PixelShader ps; diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.h b/hw/xbox/nv2a/pgraph/glsl/psh.h new file mode 100644 index 0000000000..1ae0b0db7e --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/psh.h @@ -0,0 +1,41 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2013 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * Based on: + * Cxbx, PixelShader.cpp + * Copyright (c) 2004 Aaron Robinson + * Kingofc + * Xeon, XBD3DPixelShader.cpp + * Copyright (c) 2003 _SF_ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H + +#include "qemu/mstring.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +// FIXME: Move to struct +#define PSH_UBO_BINDING 1 +#define PSH_TEX_BINDING 2 + +MString *pgraph_gen_psh_glsl(const PshState state); + +#endif diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c new file mode 100644 index 0000000000..59749003cd --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -0,0 +1,497 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "common.h" +#include "vsh-ff.h" + +static void append_skinning_code(MString* str, bool mix, + unsigned int count, const char* type, + const char* output, const char* input, + const char* matrix, const char* swizzle); + +void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header, + MString *body, MString *uniforms) +{ + int i, j; + const char *u = state->vulkan ? 
"" : "uniform "; // FIXME: Remove + + /* generate vertex shader mimicking fixed function */ + mstring_append(header, +"#define position v0\n" +"#define weight v1\n" +"#define normal v2.xyz\n" +"#define diffuse v3\n" +"#define specular v4\n" +"#define fogCoord v5.x\n" +"#define pointSize v6\n" +"#define backDiffuse v7\n" +"#define backSpecular v8\n" +"#define texture0 v9\n" +"#define texture1 v10\n" +"#define texture2 v11\n" +"#define texture3 v12\n" +"#define reserved1 v13\n" +"#define reserved2 v14\n" +"#define reserved3 v15\n" +"\n"); + mstring_append_fmt(uniforms, +"%svec4 ltctxa[" stringify(NV2A_LTCTXA_COUNT) "];\n" +"%svec4 ltctxb[" stringify(NV2A_LTCTXB_COUNT) "];\n" +"%svec4 ltc1[" stringify(NV2A_LTC1_COUNT) "];\n", u, u, u +); + mstring_append(header, +"\n" +GLSL_DEFINE(projectionMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_PMAT0)) +GLSL_DEFINE(compositeMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_CMAT0)) +"\n" +GLSL_DEFINE(texPlaneS0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 0)) +GLSL_DEFINE(texPlaneT0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 1)) +GLSL_DEFINE(texPlaneR0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 2)) +GLSL_DEFINE(texPlaneQ0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 3)) +"\n" +GLSL_DEFINE(texPlaneS1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 0)) +GLSL_DEFINE(texPlaneT1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 1)) +GLSL_DEFINE(texPlaneR1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 2)) +GLSL_DEFINE(texPlaneQ1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 3)) +"\n" +GLSL_DEFINE(texPlaneS2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 0)) +GLSL_DEFINE(texPlaneT2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 1)) +GLSL_DEFINE(texPlaneR2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 2)) +GLSL_DEFINE(texPlaneQ2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 3)) +"\n" +GLSL_DEFINE(texPlaneS3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 0)) +GLSL_DEFINE(texPlaneT3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 1)) +GLSL_DEFINE(texPlaneR3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 2)) +GLSL_DEFINE(texPlaneQ3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 3)) +"\n" +GLSL_DEFINE(modelViewMat0, 
GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT0)) +GLSL_DEFINE(modelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT1)) +GLSL_DEFINE(modelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT2)) +GLSL_DEFINE(modelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT3)) +"\n" +GLSL_DEFINE(invModelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT0)) +GLSL_DEFINE(invModelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT1)) +GLSL_DEFINE(invModelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT2)) +GLSL_DEFINE(invModelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT3)) +"\n" +GLSL_DEFINE(eyePosition, GLSL_C(NV_IGRAPH_XF_XFCTX_EYEP)) +"\n" +"#define lightAmbientColor(i) " + "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_AMB) " + (i)*6].xyz\n" +"#define lightDiffuseColor(i) " + "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_DIF) " + (i)*6].xyz\n" +"#define lightSpecularColor(i) " + "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_SPC) " + (i)*6].xyz\n" +"\n" +"#define lightSpotFalloff(i) " + "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_K) " + (i)*2].xyz\n" +"#define lightSpotDirection(i) " + "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_SPT) " + (i)*2]\n" +"\n" +"#define lightLocalRange(i) " + "ltc1[" stringify(NV_IGRAPH_XF_LTC1_r0) " + (i)].x\n" +"\n" +GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz") +GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz") +"\n" +); + mstring_append_fmt(uniforms, +"%smat4 invViewport;\n", u); + + /* Skinning */ + unsigned int count; + bool mix; + switch (state->skinning) { + case SKINNING_OFF: + mix = false; count = 0; break; + case SKINNING_1WEIGHTS: + mix = true; count = 2; break; + case SKINNING_2WEIGHTS2MATRICES: + mix = false; count = 2; break; + case SKINNING_2WEIGHTS: + mix = true; count = 3; break; + case SKINNING_3WEIGHTS3MATRICES: + mix = false; count = 3; break; + case SKINNING_3WEIGHTS: + mix = true; count = 4; break; + case SKINNING_4WEIGHTS4MATRICES: + mix = false; count = 4; break; + default: + assert(false); + break; + } + 
mstring_append_fmt(body, "/* Skinning mode %d */\n", + state->skinning); + + append_skinning_code(body, mix, count, "vec4", + "tPosition", "position", + "modelViewMat", "xyzw"); + append_skinning_code(body, mix, count, "vec3", + "tNormal", "vec4(normal, 0.0)", + "invModelViewMat", "xyz"); + + /* Normalization */ + if (state->normalization) { + mstring_append(body, "tNormal = normalize(tNormal);\n"); + } + + /* Texgen */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + mstring_append_fmt(body, "/* Texgen for stage %d */\n", + i); + /* Set each component individually */ + /* FIXME: could be nicer if some channels share the same texgen */ + for (j = 0; j < 4; j++) { + /* TODO: TexGen View Model missing! */ + char c = "xyzw"[j]; + char cSuffix = "STRQ"[j]; + switch (state->texgen[i][j]) { + case TEXGEN_DISABLE: + mstring_append_fmt(body, "oT%d.%c = texture%d.%c;\n", + i, c, i, c); + break; + case TEXGEN_EYE_LINEAR: + mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, tPosition);\n", + i, c, cSuffix, i); + break; + case TEXGEN_OBJECT_LINEAR: + mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, position);\n", + i, c, cSuffix, i); + break; + case TEXGEN_SPHERE_MAP: + assert(j < 2); /* Channels S,T only! */ + mstring_append(body, "{\n"); + /* FIXME: u, r and m only have to be calculated once */ + mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n"); + //FIXME: tNormal before or after normalization? Always normalize? + mstring_append(body, " vec3 r = reflect(u, tNormal);\n"); + + /* FIXME: This would consume 1 division fewer and *might* be + * faster than length: + * // [z=1/(2*x) => z=1/x*0.5] + * vec3 ro = r + vec3(0.0, 0.0, 1.0); + * float m = inversesqrt(dot(ro,ro))*0.5; + */ + + mstring_append(body, " float invM = 1.0 / (2.0 * length(r + vec3(0.0, 0.0, 1.0)));\n"); + mstring_append_fmt(body, " oT%d.%c = r.%c * invM + 0.5;\n", + i, c, c); + mstring_append(body, "}\n"); + break; + case TEXGEN_REFLECTION_MAP: + assert(j < 3); /* Channels S,T,R only! 
*/ + mstring_append(body, "{\n"); + /* FIXME: u and r only have to be calculated once, can share the one from SPHERE_MAP */ + mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n"); + mstring_append(body, " vec3 r = reflect(u, tNormal);\n"); + mstring_append_fmt(body, " oT%d.%c = r.%c;\n", + i, c, c); + mstring_append(body, "}\n"); + break; + case TEXGEN_NORMAL_MAP: + assert(j < 3); /* Channels S,T,R only! */ + mstring_append_fmt(body, "oT%d.%c = tNormal.%c;\n", + i, c, c); + break; + default: + assert(false); + break; + } + } + } + + /* Apply texture matrices */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + if (state->texture_matrix_enable[i]) { + mstring_append_fmt(body, + "oT%d = oT%d * texMat%d;\n", + i, i, i); + } + } + + /* Lighting */ + if (state->lighting) { + + //FIXME: Do 2 passes if we want 2 sided-lighting? + + static char alpha_source_diffuse[] = "diffuse.a"; + static char alpha_source_specular[] = "specular.a"; + static char alpha_source_material[] = "material_alpha"; + const char *alpha_source = alpha_source_diffuse; + if (state->diffuse_src == MATERIAL_COLOR_SRC_MATERIAL) { + mstring_append_fmt(uniforms, "%sfloat material_alpha;\n", u); + alpha_source = alpha_source_material; + } else if (state->diffuse_src == MATERIAL_COLOR_SRC_SPECULAR) { + alpha_source = alpha_source_specular; + } + + if (state->ambient_src == MATERIAL_COLOR_SRC_MATERIAL) { + mstring_append_fmt(body, "oD0 = vec4(sceneAmbientColor, %s);\n", alpha_source); + } else if (state->ambient_src == MATERIAL_COLOR_SRC_DIFFUSE) { + mstring_append_fmt(body, "oD0 = vec4(diffuse.rgb, %s);\n", alpha_source); + } else if (state->ambient_src == MATERIAL_COLOR_SRC_SPECULAR) { + mstring_append_fmt(body, "oD0 = vec4(specular.rgb, %s);\n", alpha_source); + } + + mstring_append(body, "oD0.rgb *= materialEmissionColor.rgb;\n"); + if (state->emission_src == MATERIAL_COLOR_SRC_MATERIAL) { + mstring_append(body, "oD0.rgb += sceneAmbientColor;\n"); + } else if (state->emission_src == 
MATERIAL_COLOR_SRC_DIFFUSE) { + mstring_append(body, "oD0.rgb += diffuse.rgb;\n"); + } else if (state->emission_src == MATERIAL_COLOR_SRC_SPECULAR) { + mstring_append(body, "oD0.rgb += specular.rgb;\n"); + } + + mstring_append(body, "oD1 = vec4(0.0, 0.0, 0.0, specular.a);\n"); + + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + if (state->light[i] == LIGHT_OFF) { + continue; + } + + /* FIXME: It seems that we only have to handle the surface colors if + * they are not part of the material [= vertex colors]. + * If they are material the cpu will premultiply light + * colors + */ + + mstring_append_fmt(body, "/* Light %d */ {\n", i); + + if (state->light[i] == LIGHT_LOCAL + || state->light[i] == LIGHT_SPOT) { + + mstring_append_fmt(uniforms, + "%svec3 lightLocalPosition%d;\n" + "%svec3 lightLocalAttenuation%d;\n", + u, i, u, i); + mstring_append_fmt(body, + " vec3 VP = lightLocalPosition%d - tPosition.xyz/tPosition.w;\n" + " float d = length(VP);\n" +//FIXME: if (d > lightLocalRange) { .. don't process this light .. } /* inclusive?! */ - what about directional lights? 
+ " VP = normalize(VP);\n" + " float attenuation = 1.0 / (lightLocalAttenuation%d.x\n" + " + lightLocalAttenuation%d.y * d\n" + " + lightLocalAttenuation%d.z * d * d);\n" + " vec3 halfVector = normalize(VP + eyePosition.xyz / eyePosition.w);\n" /* FIXME: Not sure if eyePosition is correct */ + " float nDotVP = max(0.0, dot(tNormal, VP));\n" + " float nDotHV = max(0.0, dot(tNormal, halfVector));\n", + i, i, i, i); + + } + + switch(state->light[i]) { + case LIGHT_INFINITE: + + /* lightLocalRange will be 1e+30 here */ + + mstring_append_fmt(uniforms, + "%svec3 lightInfiniteHalfVector%d;\n" + "%svec3 lightInfiniteDirection%d;\n", + u, i, u, i); + mstring_append_fmt(body, + " float attenuation = 1.0;\n" + " float nDotVP = max(0.0, dot(tNormal, normalize(vec3(lightInfiniteDirection%d))));\n" + " float nDotHV = max(0.0, dot(tNormal, vec3(lightInfiniteHalfVector%d)));\n", + i, i); + + /* FIXME: Do specular */ + + /* FIXME: tBackDiffuse */ + + break; + case LIGHT_LOCAL: + /* Everything done already */ + break; + case LIGHT_SPOT: + /* https://docs.microsoft.com/en-us/windows/win32/direct3d9/attenuation-and-spotlight-factor#spotlight-factor */ + mstring_append_fmt(body, + " vec4 spotDir = lightSpotDirection(%d);\n" + " float invScale = 1/length(spotDir.xyz);\n" + " float cosHalfPhi = -invScale*spotDir.w;\n" + " float cosHalfTheta = invScale + cosHalfPhi;\n" + " float spotDirDotVP = dot(spotDir.xyz, VP);\n" + " float rho = invScale*spotDirDotVP;\n" + " if (rho > cosHalfTheta) {\n" + " } else if (rho <= cosHalfPhi) {\n" + " attenuation = 0.0;\n" + " } else {\n" + " attenuation *= spotDirDotVP + spotDir.w;\n" /* FIXME: lightSpotFalloff */ + " }\n", + i); + break; + default: + assert(false); + break; + } + + mstring_append_fmt(body, + " float pf;\n" + " if (nDotVP == 0.0) {\n" + " pf = 0.0;\n" + " } else {\n" + " pf = pow(nDotHV, /* specular(l, m, n, l1, m1, n1) */ 0.001);\n" + " }\n" + " vec3 lightAmbient = lightAmbientColor(%d) * attenuation;\n" + " vec3 lightDiffuse = 
lightDiffuseColor(%d) * attenuation * nDotVP;\n" + " vec3 lightSpecular = lightSpecularColor(%d) * pf;\n", + i, i, i); + + mstring_append(body, + " oD0.xyz += lightAmbient;\n"); + + switch (state->diffuse_src) { + case MATERIAL_COLOR_SRC_MATERIAL: + mstring_append(body, + " oD0.xyz += lightDiffuse;\n"); + break; + case MATERIAL_COLOR_SRC_DIFFUSE: + mstring_append(body, + " oD0.xyz += diffuse.xyz * lightDiffuse;\n"); + break; + case MATERIAL_COLOR_SRC_SPECULAR: + mstring_append(body, + " oD0.xyz += specular.xyz * lightDiffuse;\n"); + break; + } + + mstring_append(body, + " oD1.xyz += specular.xyz * lightSpecular;\n"); + + mstring_append(body, "}\n"); + } + } else { + mstring_append(body, " oD0 = diffuse;\n"); + mstring_append(body, " oD1 = specular;\n"); + } + mstring_append(body, " oB0 = backDiffuse;\n"); + mstring_append(body, " oB1 = backSpecular;\n"); + + /* Fog */ + if (state->fog_enable) { + + /* From: https://www.opengl.org/registry/specs/NV/fog_distance.txt */ + switch(state->foggen) { + case FOGGEN_SPEC_ALPHA: + /* FIXME: Do we have to clamp here? 
*/ + mstring_append(body, " float fogDistance = clamp(specular.a, 0.0, 1.0);\n"); + break; + case FOGGEN_RADIAL: + mstring_append(body, " float fogDistance = length(tPosition.xyz);\n"); + break; + case FOGGEN_PLANAR: + case FOGGEN_ABS_PLANAR: + mstring_append(body, " float fogDistance = dot(fogPlane.xyz, tPosition.xyz) + fogPlane.w;\n"); + if (state->foggen == FOGGEN_ABS_PLANAR) { + mstring_append(body, " fogDistance = abs(fogDistance);\n"); + } + break; + case FOGGEN_FOG_X: + mstring_append(body, " float fogDistance = fogCoord;\n"); + break; + default: + assert(false); + break; + } + + } + + /* If skinning is off the composite matrix already includes the MV matrix */ + if (state->skinning == SKINNING_OFF) { + mstring_append(body, " tPosition = position;\n"); + } + + mstring_append(body, + " oPos = invViewport * (tPosition * compositeMat);\n" + ); + + if (state->vulkan) { + mstring_append(body, " oPos.y *= -1;\n"); + } else { + mstring_append(body, " oPos.z = oPos.z * 2.0 - oPos.w;\n"); + } + + /* FIXME: Testing */ + if (state->point_params_enable) { + mstring_append_fmt( + body, + " float d_e = length(position * modelViewMat0);\n" + " oPts.x = 1/sqrt(%f + %f*d_e + %f*d_e*d_e) + %f;\n", + state->point_params[0], state->point_params[1], state->point_params[2], + state->point_params[6]); + mstring_append_fmt(body, " oPts.x = min(oPts.x*%f + %f, 64.0) * %d;\n", + state->point_params[3], state->point_params[7], + state->surface_scale_factor); + } else { + mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size, + state->surface_scale_factor); + } + + mstring_append(body, + " if (oPos.w == 0.0 || isinf(oPos.w)) {\n" + " vtx_inv_w = 1.0;\n" + " } else {\n" + " vtx_inv_w = 1.0 / oPos.w;\n" + " }\n" + " vtx_inv_w_flat = vtx_inv_w;\n"); +} + +static void append_skinning_code(MString* str, bool mix, + unsigned int count, const char* type, + const char* output, const char* input, + const char* matrix, const char* swizzle) +{ + if (count == 0) { + 
mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n", + type, output, input, matrix, swizzle); + } else { + mstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type); + if (mix) { + /* Generated final weight (like GL_WEIGHT_SUM_UNITY_ARB) */ + mstring_append(str, "{\n" + " float weight_i;\n" + " float weight_n = 1.0;\n"); + int i; + for (i = 0; i < count; i++) { + if (i < (count - 1)) { + char c = "xyzw"[i]; + mstring_append_fmt(str, " weight_i = weight.%c;\n" + " weight_n -= weight_i;\n", + c); + } else { + mstring_append(str, " weight_i = weight_n;\n"); + } + mstring_append_fmt(str, " %s += (%s * %s%d).%s * weight_i;\n", + output, input, matrix, i, swizzle); + } + mstring_append(str, "}\n"); + } else { + /* Individual weights */ + int i; + for (i = 0; i < count; i++) { + char c = "xyzw"[i]; + mstring_append_fmt(str, "%s += (%s * %s%d).%s * weight.%c;\n", + output, input, matrix, i, swizzle, c); + } + } + } +} diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.h b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.h new file mode 100644 index 0000000000..949bf54252 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.h @@ -0,0 +1,31 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H + +#include "qemu/mstring.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header, + MString *body, MString *uniforms); + +#endif diff --git a/hw/xbox/nv2a/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c similarity index 97% rename from hw/xbox/nv2a/vsh.c rename to hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index 0e4cf314bc..7bebed71e8 100644 --- a/hw/xbox/nv2a/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -1,5 +1,5 @@ /* - * QEMU Geforce NV2A vertex shader translation + * Geforce NV2A PGRAPH GLSL Shader Generator * * Copyright (c) 2014 Jannik Vogel * Copyright (c) 2012 espes @@ -32,8 +32,9 @@ #include #include -#include "shaders_common.h" -#include "vsh.h" +#include "hw/xbox/nv2a/pgraph/vsh.h" +#include "common.h" +#include "vsh-prog.h" #define VSH_D3DSCM_CORRECTION 96 @@ -794,10 +795,11 @@ static const char* vsh_header = " return t;\n" "}\n"; -void vsh_translate(uint16_t version, +void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens, unsigned int length, bool z_perspective, + bool vulkan, MString *header, MString *body) { @@ -843,14 +845,30 @@ void vsh_translate(uint16_t version, * TODO: the pixel-center co-ordinate differences should handled */ " oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;\n" - " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) / surfaceSize.y;\n" - ); + ); + + if (vulkan) { + mstring_append(body, + " oPos.y = 2.0 * oPos.y / surfaceSize.y - 1.0;\n"); + } else { + mstring_append(body, " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) " + "/ surfaceSize.y;\n"); + } + if (z_perspective) { mstring_append(body, " oPos.z = oPos.w;\n"); } + + mstring_append(body, + " if (clipRange.y != clipRange.x) {\n"); + if (vulkan) { + mstring_append(body, " oPos.z /= clipRange.y;\n"); + } else { + mstring_append(body, + " oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y " + "- 
clipRange.x)) - 1;\n"); + } mstring_append(body, - " if (clipRange.y != clipRange.x) {\n" - " oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y - clipRange.x)) - 1;\n" " }\n" /* Correct for the perspective divide */ diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h new file mode 100644 index 0000000000..84d8141c5e --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h @@ -0,0 +1,35 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2014 Jannik Vogel + * Copyright (c) 2012 espes + * + * Based on: + * Cxbx, VertexShader.cpp + * Copyright (c) 2004 Aaron Robinson + * Kingofc + * Dxbx, uPushBuffer.pas + * Copyright (c) 2007 Shadow_tj, PatrickvL + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H + +void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens, + unsigned int length, bool z_perspective, + bool vulkan, MString *header, MString *body); + +#endif diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c new file mode 100644 index 0000000000..4fcc09cac5 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -0,0 +1,274 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "common.h" +#include "vsh.h" +#include "vsh-ff.h" +#include "vsh-prog.h" +#include + +MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) +{ + int i; + MString *output = mstring_new(); + mstring_append_fmt(output, "#version %d\n\n", state->vulkan ? 450 : 400); + + MString *header = mstring_from_str(""); + + MString *uniforms = mstring_from_str(""); + + const char *u = state->vulkan ? 
"" : "uniform "; // FIXME: Remove + + mstring_append_fmt(uniforms, + "%svec4 clipRange;\n" + "%svec2 surfaceSize;\n" + "%svec4 c[" stringify(NV2A_VERTEXSHADER_CONSTANTS) "];\n" + "%svec2 fogParam;\n", + u, u, u, u + ); + + mstring_append(header, + GLSL_DEFINE(fogPlane, GLSL_C(NV_IGRAPH_XF_XFCTX_FOG)) + GLSL_DEFINE(texMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T0MAT)) + GLSL_DEFINE(texMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T1MAT)) + GLSL_DEFINE(texMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T2MAT)) + GLSL_DEFINE(texMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T3MAT)) + + "\n" + "vec4 oPos = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oFog = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n" + "\n" + "vec4 decompress_11_11_10(int cmp) {\n" + " float x = float(bitfieldExtract(cmp, 0, 11)) / 1023.0;\n" + " float y = float(bitfieldExtract(cmp, 11, 11)) / 1023.0;\n" + " float z = float(bitfieldExtract(cmp, 22, 10)) / 511.0;\n" + " return vec4(x, y, z, 1);\n" + "}\n"); + + pgraph_get_glsl_vtx_header(header, state->vulkan, state->smooth_shading, + false, prefix_outputs, false); + + if (prefix_outputs) { + mstring_append(header, + "#define vtx_inv_w v_vtx_inv_w\n" + "#define vtx_inv_w_flat v_vtx_inv_w_flat\n" + "#define vtxD0 v_vtxD0\n" + "#define vtxD1 v_vtxD1\n" + "#define vtxB0 v_vtxB0\n" + "#define vtxB1 v_vtxB1\n" + "#define vtxFog v_vtxFog\n" + "#define vtxT0 v_vtxT0\n" + "#define vtxT1 v_vtxT1\n" + "#define vtxT2 v_vtxT2\n" + "#define vtxT3 v_vtxT3\n" + ); + } + mstring_append(header, "\n"); + for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + + bool is_uniform = state->uniform_attrs & (1 << i); + bool is_compressed = state->compressed_attrs & (1 << i); + + 
assert(!(is_uniform && is_compressed)); + + if (is_uniform) { + mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i, i); + } else { + if (state->compressed_attrs & (1 << i)) { + mstring_append_fmt(header, + "layout(location = %d) in int v%d_cmp;\n", i, i); + } else if (state->swizzle_attrs & (1 << i)) { + mstring_append_fmt(header, "layout(location = %d) in vec4 v%d_sw;\n", + i, i); + } else { + mstring_append_fmt(header, "layout(location = %d) in vec4 v%d;\n", + i, i); + } + } + } + mstring_append(header, "\n"); + + MString *body = mstring_from_str("void main() {\n"); + + for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + if (state->compressed_attrs & (1 << i)) { + mstring_append_fmt( + body, "vec4 v%d = decompress_11_11_10(v%d_cmp);\n", i, i); + } + + if (state->swizzle_attrs & (1 << i)) { + mstring_append_fmt(body, "vec4 v%d = v%d_sw.bgra;\n", i, i); + } + + } + + if (state->fixed_function) { + pgraph_gen_vsh_ff_glsl(state, header, body, uniforms); + } else if (state->vertex_program) { + pgraph_gen_vsh_prog_glsl(VSH_VERSION_XVS, + (uint32_t *)state->program_data, + state->program_length, state->z_perspective, + state->vulkan, header, body); + } else { + assert(false); + } + + + /* Fog */ + + if (state->fog_enable) { + + if (state->vertex_program) { + /* FIXME: Does foggen do something here? Let's do some tracking.. + * + * "RollerCoaster Tycoon" has + * state->vertex_program = true; state->foggen == FOGGEN_PLANAR + * but expects oFog.x as fogdistance?! Writes oFog.xyzw = v0.z + */ + mstring_append(body, " float fogDistance = oFog.x;\n"); + } + + /* FIXME: Do this per pixel? 
*/ + + switch (state->fog_mode) { + case FOG_MODE_LINEAR: + case FOG_MODE_LINEAR_ABS: + + /* f = (end - d) / (end - start) + * fogParam.y = -1 / (end - start) + * fogParam.x = 1 - end * fogParam.y; + */ + + mstring_append(body, + " if (isinf(fogDistance)) {\n" + " fogDistance = 0.0;\n" + " }\n" + ); + mstring_append(body, " float fogFactor = fogParam.x + fogDistance * fogParam.y;\n"); + mstring_append(body, " fogFactor -= 1.0;\n"); + break; + case FOG_MODE_EXP: + mstring_append(body, + " if (isinf(fogDistance)) {\n" + " fogDistance = 0.0;\n" + " }\n" + ); + /* fallthru */ + case FOG_MODE_EXP_ABS: + + /* f = 1 / (e^(d * density)) + * fogParam.y = -density / (2 * ln(256)) + * fogParam.x = 1.5 + */ + + mstring_append(body, " float fogFactor = fogParam.x + exp2(fogDistance * fogParam.y * 16.0);\n"); + mstring_append(body, " fogFactor -= 1.5;\n"); + break; + case FOG_MODE_EXP2: + case FOG_MODE_EXP2_ABS: + + /* f = 1 / (e^((d * density)^2)) + * fogParam.y = -density / (2 * sqrt(ln(256))) + * fogParam.x = 1.5 + */ + + mstring_append(body, " float fogFactor = fogParam.x + exp2(-fogDistance * fogDistance * fogParam.y * fogParam.y * 32.0);\n"); + mstring_append(body, " fogFactor -= 1.5;\n"); + break; + default: + assert(false); + break; + } + /* Calculate absolute for the modes which need it */ + switch (state->fog_mode) { + case FOG_MODE_LINEAR_ABS: + case FOG_MODE_EXP_ABS: + case FOG_MODE_EXP2_ABS: + mstring_append(body, " fogFactor = abs(fogFactor);\n"); + break; + default: + break; + } + + mstring_append(body, " oFog.xyzw = vec4(fogFactor);\n"); + } else { + /* FIXME: Is the fog still calculated / passed somehow?! + */ + mstring_append(body, " oFog.xyzw = vec4(1.0);\n"); + } + + /* Set outputs */ + const char *shade_model_mult = state->smooth_shading ? 
"vtx_inv_w" : "vtx_inv_w_flat"; + mstring_append_fmt(body, "\n" + " vtxD0 = clamp(oD0, 0.0, 1.0) * %s;\n" + " vtxD1 = clamp(oD1, 0.0, 1.0) * %s;\n" + " vtxB0 = clamp(oB0, 0.0, 1.0) * %s;\n" + " vtxB1 = clamp(oB1, 0.0, 1.0) * %s;\n" + " vtxFog = oFog.x * vtx_inv_w;\n" + " vtxT0 = oT0 * vtx_inv_w;\n" + " vtxT1 = oT1 * vtx_inv_w;\n" + " vtxT2 = oT2 * vtx_inv_w;\n" + " vtxT3 = oT3 * vtx_inv_w;\n" + " gl_Position = oPos;\n" + " gl_PointSize = oPts.x;\n" + // " gl_ClipDistance[0] = oPos.z - oPos.w*clipRange.z;\n" // Near + // " gl_ClipDistance[1] = oPos.w*clipRange.w - oPos.z;\n" // Far + "\n" + "}\n", + shade_model_mult, + shade_model_mult, + shade_model_mult, + shade_model_mult); + + + /* Return combined header + source */ + if (state->vulkan) { + mstring_append_fmt( + output, "layout(binding = %d, std140) uniform VshUniforms {\n%s};\n\n", + VSH_UBO_BINDING, mstring_get_str(uniforms)); + // FIXME: Only needed for vk, for gl we use glVertexAttrib + mstring_append_fmt(output, + "layout(push_constant) uniform PushConstants {\n" + "vec4 inlineValue[" stringify(NV2A_VERTEXSHADER_ATTRIBUTES) "];\n" + "};\n\n"); + } else { + mstring_append( + output, mstring_get_str(uniforms)); + } + + mstring_append(output, mstring_get_str(header)); + mstring_unref(header); + + mstring_append(output, mstring_get_str(body)); + mstring_unref(body); + return output; +} diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.h b/hw/xbox/nv2a/pgraph/glsl/vsh.h new file mode 100644 index 0000000000..584e1997e3 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.h @@ -0,0 +1,33 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H + +#include "qemu/mstring.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +// FIXME: Move to struct +#define VSH_UBO_BINDING 0 + +MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs); + +#endif diff --git a/hw/xbox/nv2a/pgraph/meson.build b/hw/xbox/nv2a/pgraph/meson.build new file mode 100644 index 0000000000..5b8bc181c3 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/meson.build @@ -0,0 +1,19 @@ +specific_ss.add(files( + 'pgraph.c', + 'profile.c', + 'rdi.c', + 's3tc.c', + 'shaders.c', + 'swizzle.c', + 'texture.c', + 'vertex.c', + )) +if have_renderdoc + specific_ss.add(files('debug_renderdoc.c')) +endif +subdir('thirdparty') +subdir('null') +subdir('gl') +subdir('glsl') +subdir('vk') +specific_ss.add(nv2a_vsh_cpu) diff --git a/hw/xbox/nv2a/pgraph_methods.h b/hw/xbox/nv2a/pgraph/methods.h similarity index 100% rename from hw/xbox/nv2a/pgraph_methods.h rename to hw/xbox/nv2a/pgraph/methods.h diff --git a/hw/xbox/nv2a/pgraph/null/meson.build b/hw/xbox/nv2a/pgraph/null/meson.build new file mode 100644 index 0000000000..e2731a13d9 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/null/meson.build @@ -0,0 +1,3 @@ +specific_ss.add([sdl, files( + 'renderer.c', + )]) diff --git a/hw/xbox/nv2a/pgraph/null/renderer.c b/hw/xbox/nv2a/pgraph/null/renderer.c new file mode 100644 index 0000000000..9a9c2512cc --- /dev/null +++ b/hw/xbox/nv2a/pgraph/null/renderer.c @@ -0,0 +1,146 @@ +/* + * Geforce NV2A PGRAPH Null Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can 
redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "hw/hw.h"
+#include "hw/xbox/nv2a/nv2a_int.h"
+
+/* Acknowledge a requested sync: clear the flag and wake the waiting thread. */
+static void pgraph_null_sync(NV2AState *d)
+{
+    qatomic_set(&d->pgraph.sync_pending, false);
+    qemu_event_set(&d->pgraph.sync_complete);
+}
+
+/* Acknowledge a requested flush: clear the flag and wake the waiting thread. */
+static void pgraph_null_flush(NV2AState *d)
+{
+    qatomic_set(&d->pgraph.flush_pending, false);
+    qemu_event_set(&d->pgraph.flush_complete);
+}
+
+/* Service pending sync/flush requests. Entered with pfifo.lock held; it is dropped while pgraph.lock is taken, then reacquired afterwards. */
+static void pgraph_null_process_pending(NV2AState *d)
+{
+    if (
+        qatomic_read(&d->pgraph.sync_pending) ||
+        qatomic_read(&d->pgraph.flush_pending)
+    ) {
+        qemu_mutex_unlock(&d->pfifo.lock);
+        qemu_mutex_lock(&d->pgraph.lock);
+        if (qatomic_read(&d->pgraph.sync_pending)) {
+            pgraph_null_sync(d);
+        }
+        if (qatomic_read(&d->pgraph.flush_pending)) {
+            pgraph_null_flush(d);
+        }
+        qemu_mutex_unlock(&d->pgraph.lock);
+        qemu_mutex_lock(&d->pfifo.lock);
+    }
+}
+
+/* The ops below are intentional no-ops: the null renderer draws nothing. */
+static void pgraph_null_clear_report_value(NV2AState *d)
+{
+}
+
+static void pgraph_null_clear_surface(NV2AState *d, uint32_t parameter)
+{
+}
+
+static void pgraph_null_draw_begin(NV2AState *d)
+{
+}
+
+static void pgraph_null_draw_end(NV2AState *d)
+{
+}
+
+static void pgraph_null_flip_stall(NV2AState *d)
+{
+}
+
+static void pgraph_null_flush_draw(NV2AState *d)
+{
+}
+
+/* Report queries are still answered so guests don't stall waiting on them: the zpass pixel count written back is always zero here. */
+static void pgraph_null_get_report(NV2AState *d, uint32_t parameter)
+{
+    pgraph_write_zpass_pixel_cnt_report(d, parameter, 0);
+}
+
+static void 
pgraph_null_image_blit(NV2AState *d)
+{
+}
+
+static void pgraph_null_pre_savevm_trigger(NV2AState *d)
+{
+}
+
+static void pgraph_null_pre_savevm_wait(NV2AState *d)
+{
+}
+
+static void pgraph_null_pre_shutdown_trigger(NV2AState *d)
+{
+}
+
+static void pgraph_null_pre_shutdown_wait(NV2AState *d)
+{
+}
+
+static void pgraph_null_process_pending_reports(NV2AState *d)
+{
+}
+
+static void pgraph_null_surface_update(NV2AState *d, bool upload,
+                                       bool color_write, bool zeta_write)
+{
+}
+
+/* Renderer-specific init: the null renderer keeps no per-renderer state. */
+static void pgraph_null_init(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    pg->null_renderer_state = NULL;
+}
+
+/* Op table wiring the stubs above into the PGRAPH renderer interface. */
+static PGRAPHRenderer pgraph_null_renderer = {
+    .type = CONFIG_DISPLAY_RENDERER_NULL,
+    .name = "Null",
+    .ops = {
+        .init = pgraph_null_init,
+        .clear_report_value = pgraph_null_clear_report_value,
+        .clear_surface = pgraph_null_clear_surface,
+        .draw_begin = pgraph_null_draw_begin,
+        .draw_end = pgraph_null_draw_end,
+        .flip_stall = pgraph_null_flip_stall,
+        .flush_draw = pgraph_null_flush_draw,
+        .get_report = pgraph_null_get_report,
+        .image_blit = pgraph_null_image_blit,
+        .pre_savevm_trigger = pgraph_null_pre_savevm_trigger,
+        .pre_savevm_wait = pgraph_null_pre_savevm_wait,
+        .pre_shutdown_trigger = pgraph_null_pre_shutdown_trigger,
+        .pre_shutdown_wait = pgraph_null_pre_shutdown_wait,
+        .process_pending = pgraph_null_process_pending,
+        .process_pending_reports = pgraph_null_process_pending_reports,
+        .surface_update = pgraph_null_surface_update,
+    }
+};
+
+/* Runs at program load time (GCC/Clang constructor attribute) to make this renderer selectable. */
+static void __attribute__((constructor)) register_renderer(void)
+{
+    pgraph_renderer_register(&pgraph_null_renderer);
+}
diff --git a/hw/xbox/nv2a/pgraph/pgraph.c b/hw/xbox/nv2a/pgraph/pgraph.c
new file mode 100644
index 0000000000..0062efa15f
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/pgraph.c
@@ -0,0 +1,2874 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can 
redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "../nv2a_int.h" +#include "ui/xemu-settings.h" +#include "util.h" +#include "swizzle.h" +#include "nv2a_vsh_emulator.h" + +#define PG_GET_MASK(reg, mask) GET_MASK(pgraph_reg_r(pg, reg), mask) +#define PG_SET_MASK(reg, mask, value) \ + do { \ + uint32_t rv = pgraph_reg_r(pg, reg); \ + SET_MASK(rv, mask, value); \ + pgraph_reg_w(pg, reg, rv); \ + } while (0) + + +NV2AState *g_nv2a; + +uint64_t pgraph_read(void *opaque, hwaddr addr, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + PGRAPHState *pg = &d->pgraph; + + qemu_mutex_lock(&pg->lock); + + uint64_t r = 0; + switch (addr) { + case NV_PGRAPH_INTR: + r = pg->pending_interrupts; + break; + case NV_PGRAPH_INTR_EN: + r = pg->enabled_interrupts; + break; + case NV_PGRAPH_RDI_DATA: { + unsigned int select = PG_GET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_SELECT); + unsigned int address = PG_GET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_ADDRESS); + + r = pgraph_rdi_read(pg, select, address); + + /* FIXME: Overflow into select? 
*/ + assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, + NV_PGRAPH_RDI_INDEX_ADDRESS)); + PG_SET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); + break; + } + default: + r = pgraph_reg_r(pg, addr); + break; + } + + qemu_mutex_unlock(&pg->lock); + + nv2a_reg_log_read(NV_PGRAPH, addr, size, r); + return r; +} + +void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + PGRAPHState *pg = &d->pgraph; + + nv2a_reg_log_write(NV_PGRAPH, addr, size, val); + + qemu_mutex_lock(&d->pfifo.lock); // FIXME: Factor out fifo lock here + qemu_mutex_lock(&pg->lock); + + switch (addr) { + case NV_PGRAPH_INTR: + pg->pending_interrupts &= ~val; + + if (!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)) { + pg->waiting_for_nop = false; + } + if (!(pg->pending_interrupts & NV_PGRAPH_INTR_CONTEXT_SWITCH)) { + pg->waiting_for_context_switch = false; + } + pfifo_kick(d); + break; + case NV_PGRAPH_INTR_EN: + pg->enabled_interrupts = val; + break; + case NV_PGRAPH_INCREMENT: + if (val & NV_PGRAPH_INCREMENT_READ_3D) { + PG_SET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_READ_3D, + (PG_GET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_READ_3D)+1) + % PG_GET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_MODULO_3D) ); + nv2a_profile_increment(); + pfifo_kick(d); + } + break; + case NV_PGRAPH_RDI_DATA: { + unsigned int select = PG_GET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_SELECT); + unsigned int address = PG_GET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_ADDRESS); + + pgraph_rdi_write(pg, select, address, val); + + /* FIXME: Overflow into select? 
*/ + assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, + NV_PGRAPH_RDI_INDEX_ADDRESS)); + PG_SET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); + break; + } + case NV_PGRAPH_CHANNEL_CTX_TRIGGER: { + hwaddr context_address = + PG_GET_MASK(NV_PGRAPH_CHANNEL_CTX_POINTER, + NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4; + + if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN) { +#ifdef DEBUG_NV2A + unsigned pgraph_channel_id = + PG_GET_MASK(NV_PGRAPH_CTX_USER, NV_PGRAPH_CTX_USER_CHID); +#endif + NV2A_DPRINTF("PGRAPH: read channel %d context from %" HWADDR_PRIx "\n", + pgraph_channel_id, context_address); + + assert(context_address < memory_region_size(&d->ramin)); + + uint8_t *context_ptr = d->ramin_ptr + context_address; + uint32_t context_user = ldl_le_p((uint32_t*)context_ptr); + + NV2A_DPRINTF(" - CTX_USER = 0x%x\n", context_user); + + pgraph_reg_w(pg, NV_PGRAPH_CTX_USER, context_user); + // pgraph_set_context_user(d, context_user); + } + if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT) { + /* do stuff ... 
*/
+        }
+
+        break;
+    }
+    default:
+        pgraph_reg_w(pg, addr, val);
+        break;
+    }
+
+    // events
+    switch (addr) {
+    case NV_PGRAPH_FIFO:
+        pfifo_kick(d);
+        break;
+    }
+
+    qemu_mutex_unlock(&pg->lock);
+    qemu_mutex_unlock(&d->pfifo.lock);
+}
+
+/* Check whether the given channel is the one PGRAPH currently services; if not, latch the channel id and raise a CONTEXT_SWITCH interrupt. pg->lock is dropped while the iothread lock is held to raise the IRQ, then retaken. */
+void pgraph_context_switch(NV2AState *d, unsigned int channel_id)
+{
+    PGRAPHState *pg = &d->pgraph;
+
+    bool channel_valid =
+        pgraph_reg_r(pg, NV_PGRAPH_CTX_CONTROL) & NV_PGRAPH_CTX_CONTROL_CHID;
+    unsigned pgraph_channel_id =
+        PG_GET_MASK(NV_PGRAPH_CTX_USER, NV_PGRAPH_CTX_USER_CHID);
+
+    bool valid = channel_valid && pgraph_channel_id == channel_id;
+    if (!valid) {
+        PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR,
+                    NV_PGRAPH_TRAPPED_ADDR_CHID, channel_id);
+
+        NV2A_DPRINTF("pgraph switching to ch %d\n", channel_id);
+
+        /* TODO: hardware context switching */
+        assert(!PG_GET_MASK(NV_PGRAPH_DEBUG_3,
+                            NV_PGRAPH_DEBUG_3_HW_CONTEXT_SWITCH));
+
+        pg->waiting_for_context_switch = true;
+        qemu_mutex_unlock(&pg->lock);
+        qemu_mutex_lock_iothread();
+        pg->pending_interrupts |= NV_PGRAPH_INTR_CONTEXT_SWITCH;
+        nv2a_update_irq(d);
+        qemu_mutex_unlock_iothread();
+        qemu_mutex_lock(&pg->lock);
+    }
+}
+
+/* Registry of available renderer backends, indexed by CONFIG_DISPLAY_RENDERER_* type. */
+static const PGRAPHRenderer *renderers[CONFIG_DISPLAY_RENDERER__COUNT];
+
+/* Called by each renderer backend (e.g. from a load-time constructor) to register itself. */
+void pgraph_renderer_register(const PGRAPHRenderer *renderer)
+{
+    assert(renderer->type < CONFIG_DISPLAY_RENDERER__COUNT);
+    renderers[renderer->type] = renderer;
+}
+
+/* One-time PGRAPH state construction: locks, sync/flush events, register defaults, and per-attribute inline vertex buffers (function continues below). */
+void pgraph_init(NV2AState *d)
+{
+    g_nv2a = d;
+
+    PGRAPHState *pg = &d->pgraph;
+    qemu_mutex_init(&pg->lock);
+    qemu_event_init(&pg->sync_complete, false);
+    qemu_event_init(&pg->flush_complete, false);
+
+    pg->frame_time = 0;
+    pg->draw_time = 0;
+
+    pg->material_alpha = 0.0f;
+    PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_SHADEMODE,
+                NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH);
+    pg->primitive_mode = PRIM_TYPE_INVALID;
+
+    for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+        VertexAttribute *attribute = &pg->vertex_attributes[i];
+        attribute->inline_buffer = (float*)g_malloc(NV2A_MAX_BATCH_LENGTH 
+ * sizeof(float) * 4); + attribute->inline_buffer_populated = false; + } + + pgraph_clear_dirty_reg_map(pg); + + pg->renderer = renderers[g_config.display.renderer]; + pg->renderer->ops.init(d); +} + +void pgraph_clear_dirty_reg_map(PGRAPHState *pg) +{ + memset(pg->regs_dirty, 0, sizeof(pg->regs_dirty)); +} + +void pgraph_init_thread(NV2AState *d) +{ + if (d->pgraph.renderer->ops.init_thread) { + d->pgraph.renderer->ops.init_thread(d); + } +} + +static CONFIG_DISPLAY_RENDERER get_default_renderer(void) +{ +#ifdef CONFIG_OPENGL + if (renderers[CONFIG_DISPLAY_RENDERER_OPENGL]) { + return CONFIG_DISPLAY_RENDERER_OPENGL; + } +#endif +#ifdef CONFIG_VULKAN + if (renderers[CONFIG_DISPLAY_RENDERER_VULKAN]) { + return CONFIG_DISPLAY_RENDERER_VULKAN; + } +#endif + fprintf(stderr, "Warning: No available renderer\n"); + return CONFIG_DISPLAY_RENDERER_NULL; +} + +void nv2a_context_init(void) +{ + if (!renderers[g_config.display.renderer]) { + g_config.display.renderer = get_default_renderer(); + fprintf(stderr, + "Warning: Configured renderer unavailable. 
Switching to %s.\n", + renderers[g_config.display.renderer]->name); + } + + if (renderers[g_config.display.renderer]->ops.early_context_init) { + renderers[g_config.display.renderer]->ops.early_context_init(); + } +} + +void pgraph_destroy(PGRAPHState *pg) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + + if (pg->renderer->ops.finalize) { + pg->renderer->ops.finalize(d); + } + + qemu_mutex_destroy(&pg->lock); +} + +int nv2a_get_framebuffer_surface(void) +{ + NV2AState *d = g_nv2a; + + if (d->pgraph.renderer->ops.get_framebuffer_surface) { + return d->pgraph.renderer->ops.get_framebuffer_surface(d); + } + + return 0; +} + +void nv2a_set_surface_scale_factor(unsigned int scale) +{ + NV2AState *d = g_nv2a; + + if (d->pgraph.renderer->ops.set_surface_scale_factor) { + d->pgraph.renderer->ops.set_surface_scale_factor(d, scale); + } +} + +unsigned int nv2a_get_surface_scale_factor(void) +{ + NV2AState *d = g_nv2a; + + if (d->pgraph.renderer->ops.get_surface_scale_factor) { + return d->pgraph.renderer->ops.get_surface_scale_factor(d); + } + + return 1; +} + +#define METHOD_ADDR(gclass, name) \ + gclass ## _ ## name +#define METHOD_ADDR_TO_INDEX(x) ((x)>>2) +#define METHOD_NAME_STR(gclass, name) \ + tostring(gclass ## _ ## name) +#define METHOD_FUNC_NAME(gclass, name) \ + pgraph_ ## gclass ## _ ## name ## _handler +#define METHOD_HANDLER_ARG_DECL \ + NV2AState *d, PGRAPHState *pg, \ + unsigned int subchannel, unsigned int method, \ + uint32_t parameter, uint32_t *parameters, \ + size_t num_words_available, size_t *num_words_consumed, bool inc +#define METHOD_HANDLER_ARGS \ + d, pg, subchannel, method, parameter, parameters, \ + num_words_available, num_words_consumed, inc +#define DEF_METHOD_PROTO(gclass, name) \ + static void METHOD_FUNC_NAME(gclass, name)(METHOD_HANDLER_ARG_DECL) + +#define DEF_METHOD(gclass, name) \ + DEF_METHOD_PROTO(gclass, name); +#define DEF_METHOD_RANGE(gclass, name, range) \ + DEF_METHOD_PROTO(gclass, name); +#define 
DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* Drop */ +#define DEF_METHOD_CASE_4(gclass, name, stride) \ + DEF_METHOD_PROTO(gclass, name); +#include "methods.h" +#undef DEF_METHOD +#undef DEF_METHOD_RANGE +#undef DEF_METHOD_CASE_4_OFFSET +#undef DEF_METHOD_CASE_4 + +typedef void (*MethodFunc)(METHOD_HANDLER_ARG_DECL); +static const struct { + uint32_t base; + const char *name; + MethodFunc handler; +} pgraph_kelvin_methods[0x800] = { +#define DEF_METHOD(gclass, name) \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name))] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, +#define DEF_METHOD_RANGE(gclass, name, range) \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name)) \ + ... METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + 4*range - 1)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, +#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 2)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 3)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, +#define DEF_METHOD_CASE_4(gclass, name, stride) \ + DEF_METHOD_CASE_4_OFFSET(gclass, name, 0, stride) +#include "methods.h" +#undef DEF_METHOD +#undef DEF_METHOD_RANGE +#undef DEF_METHOD_CASE_4_OFFSET +#undef DEF_METHOD_CASE_4 +}; + 
+#define METHOD_RANGE_END_NAME(gclass, name) \ + pgraph_ ## gclass ## _ ## name ## __END +#define DEF_METHOD(gclass, name) \ + static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ + METHOD_ADDR(gclass, name) + 4; +#define DEF_METHOD_RANGE(gclass, name, range) \ + static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ + METHOD_ADDR(gclass, name) + 4*range; +#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* drop */ +#define DEF_METHOD_CASE_4(gclass, name, stride) \ + static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ + METHOD_ADDR(gclass, name) + 4*stride; +#include "methods.h" +#undef DEF_METHOD +#undef DEF_METHOD_RANGE +#undef DEF_METHOD_CASE_4_OFFSET +#undef DEF_METHOD_CASE_4 + +static void pgraph_method_log(unsigned int subchannel, + unsigned int graphics_class, + unsigned int method, uint32_t parameter) +{ + const char *method_name = "?"; + static unsigned int last = 0; + static unsigned int count = 0; + + if (last == NV097_ARRAY_ELEMENT16 && method != last) { + method_name = "NV097_ARRAY_ELEMENT16"; + trace_nv2a_pgraph_method_abbrev(subchannel, graphics_class, last, + method_name, count); + } + + if (method != NV097_ARRAY_ELEMENT16) { + uint32_t base = method; + switch (graphics_class) { + case NV_KELVIN_PRIMITIVE: { + int idx = METHOD_ADDR_TO_INDEX(method); + if (idx < ARRAY_SIZE(pgraph_kelvin_methods) && + pgraph_kelvin_methods[idx].handler) { + method_name = pgraph_kelvin_methods[idx].name; + base = pgraph_kelvin_methods[idx].base; + } + break; + } + default: + break; + } + + uint32_t offset = method - base; + trace_nv2a_pgraph_method(subchannel, graphics_class, method, + method_name, offset, parameter); + } + + if (method == last) { + count++; + } else { + count = 0; + } + last = method; +} + +static void pgraph_method_inc(MethodFunc handler, uint32_t end, + METHOD_HANDLER_ARG_DECL) +{ + if (!inc) { + handler(METHOD_HANDLER_ARGS); + return; + } + size_t count = MIN(num_words_available, (end - method) / 4); + for (size_t 
i = 0; i < count; i++) { + parameter = ldl_le_p(parameters + i); + if (i) { + pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method, + parameter); + } + handler(METHOD_HANDLER_ARGS); + method += 4; + } + *num_words_consumed = count; +} + +static void pgraph_method_non_inc(MethodFunc handler, METHOD_HANDLER_ARG_DECL) +{ + if (inc) { + handler(METHOD_HANDLER_ARGS); + return; + } + + for (size_t i = 0; i < num_words_available; i++) { + parameter = ldl_le_p(parameters + i); + if (i) { + pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method, + parameter); + } + handler(METHOD_HANDLER_ARGS); + } + *num_words_consumed = num_words_available; +} + +#define METHOD_FUNC_NAME_INT(gclass, name) METHOD_FUNC_NAME(gclass, name##_int) +#define DEF_METHOD_INT(gclass, name) DEF_METHOD(gclass, name##_int) +#define DEF_METHOD(gclass, name) DEF_METHOD_PROTO(gclass, name) + +#define DEF_METHOD_INC(gclass, name) \ + DEF_METHOD_INT(gclass, name); \ + DEF_METHOD(gclass, name) \ + { \ + pgraph_method_inc(METHOD_FUNC_NAME_INT(gclass, name), \ + METHOD_RANGE_END_NAME(gclass, name), \ + METHOD_HANDLER_ARGS); \ + } \ + DEF_METHOD_INT(gclass, name) + +#define DEF_METHOD_NON_INC(gclass, name) \ + DEF_METHOD_INT(gclass, name); \ + DEF_METHOD(gclass, name) \ + { \ + pgraph_method_non_inc(METHOD_FUNC_NAME_INT(gclass, name), \ + METHOD_HANDLER_ARGS); \ + } \ + DEF_METHOD_INT(gclass, name) + +int pgraph_method(NV2AState *d, unsigned int subchannel, + unsigned int method, uint32_t parameter, + uint32_t *parameters, size_t num_words_available, + size_t max_lookahead_words, bool inc) +{ + int num_processed = 1; + + PGRAPHState *pg = &d->pgraph; + + bool channel_valid = + PG_GET_MASK(NV_PGRAPH_CTX_CONTROL, NV_PGRAPH_CTX_CONTROL_CHID); + assert(channel_valid); + + ContextSurfaces2DState *context_surfaces_2d = &pg->context_surfaces_2d; + ImageBlitState *image_blit = &pg->image_blit; + BetaState *beta = &pg->beta; + + assert(subchannel < 8); + + if (method == NV_SET_OBJECT) { + assert(parameter < 
memory_region_size(&d->ramin)); + uint8_t *obj_ptr = d->ramin_ptr + parameter; + + uint32_t ctx_1 = ldl_le_p((uint32_t*)obj_ptr); + uint32_t ctx_2 = ldl_le_p((uint32_t*)(obj_ptr+4)); + uint32_t ctx_3 = ldl_le_p((uint32_t*)(obj_ptr+8)); + uint32_t ctx_4 = ldl_le_p((uint32_t*)(obj_ptr+12)); + uint32_t ctx_5 = parameter; + + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE1 + subchannel * 4, ctx_1); + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE2 + subchannel * 4, ctx_2); + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE3 + subchannel * 4, ctx_3); + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE4 + subchannel * 4, ctx_4); + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE5 + subchannel * 4, ctx_5); + } + + // is this right? + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH1, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE1 + subchannel * 4)); + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH2, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE2 + subchannel * 4)); + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH3, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE3 + subchannel * 4)); + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH4, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE4 + subchannel * 4)); + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH5, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE5 + subchannel * 4)); + + uint32_t graphics_class = PG_GET_MASK(NV_PGRAPH_CTX_SWITCH1, + NV_PGRAPH_CTX_SWITCH1_GRCLASS); + + pgraph_method_log(subchannel, graphics_class, method, parameter); + + if (subchannel != 0) { + // catches context switching issues on xbox d3d + assert(graphics_class != 0x97); + } + + /* ugly switch for now */ + switch (graphics_class) { + case NV_BETA: { + switch (method) { + case NV012_SET_OBJECT: + beta->object_instance = parameter; + break; + case NV012_SET_BETA: + if (parameter & 0x80000000) { + beta->beta = 0; + } else { + // The parameter is a signed fixed-point number with a sign bit + // and 31 fractional bits. Note that negative values are clamped + // to 0, and only 8 fractional bits are actually implemented in + // hardware. 
+ beta->beta = parameter & 0x7f800000; + } + break; + default: + goto unhandled; + } + break; + } + case NV_CONTEXT_PATTERN: { + switch (method) { + case NV044_SET_MONOCHROME_COLOR0: + pgraph_reg_w(pg, NV_PGRAPH_PATT_COLOR0, parameter); + break; + default: + goto unhandled; + } + break; + } + case NV_CONTEXT_SURFACES_2D: { + switch (method) { + case NV062_SET_OBJECT: + context_surfaces_2d->object_instance = parameter; + break; + case NV062_SET_CONTEXT_DMA_IMAGE_SOURCE: + context_surfaces_2d->dma_image_source = parameter; + break; + case NV062_SET_CONTEXT_DMA_IMAGE_DESTIN: + context_surfaces_2d->dma_image_dest = parameter; + break; + case NV062_SET_COLOR_FORMAT: + context_surfaces_2d->color_format = parameter; + break; + case NV062_SET_PITCH: + context_surfaces_2d->source_pitch = parameter & 0xFFFF; + context_surfaces_2d->dest_pitch = parameter >> 16; + break; + case NV062_SET_OFFSET_SOURCE: + context_surfaces_2d->source_offset = parameter & 0x07FFFFFF; + break; + case NV062_SET_OFFSET_DESTIN: + context_surfaces_2d->dest_offset = parameter & 0x07FFFFFF; + break; + default: + goto unhandled; + } + break; + } + case NV_IMAGE_BLIT: { + switch (method) { + case NV09F_SET_OBJECT: + image_blit->object_instance = parameter; + break; + case NV09F_SET_CONTEXT_SURFACES: + image_blit->context_surfaces = parameter; + break; + case NV09F_SET_OPERATION: + image_blit->operation = parameter; + break; + case NV09F_CONTROL_POINT_IN: + image_blit->in_x = parameter & 0xFFFF; + image_blit->in_y = parameter >> 16; + break; + case NV09F_CONTROL_POINT_OUT: + image_blit->out_x = parameter & 0xFFFF; + image_blit->out_y = parameter >> 16; + break; + case NV09F_SIZE: + image_blit->width = parameter & 0xFFFF; + image_blit->height = parameter >> 16; + + if (image_blit->width && image_blit->height) { + d->pgraph.renderer->ops.image_blit(d); + } + break; + default: + goto unhandled; + } + break; + } + case NV_KELVIN_PRIMITIVE: { + MethodFunc handler = + 
pgraph_kelvin_methods[METHOD_ADDR_TO_INDEX(method)].handler; + if (handler == NULL) { + goto unhandled; + } + size_t num_words_consumed = 1; + handler(d, pg, subchannel, method, parameter, parameters, + num_words_available, &num_words_consumed, inc); + + /* Squash repeated BEGIN,DRAW_ARRAYS,END */ + #define LAM(i, mthd) ((parameters[i*2+1] & 0x31fff) == (mthd)) + #define LAP(i, prm) (parameters[i*2+2] == (prm)) + #define LAMP(i, mthd, prm) (LAM(i, mthd) && LAP(i, prm)) + + if (method == NV097_DRAW_ARRAYS && (max_lookahead_words >= 7) && + pg->inline_elements_length == 0 && + pg->draw_arrays_length < + (ARRAY_SIZE(pg->draw_arrays_start) - 1) && + LAMP(0, NV097_SET_BEGIN_END, NV097_SET_BEGIN_END_OP_END) && + LAMP(1, NV097_SET_BEGIN_END, pg->primitive_mode) && + LAM(2, NV097_DRAW_ARRAYS)) { + num_words_consumed += 4; + pg->draw_arrays_prevent_connect = true; + } + + #undef LAM + #undef LAP + #undef LAMP + + num_processed = num_words_consumed; + break; + } + default: + goto unhandled; + } + + return num_processed; + +unhandled: + trace_nv2a_pgraph_method_unhandled(subchannel, graphics_class, + method, parameter); + return num_processed; +} + +DEF_METHOD(NV097, SET_OBJECT) +{ + pg->kelvin.object_instance = parameter; +} + +DEF_METHOD(NV097, NO_OPERATION) +{ + /* The bios uses nop as a software method call - + * it seems to expect a notify interrupt if the parameter isn't 0. + * According to a nouveau guy it should still be a nop regardless + * of the parameter. It's possible a debug register enables this, + * but nothing obvious sticks out. Weird. 
+ */ + if (parameter == 0) { + return; + } + + unsigned channel_id = + PG_GET_MASK(NV_PGRAPH_CTX_USER, NV_PGRAPH_CTX_USER_CHID); + + assert(!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)); + + PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, NV_PGRAPH_TRAPPED_ADDR_CHID, + channel_id); + PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, NV_PGRAPH_TRAPPED_ADDR_SUBCH, + subchannel); + PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, NV_PGRAPH_TRAPPED_ADDR_MTHD, + method); + pgraph_reg_w(pg, NV_PGRAPH_TRAPPED_DATA_LOW, parameter); + pgraph_reg_w(pg, NV_PGRAPH_NSOURCE, + NV_PGRAPH_NSOURCE_NOTIFICATION); /* TODO: check this */ + pg->pending_interrupts |= NV_PGRAPH_INTR_ERROR; + pg->waiting_for_nop = true; + + qemu_mutex_unlock(&pg->lock); + qemu_mutex_lock_iothread(); + nv2a_update_irq(d); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&pg->lock); +} + +DEF_METHOD(NV097, WAIT_FOR_IDLE) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); +} + +DEF_METHOD(NV097, SET_FLIP_READ) +{ + PG_SET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_READ_3D, + parameter); +} + +DEF_METHOD(NV097, SET_FLIP_WRITE) +{ + PG_SET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_WRITE_3D, + parameter); +} + +DEF_METHOD(NV097, SET_FLIP_MODULO) +{ + PG_SET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_MODULO_3D, + parameter); +} + +DEF_METHOD(NV097, FLIP_INCREMENT_WRITE) +{ + uint32_t old = + PG_GET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_WRITE_3D); + + PG_SET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_WRITE_3D, + (PG_GET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_WRITE_3D)+1) + % PG_GET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_MODULO_3D) ); + + uint32_t new = + PG_GET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_WRITE_3D); + + trace_nv2a_pgraph_flip_increment_write(old, new); + pg->frame_time++; +} + +DEF_METHOD(NV097, FLIP_STALL) +{ + trace_nv2a_pgraph_flip_stall(); + d->pgraph.renderer->ops.surface_update(d, false, true, true); + d->pgraph.renderer->ops.flip_stall(d); + nv2a_profile_flip_stall(); + pg->waiting_for_flip = 
true; +} + +// TODO: these should be loading the dma objects from ramin here? + +DEF_METHOD(NV097, SET_CONTEXT_DMA_NOTIFIES) +{ + pg->dma_notifies = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_A) +{ + pg->dma_a = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_B) +{ + pg->dma_b = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_STATE) +{ + pg->dma_state = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_COLOR) +{ + /* try to get any straggling draws in before the surface's changed :/ */ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + pg->dma_color = parameter; + pg->surface_color.buffer_dirty = true; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_ZETA) +{ + pg->dma_zeta = parameter; + pg->surface_zeta.buffer_dirty = true; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_A) +{ + pg->dma_vertex_a = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_B) +{ + pg->dma_vertex_b = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_SEMAPHORE) +{ + pg->dma_semaphore = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_REPORT) +{ + d->pgraph.renderer->ops.process_pending_reports(d); + + pg->dma_report = parameter; +} + +DEF_METHOD(NV097, SET_SURFACE_CLIP_HORIZONTAL) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + pg->surface_shape.clip_x = + GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_X); + pg->surface_shape.clip_width = + GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_WIDTH); +} + +DEF_METHOD(NV097, SET_SURFACE_CLIP_VERTICAL) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + pg->surface_shape.clip_y = + GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_Y); + pg->surface_shape.clip_height = + GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_HEIGHT); +} + +DEF_METHOD(NV097, SET_SURFACE_FORMAT) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + pg->surface_shape.color_format = + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_COLOR); + pg->surface_shape.zeta_format 
= + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ZETA); + pg->surface_shape.anti_aliasing = + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ANTI_ALIASING); + pg->surface_shape.log_width = + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_WIDTH); + pg->surface_shape.log_height = + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_HEIGHT); + + int surface_type = GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_TYPE); + if (surface_type != pg->surface_type) { + pg->surface_type = surface_type; + pg->surface_color.buffer_dirty = true; + pg->surface_zeta.buffer_dirty = true; + } +} + +DEF_METHOD(NV097, SET_SURFACE_PITCH) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + unsigned int color_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_COLOR); + unsigned int zeta_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_ZETA); + + pg->surface_color.buffer_dirty |= (pg->surface_color.pitch != color_pitch); + pg->surface_color.pitch = color_pitch; + + pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.pitch != zeta_pitch); + pg->surface_zeta.pitch = zeta_pitch; +} + +DEF_METHOD(NV097, SET_SURFACE_COLOR_OFFSET) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + pg->surface_color.buffer_dirty |= (pg->surface_color.offset != parameter); + pg->surface_color.offset = parameter; +} + +DEF_METHOD(NV097, SET_SURFACE_ZETA_OFFSET) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.offset != parameter); + pg->surface_zeta.offset = parameter; +} + +DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_ICW) +{ + int slot = (method - NV097_SET_COMBINER_ALPHA_ICW) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINEALPHAI0 + slot * 4, parameter); +} + +DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW0) +{ + pgraph_reg_w(pg, NV_PGRAPH_COMBINESPECFOG0, parameter); +} + +DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW1) +{ + pgraph_reg_w(pg, NV_PGRAPH_COMBINESPECFOG1, parameter); +} + +DEF_METHOD(NV097, SET_TEXTURE_ADDRESS) +{ 
+ int slot = (method - NV097_SET_TEXTURE_ADDRESS) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXADDRESS0 + slot * 4, parameter); +} + +DEF_METHOD(NV097, SET_CONTROL0) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + bool stencil_write_enable = + parameter & NV097_SET_CONTROL0_STENCIL_WRITE_ENABLE; + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE, + stencil_write_enable); + + uint32_t z_format = GET_MASK(parameter, NV097_SET_CONTROL0_Z_FORMAT); + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_Z_FORMAT, z_format); + + bool z_perspective = + parameter & NV097_SET_CONTROL0_Z_PERSPECTIVE_ENABLE; + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE, + z_perspective); +} + +DEF_METHOD(NV097, SET_COLOR_MATERIAL) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_EMISSION, + (parameter >> 0) & 3); + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_AMBIENT, + (parameter >> 2) & 3); + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_DIFFUSE, + (parameter >> 4) & 3); + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_SPECULAR, + (parameter >> 6) & 3); +} + +DEF_METHOD(NV097, SET_FOG_MODE) +{ + /* FIXME: There is also NV_PGRAPH_CSV0_D_FOG_MODE */ + unsigned int mode; + switch (parameter) { + case NV097_SET_FOG_MODE_V_LINEAR: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR; break; + case NV097_SET_FOG_MODE_V_EXP: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP; break; + case NV097_SET_FOG_MODE_V_EXP2: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2; break; + case NV097_SET_FOG_MODE_V_EXP_ABS: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP_ABS; break; + case NV097_SET_FOG_MODE_V_EXP2_ABS: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2_ABS; break; + case NV097_SET_FOG_MODE_V_LINEAR_ABS: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR_ABS; break; + default: + assert(false); + break; + } + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_FOG_MODE, + mode); +} + +DEF_METHOD(NV097, SET_FOG_GEN_MODE) +{ + unsigned int mode; + switch 
(parameter) { + case NV097_SET_FOG_GEN_MODE_V_SPEC_ALPHA: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_SPEC_ALPHA; break; + case NV097_SET_FOG_GEN_MODE_V_RADIAL: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_RADIAL; break; + case NV097_SET_FOG_GEN_MODE_V_PLANAR: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_PLANAR; break; + case NV097_SET_FOG_GEN_MODE_V_ABS_PLANAR: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_ABS_PLANAR; break; + case NV097_SET_FOG_GEN_MODE_V_FOG_X: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_FOG_X; break; + default: + assert(false); + break; + } + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_FOGGENMODE, mode); +} + +DEF_METHOD(NV097, SET_FOG_ENABLE) +{ + /* + FIXME: There is also: + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_FOGENABLE, + parameter); + */ + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_FOGENABLE, + parameter); +} + +DEF_METHOD(NV097, SET_FOG_COLOR) +{ + /* PGRAPH channels are ARGB, parameter channels are ABGR */ + uint8_t red = GET_MASK(parameter, NV097_SET_FOG_COLOR_RED); + uint8_t green = GET_MASK(parameter, NV097_SET_FOG_COLOR_GREEN); + uint8_t blue = GET_MASK(parameter, NV097_SET_FOG_COLOR_BLUE); + uint8_t alpha = GET_MASK(parameter, NV097_SET_FOG_COLOR_ALPHA); + PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_RED, red); + PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_GREEN, green); + PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_BLUE, blue); + PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_ALPHA, alpha); +} + +DEF_METHOD(NV097, SET_WINDOW_CLIP_TYPE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE, parameter); +} + +DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_HORIZONTAL) +{ + int slot = (method - NV097_SET_WINDOW_CLIP_HORIZONTAL) / 4; + for (; slot < 8; ++slot) { + pgraph_reg_w(pg, NV_PGRAPH_WINDOWCLIPX0 + slot * 4, parameter); + } +} + +DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_VERTICAL) +{ + int slot = (method - NV097_SET_WINDOW_CLIP_VERTICAL) / 4; + for (; slot < 8; ++slot) { + pgraph_reg_w(pg, 
NV_PGRAPH_WINDOWCLIPY0 + slot * 4, parameter); + } +} + +DEF_METHOD(NV097, SET_ALPHA_TEST_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ALPHATESTENABLE, parameter); +} + +DEF_METHOD(NV097, SET_BLEND_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_EN, parameter); +} + +DEF_METHOD(NV097, SET_CULL_FACE_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_CULLENABLE, + parameter); +} + +DEF_METHOD(NV097, SET_DEPTH_TEST_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_0_ZENABLE, + parameter); +} + +DEF_METHOD(NV097, SET_DITHER_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_DITHERENABLE, parameter); +} + +DEF_METHOD(NV097, SET_LIGHTING_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_LIGHTING, + parameter); +} + +DEF_METHOD(NV097, SET_POINT_PARAMS_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_POINTPARAMSENABLE, + parameter); + PG_SET_MASK(NV_PGRAPH_CONTROL_3, + NV_PGRAPH_CONTROL_3_POINTPARAMSENABLE, parameter); +} + +DEF_METHOD(NV097, SET_POINT_SMOOTH_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE, parameter); +} + +DEF_METHOD(NV097, SET_LINE_SMOOTH_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE, parameter); +} + +DEF_METHOD(NV097, SET_POLY_SMOOTH_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE, parameter); +} + +DEF_METHOD(NV097, SET_SKIN_MODE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_SKIN, + parameter); +} + +DEF_METHOD(NV097, SET_STENCIL_TEST_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE, parameter); +} + +DEF_METHOD(NV097, SET_POLY_OFFSET_POINT_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE, parameter); +} + +DEF_METHOD(NV097, SET_POLY_OFFSET_LINE_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE, 
parameter); +} + +DEF_METHOD(NV097, SET_POLY_OFFSET_FILL_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE, parameter); +} + +DEF_METHOD(NV097, SET_ALPHA_FUNC) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ALPHAFUNC, parameter & 0xF); +} + +DEF_METHOD(NV097, SET_ALPHA_REF) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ALPHAREF, parameter); +} + +DEF_METHOD(NV097, SET_BLEND_FUNC_SFACTOR) +{ + unsigned int factor; + switch (parameter) { + case NV097_SET_BLEND_FUNC_SFACTOR_V_ZERO: + factor = NV_PGRAPH_BLEND_SFACTOR_ZERO; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_SRC_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_DST_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_DST_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA_SATURATE: + factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA_SATURATE; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_COLOR; break; + case 
NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_ALPHA; break; + default: + NV2A_DPRINTF("Unknown blend source factor: 0x%08x\n", parameter); + return; /* discard */ + } + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_SFACTOR, factor); +} + +DEF_METHOD(NV097, SET_BLEND_FUNC_DFACTOR) +{ + unsigned int factor; + switch (parameter) { + case NV097_SET_BLEND_FUNC_DFACTOR_V_ZERO: + factor = NV_PGRAPH_BLEND_DFACTOR_ZERO; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_SRC_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_DST_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_DST_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA_SATURATE: + factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA_SATURATE; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_COLOR; break; + case 
NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_ALPHA; break; + default: + NV2A_DPRINTF("Unknown blend destination factor: 0x%08x\n", parameter); + return; /* discard */ + } + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_DFACTOR, factor); +} + +DEF_METHOD(NV097, SET_BLEND_COLOR) +{ + pgraph_reg_w(pg, NV_PGRAPH_BLENDCOLOR, parameter); +} + +DEF_METHOD(NV097, SET_BLEND_EQUATION) +{ + unsigned int equation; + switch (parameter) { + case NV097_SET_BLEND_EQUATION_V_FUNC_SUBTRACT: + equation = 0; break; + case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT: + equation = 1; break; + case NV097_SET_BLEND_EQUATION_V_FUNC_ADD: + equation = 2; break; + case NV097_SET_BLEND_EQUATION_V_MIN: + equation = 3; break; + case NV097_SET_BLEND_EQUATION_V_MAX: + equation = 4; break; + case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT_SIGNED: + equation = 5; break; + case NV097_SET_BLEND_EQUATION_V_FUNC_ADD_SIGNED: + equation = 6; break; + default: + NV2A_DPRINTF("Unknown blend equation: 0x%08x\n", parameter); + return; /* discard */ + } + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_EQN, equation); +} + +DEF_METHOD(NV097, SET_DEPTH_FUNC) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_0_ZFUNC, + parameter & 0xF); +} + +DEF_METHOD(NV097, SET_COLOR_MASK) +{ + pg->surface_color.write_enabled_cache |= pgraph_color_write_enabled(pg); + + bool alpha = parameter & NV097_SET_COLOR_MASK_ALPHA_WRITE_ENABLE; + bool red = parameter & NV097_SET_COLOR_MASK_RED_WRITE_ENABLE; + bool green = parameter & NV097_SET_COLOR_MASK_GREEN_WRITE_ENABLE; + bool blue = parameter & NV097_SET_COLOR_MASK_BLUE_WRITE_ENABLE; + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE, alpha); + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE, red); + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + 
NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE, green); + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE, blue); +} + +DEF_METHOD(NV097, SET_DEPTH_MASK) +{ + pg->surface_zeta.write_enabled_cache |= pgraph_zeta_write_enabled(pg); + + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ZWRITEENABLE, parameter); +} + +DEF_METHOD(NV097, SET_STENCIL_MASK) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE, parameter); +} + +DEF_METHOD(NV097, SET_STENCIL_FUNC) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_FUNC, parameter & 0xF); +} + +DEF_METHOD(NV097, SET_STENCIL_FUNC_REF) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_REF, parameter); +} + +DEF_METHOD(NV097, SET_STENCIL_FUNC_MASK) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ, parameter); +} + +static unsigned int kelvin_map_stencil_op(uint32_t parameter) +{ + unsigned int op; + switch (parameter) { + case NV097_SET_STENCIL_OP_V_KEEP: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP; break; + case NV097_SET_STENCIL_OP_V_ZERO: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_ZERO; break; + case NV097_SET_STENCIL_OP_V_REPLACE: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_REPLACE; break; + case NV097_SET_STENCIL_OP_V_INCRSAT: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCRSAT; break; + case NV097_SET_STENCIL_OP_V_DECRSAT: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECRSAT; break; + case NV097_SET_STENCIL_OP_V_INVERT: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INVERT; break; + case NV097_SET_STENCIL_OP_V_INCR: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR; break; + case NV097_SET_STENCIL_OP_V_DECR: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR; break; + default: + assert(false); + break; + } + return op; +} + +DEF_METHOD(NV097, SET_STENCIL_OP_FAIL) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_2, + NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL, + kelvin_map_stencil_op(parameter)); +} + +DEF_METHOD(NV097, SET_STENCIL_OP_ZFAIL) +{ + 
PG_SET_MASK(NV_PGRAPH_CONTROL_2, + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL, + kelvin_map_stencil_op(parameter)); +} + +DEF_METHOD(NV097, SET_STENCIL_OP_ZPASS) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_2, + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS, + kelvin_map_stencil_op(parameter)); +} + +DEF_METHOD(NV097, SET_SHADE_MODE) +{ + switch (parameter) { + case NV097_SET_SHADE_MODE_V_FLAT: + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_SHADEMODE, + NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT); + break; + case NV097_SET_SHADE_MODE_V_SMOOTH: + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_SHADEMODE, + NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH); + break; + default: + /* Discard */ + break; + } +} + +DEF_METHOD(NV097, SET_POLYGON_OFFSET_SCALE_FACTOR) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZOFFSETFACTOR, parameter); +} + +DEF_METHOD(NV097, SET_POLYGON_OFFSET_BIAS) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZOFFSETBIAS, parameter); +} + +static unsigned int kelvin_map_polygon_mode(uint32_t parameter) +{ + unsigned int mode; + switch (parameter) { + case NV097_SET_FRONT_POLYGON_MODE_V_POINT: + mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT; break; + case NV097_SET_FRONT_POLYGON_MODE_V_LINE: + mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE; break; + case NV097_SET_FRONT_POLYGON_MODE_V_FILL: + mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL; break; + default: + assert(false); + break; + } + return mode; +} + +DEF_METHOD(NV097, SET_FRONT_POLYGON_MODE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_FRONTFACEMODE, + kelvin_map_polygon_mode(parameter)); +} + +DEF_METHOD(NV097, SET_BACK_POLYGON_MODE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_BACKFACEMODE, + kelvin_map_polygon_mode(parameter)); +} + +DEF_METHOD(NV097, SET_CLIP_MIN) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZCLIPMIN, parameter); +} + +DEF_METHOD(NV097, SET_CLIP_MAX) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZCLIPMAX, parameter); +} + +DEF_METHOD(NV097, SET_CULL_FACE) +{ + unsigned int face; + switch (parameter) { + case 
NV097_SET_CULL_FACE_V_FRONT: + face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT; break; + case NV097_SET_CULL_FACE_V_BACK: + face = NV_PGRAPH_SETUPRASTER_CULLCTRL_BACK; break; + case NV097_SET_CULL_FACE_V_FRONT_AND_BACK: + face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT_AND_BACK; break; + default: + assert(false); + break; + } + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, NV_PGRAPH_SETUPRASTER_CULLCTRL, face); +} + +DEF_METHOD(NV097, SET_FRONT_FACE) +{ + bool ccw; + switch (parameter) { + case NV097_SET_FRONT_FACE_V_CW: + ccw = false; break; + case NV097_SET_FRONT_FACE_V_CCW: + ccw = true; break; + default: + NV2A_DPRINTF("Unknown front face: 0x%08x\n", parameter); + return; /* discard */ + } + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, NV_PGRAPH_SETUPRASTER_FRONTFACE, + ccw ? 1 : 0); +} + +DEF_METHOD(NV097, SET_NORMALIZATION_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE, + parameter); +} + +DEF_METHOD_INC(NV097, SET_MATERIAL_EMISSION) +{ + int slot = (method - NV097_SET_MATERIAL_EMISSION) / 4; + // FIXME: Verify NV_IGRAPH_XF_LTCTXA_CM_COL is correct + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_CM_COL][slot] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_CM_COL] = true; +} + +DEF_METHOD(NV097, SET_MATERIAL_ALPHA) +{ + pg->material_alpha = *(float*)&parameter; +} + +DEF_METHOD(NV097, SET_LIGHT_ENABLE_MASK) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_LIGHTS, parameter); +} + +static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel) +{ + assert(channel < 4); + unsigned int texgen; + switch (parameter) { + case NV097_SET_TEXGEN_S_DISABLE: + texgen = NV_PGRAPH_CSV1_A_T0_S_DISABLE; break; + case NV097_SET_TEXGEN_S_EYE_LINEAR: + texgen = NV_PGRAPH_CSV1_A_T0_S_EYE_LINEAR; break; + case NV097_SET_TEXGEN_S_OBJECT_LINEAR: + texgen = NV_PGRAPH_CSV1_A_T0_S_OBJECT_LINEAR; break; + case NV097_SET_TEXGEN_S_SPHERE_MAP: + assert(channel < 2); + texgen = NV_PGRAPH_CSV1_A_T0_S_SPHERE_MAP; break; + case NV097_SET_TEXGEN_S_REFLECTION_MAP: + assert(channel < 
3); + texgen = NV_PGRAPH_CSV1_A_T0_S_REFLECTION_MAP; break; + case NV097_SET_TEXGEN_S_NORMAL_MAP: + assert(channel < 3); + texgen = NV_PGRAPH_CSV1_A_T0_S_NORMAL_MAP; break; + default: + assert(false); + break; + } + return texgen; +} + +DEF_METHOD(NV097, SET_TEXGEN_S) +{ + int slot = (method - NV097_SET_TEXGEN_S) / 16; + unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A + : NV_PGRAPH_CSV1_B; + unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_S + : NV_PGRAPH_CSV1_A_T0_S; + PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 0)); +} + +DEF_METHOD(NV097, SET_TEXGEN_T) +{ + int slot = (method - NV097_SET_TEXGEN_T) / 16; + unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A + : NV_PGRAPH_CSV1_B; + unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_T + : NV_PGRAPH_CSV1_A_T0_T; + PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 1)); +} + +DEF_METHOD(NV097, SET_TEXGEN_R) +{ + int slot = (method - NV097_SET_TEXGEN_R) / 16; + unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A + : NV_PGRAPH_CSV1_B; + unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_R + : NV_PGRAPH_CSV1_A_T0_R; + PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 2)); +} + +DEF_METHOD(NV097, SET_TEXGEN_Q) +{ + int slot = (method - NV097_SET_TEXGEN_Q) / 16; + unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A + : NV_PGRAPH_CSV1_B; + unsigned int mask = (slot % 2) ? 
NV_PGRAPH_CSV1_A_T1_Q + : NV_PGRAPH_CSV1_A_T0_Q; + PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 3)); +} + +DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX_ENABLE) +{ + int slot = (method - NV097_SET_TEXTURE_MATRIX_ENABLE) / 4; + pg->texture_matrix_enable[slot] = parameter; +} + +DEF_METHOD(NV097, SET_POINT_SIZE) +{ + PG_SET_MASK(NV_PGRAPH_POINTSIZE, NV097_SET_POINT_SIZE_V, parameter); +} + +DEF_METHOD_INC(NV097, SET_PROJECTION_MATRIX) +{ + int slot = (method - NV097_SET_PROJECTION_MATRIX) / 4; + // pg->projection_matrix[slot] = *(float*)&parameter; + unsigned int row = NV_IGRAPH_XF_XFCTX_PMAT0 + slot/4; + pg->vsh_constants[row][slot%4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_MODEL_VIEW_MATRIX) +{ + int slot = (method - NV097_SET_MODEL_VIEW_MATRIX) / 4; + unsigned int matnum = slot / 16; + unsigned int entry = slot % 16; + unsigned int row = NV_IGRAPH_XF_XFCTX_MMAT0 + matnum*8 + entry/4; + pg->vsh_constants[row][entry % 4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_INVERSE_MODEL_VIEW_MATRIX) +{ + int slot = (method - NV097_SET_INVERSE_MODEL_VIEW_MATRIX) / 4; + unsigned int matnum = slot / 16; + unsigned int entry = slot % 16; + unsigned int row = NV_IGRAPH_XF_XFCTX_IMMAT0 + matnum*8 + entry/4; + pg->vsh_constants[row][entry % 4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_COMPOSITE_MATRIX) +{ + int slot = (method - NV097_SET_COMPOSITE_MATRIX) / 4; + unsigned int row = NV_IGRAPH_XF_XFCTX_CMAT0 + slot/4; + pg->vsh_constants[row][slot%4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX) +{ + int slot = (method - NV097_SET_TEXTURE_MATRIX) / 4; + unsigned int tex = slot / 16; + unsigned int entry = slot % 16; + unsigned int row = NV_IGRAPH_XF_XFCTX_T0MAT + tex*8 + entry/4; + pg->vsh_constants[row][entry%4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_FOG_PARAMS) +{ 
+ int slot = (method - NV097_SET_FOG_PARAMS) / 4; + if (slot < 2) { + pgraph_reg_w(pg, NV_PGRAPH_FOGPARAM0 + slot*4, parameter); + } else { + /* FIXME: No idea where slot = 2 is */ + } + + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FOG_K][slot] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FOG_K] = true; +} + +/* Handles NV097_SET_TEXGEN_PLANE_S,T,R,Q */ +DEF_METHOD_INC(NV097, SET_TEXGEN_PLANE_S) +{ + int slot = (method - NV097_SET_TEXGEN_PLANE_S) / 4; + unsigned int tex = slot / 16; + unsigned int entry = slot % 16; + unsigned int row = NV_IGRAPH_XF_XFCTX_TG0MAT + tex*8 + entry/4; + pg->vsh_constants[row][entry%4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD(NV097, SET_TEXGEN_VIEW_MODEL) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_TEXGEN_REF, + parameter); +} + +DEF_METHOD_INC(NV097, SET_FOG_PLANE) +{ + int slot = (method - NV097_SET_FOG_PLANE) / 4; + pg->vsh_constants[NV_IGRAPH_XF_XFCTX_FOG][slot] = parameter; + pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_FOG] = true; +} + +DEF_METHOD_INC(NV097, SET_SCENE_AMBIENT_COLOR) +{ + int slot = (method - NV097_SET_SCENE_AMBIENT_COLOR) / 4; + // ?? + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FR_AMB][slot] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FR_AMB] = true; +} + +DEF_METHOD_INC(NV097, SET_VIEWPORT_OFFSET) +{ + int slot = (method - NV097_SET_VIEWPORT_OFFSET) / 4; + pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][slot] = parameter; + pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPOFF] = true; +} + +DEF_METHOD_INC(NV097, SET_POINT_PARAMS) +{ + int slot = (method - NV097_SET_POINT_PARAMS) / 4; + pg->point_params[slot] = *(float *)&parameter; /* FIXME: Where? 
*/ +} + +DEF_METHOD_INC(NV097, SET_EYE_POSITION) +{ + int slot = (method - NV097_SET_EYE_POSITION) / 4; + pg->vsh_constants[NV_IGRAPH_XF_XFCTX_EYEP][slot] = parameter; + pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_EYEP] = true; +} + +DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR0) +{ + int slot = (method - NV097_SET_COMBINER_FACTOR0) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINEFACTOR0 + slot*4, parameter); +} + +DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR1) +{ + int slot = (method - NV097_SET_COMBINER_FACTOR1) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINEFACTOR1 + slot*4, parameter); +} + +DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_OCW) +{ + int slot = (method - NV097_SET_COMBINER_ALPHA_OCW) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINEALPHAO0 + slot*4, parameter); +} + +DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_ICW) +{ + int slot = (method - NV097_SET_COMBINER_COLOR_ICW) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINECOLORI0 + slot*4, parameter); +} + +DEF_METHOD_INC(NV097, SET_VIEWPORT_SCALE) +{ + int slot = (method - NV097_SET_VIEWPORT_SCALE) / 4; + pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPSCL][slot] = parameter; + pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPSCL] = true; +} + +DEF_METHOD_INC(NV097, SET_TRANSFORM_PROGRAM) +{ + int slot = (method - NV097_SET_TRANSFORM_PROGRAM) / 4; + + int program_load = PG_GET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR); + + assert(program_load < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + pg->program_data[program_load][slot%4] = parameter; + pg->program_data_dirty = true; + + if (slot % 4 == 3) { + PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, program_load+1); + } +} + +DEF_METHOD_INC(NV097, SET_TRANSFORM_CONSTANT) +{ + int slot = (method - NV097_SET_TRANSFORM_CONSTANT) / 4; + int const_load = PG_GET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR); + + assert(const_load < NV2A_VERTEXSHADER_CONSTANTS); + // VertexShaderConstant *constant = &pg->constants[const_load]; + 
pg->vsh_constants_dirty[const_load] |= + (parameter != pg->vsh_constants[const_load][slot%4]); + pg->vsh_constants[const_load][slot%4] = parameter; + + if (slot % 4 == 3) { + PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, const_load+1); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX3F) +{ + int slot = (method - NV097_SET_VERTEX3F) / 4; + VertexAttribute *attribute = + &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION]; + pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION); + attribute->inline_value[slot] = *(float*)&parameter; + attribute->inline_value[3] = 1.0f; + if (slot == 2) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +/* Handles NV097_SET_BACK_LIGHT_* */ +DEF_METHOD_INC(NV097, SET_BACK_LIGHT_AMBIENT_COLOR) +{ + int slot = (method - NV097_SET_BACK_LIGHT_AMBIENT_COLOR) / 4; + unsigned int part = NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4 + slot % 16; + slot /= 16; /* [Light index] */ + assert(slot < 8); + switch(part * 4) { + case NV097_SET_BACK_LIGHT_AMBIENT_COLOR ... + NV097_SET_BACK_LIGHT_AMBIENT_COLOR + 8: + part -= NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6] = true; + break; + case NV097_SET_BACK_LIGHT_DIFFUSE_COLOR ... + NV097_SET_BACK_LIGHT_DIFFUSE_COLOR + 8: + part -= NV097_SET_BACK_LIGHT_DIFFUSE_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6] = true; + break; + case NV097_SET_BACK_LIGHT_SPECULAR_COLOR ... 
+ NV097_SET_BACK_LIGHT_SPECULAR_COLOR + 8: + part -= NV097_SET_BACK_LIGHT_SPECULAR_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6] = true; + break; + default: + assert(false); + break; + } +} + +/* Handles all the light source props except for NV097_SET_BACK_LIGHT_* */ +DEF_METHOD_INC(NV097, SET_LIGHT_AMBIENT_COLOR) +{ + int slot = (method - NV097_SET_LIGHT_AMBIENT_COLOR) / 4; + unsigned int part = NV097_SET_LIGHT_AMBIENT_COLOR / 4 + slot % 32; + slot /= 32; /* [Light index] */ + assert(slot < 8); + switch(part * 4) { + case NV097_SET_LIGHT_AMBIENT_COLOR ... + NV097_SET_LIGHT_AMBIENT_COLOR + 8: + part -= NV097_SET_LIGHT_AMBIENT_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6] = true; + break; + case NV097_SET_LIGHT_DIFFUSE_COLOR ... + NV097_SET_LIGHT_DIFFUSE_COLOR + 8: + part -= NV097_SET_LIGHT_DIFFUSE_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6] = true; + break; + case NV097_SET_LIGHT_SPECULAR_COLOR ... + NV097_SET_LIGHT_SPECULAR_COLOR + 8: + part -= NV097_SET_LIGHT_SPECULAR_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6] = true; + break; + case NV097_SET_LIGHT_LOCAL_RANGE: + pg->ltc1[NV_IGRAPH_XF_LTC1_r0 + slot][0] = parameter; + pg->ltc1_dirty[NV_IGRAPH_XF_LTC1_r0 + slot] = true; + break; + case NV097_SET_LIGHT_INFINITE_HALF_VECTOR ... + NV097_SET_LIGHT_INFINITE_HALF_VECTOR + 8: + part -= NV097_SET_LIGHT_INFINITE_HALF_VECTOR / 4; + pg->light_infinite_half_vector[slot][part] = *(float*)&parameter; + break; + case NV097_SET_LIGHT_INFINITE_DIRECTION ... 
+ NV097_SET_LIGHT_INFINITE_DIRECTION + 8: + part -= NV097_SET_LIGHT_INFINITE_DIRECTION / 4; + pg->light_infinite_direction[slot][part] = *(float*)¶meter; + break; + case NV097_SET_LIGHT_SPOT_FALLOFF ... + NV097_SET_LIGHT_SPOT_FALLOFF + 8: + part -= NV097_SET_LIGHT_SPOT_FALLOFF / 4; + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2][part] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2] = true; + break; + case NV097_SET_LIGHT_SPOT_DIRECTION ... + NV097_SET_LIGHT_SPOT_DIRECTION + 12: + part -= NV097_SET_LIGHT_SPOT_DIRECTION / 4; + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2][part] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2] = true; + break; + case NV097_SET_LIGHT_LOCAL_POSITION ... + NV097_SET_LIGHT_LOCAL_POSITION + 8: + part -= NV097_SET_LIGHT_LOCAL_POSITION / 4; + pg->light_local_position[slot][part] = *(float*)¶meter; + break; + case NV097_SET_LIGHT_LOCAL_ATTENUATION ... + NV097_SET_LIGHT_LOCAL_ATTENUATION + 8: + part -= NV097_SET_LIGHT_LOCAL_ATTENUATION / 4; + pg->light_local_attenuation[slot][part] = *(float*)¶meter; + break; + default: + assert(false); + break; + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX4F) +{ + int slot = (method - NV097_SET_VERTEX4F) / 4; + VertexAttribute *attribute = + &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION]; + pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION); + attribute->inline_value[slot] = *(float*)¶meter; + if (slot == 3) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD_INC(NV097, SET_NORMAL3S) +{ + int slot = (method - NV097_SET_NORMAL3S) / 4; + unsigned int part = slot % 2; + VertexAttribute *attribute = + &pg->vertex_attributes[NV2A_VERTEX_ATTR_NORMAL]; + pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_NORMAL); + int16_t val = parameter & 0xFFFF; + attribute->inline_value[part * 2 + 0] = MAX(-1.0f, (float)val / 32767.0f); + val = parameter >> 16; + attribute->inline_value[part * 2 + 1] = MAX(-1.0f, (float)val / 32767.0f); +} + 
+#define SET_VERTEX_ATTRIBUTE_4S(command, attr_index) \ + do { \ + int slot = (method - (command)) / 4; \ + unsigned int part = slot % 2; \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[part * 2 + 0] = \ + (float)(int16_t)(parameter & 0xFFFF); \ + attribute->inline_value[part * 2 + 1] = \ + (float)(int16_t)(parameter >> 16); \ + } while (0) + +DEF_METHOD_INC(NV097, SET_TEXCOORD0_4S) +{ + SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD0_4S, NV2A_VERTEX_ATTR_TEXTURE0); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD1_4S) +{ + SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD1_4S, NV2A_VERTEX_ATTR_TEXTURE1); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD2_4S) +{ + SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD2_4S, NV2A_VERTEX_ATTR_TEXTURE2); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD3_4S) +{ + SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD3_4S, NV2A_VERTEX_ATTR_TEXTURE3); +} + +#undef SET_VERTEX_ATTRIBUTE_4S + +#define SET_VERTEX_ATRIBUTE_TEX_2S(attr_index) \ + do { \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); \ + attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); \ + attribute->inline_value[2] = 0.0f; \ + attribute->inline_value[3] = 1.0f; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_TEXCOORD0_2S) +{ + SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE0); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD1_2S) +{ + SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE1); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD2_2S) +{ + SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE2); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD3_2S) +{ + SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE3); +} + +#undef SET_VERTEX_ATRIBUTE_TEX_2S + +#define SET_VERTEX_COLOR_3F(command, attr_index) \ + do { \ + int slot = (method - (command)) / 4; \ + 
VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[slot] = *(float*)¶meter; \ + attribute->inline_value[3] = 1.0f; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR3F) +{ + SET_VERTEX_COLOR_3F(NV097_SET_DIFFUSE_COLOR3F, NV2A_VERTEX_ATTR_DIFFUSE); +} + +DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR3F) +{ + SET_VERTEX_COLOR_3F(NV097_SET_SPECULAR_COLOR3F, NV2A_VERTEX_ATTR_SPECULAR); +} + +#undef SET_VERTEX_COLOR_3F + +#define SET_VERTEX_ATTRIBUTE_F(command, attr_index) \ + do { \ + int slot = (method - (command)) / 4; \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[slot] = *(float*)¶meter; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_NORMAL3F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_NORMAL3F, NV2A_VERTEX_ATTR_NORMAL); +} + +DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_DIFFUSE_COLOR4F, NV2A_VERTEX_ATTR_DIFFUSE); +} + +DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_SPECULAR_COLOR4F, + NV2A_VERTEX_ATTR_SPECULAR); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD0_4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD0_4F, NV2A_VERTEX_ATTR_TEXTURE0); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD1_4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD1_4F, NV2A_VERTEX_ATTR_TEXTURE1); +} + + +DEF_METHOD_INC(NV097, SET_TEXCOORD2_4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD2_4F, NV2A_VERTEX_ATTR_TEXTURE2); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD3_4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD3_4F, NV2A_VERTEX_ATTR_TEXTURE3); +} + +#undef SET_VERTEX_ATTRIBUTE_F + +#define SET_VERTEX_ATRIBUTE_TEX_2F(command, attr_index) \ + do { \ + int slot = (method - (command)) / 4; \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ 
+ attribute->inline_value[slot] = *(float*)¶meter; \ + attribute->inline_value[2] = 0.0f; \ + attribute->inline_value[3] = 1.0f; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_TEXCOORD0_2F) +{ + SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD0_2F, + NV2A_VERTEX_ATTR_TEXTURE0); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD1_2F) +{ + SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD1_2F, + NV2A_VERTEX_ATTR_TEXTURE1); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD2_2F) +{ + SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD2_2F, + NV2A_VERTEX_ATTR_TEXTURE2); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD3_2F) +{ + SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD3_2F, + NV2A_VERTEX_ATTR_TEXTURE3); +} + +#undef SET_VERTEX_ATRIBUTE_TEX_2F + +#define SET_VERTEX_ATTRIBUTE_4UB(command, attr_index) \ + do { \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[0] = (parameter & 0xFF) / 255.0f; \ + attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0f; \ + attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0f; \ + attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0f; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4UB) +{ + SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_DIFFUSE_COLOR4UB, + NV2A_VERTEX_ATTR_DIFFUSE); +} + +DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4UB) +{ + SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_SPECULAR_COLOR4UB, + NV2A_VERTEX_ATTR_SPECULAR); +} + +#undef SET_VERTEX_ATTRIBUTE_4UB + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_FORMAT) +{ + int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_FORMAT) / 4; + VertexAttribute *attr = &pg->vertex_attributes[slot]; + attr->format = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE); + attr->count = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE); + attr->stride = GET_MASK(parameter, + NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE); + + NV2A_DPRINTF("vertex data array format=%d, count=%d, stride=%d\n", + 
attr->format, attr->count, attr->stride); + + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: + attr->size = 1; + assert(attr->count == 4); + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: + attr->size = 1; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: + attr->size = 2; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + attr->size = 4; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: + attr->size = 2; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: + /* 3 signed, normalized components packed in 32-bits. (11,11,10) */ + attr->size = 4; + assert(attr->count == 1); + break; + default: + fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format); + assert(false); + break; + } + + if (attr->format == NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP) { + pg->compressed_attrs |= (1 << slot); + } else { + pg->compressed_attrs &= ~(1 << slot); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_OFFSET) +{ + int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_OFFSET) / 4; + + pg->vertex_attributes[slot].dma_select = parameter & 0x80000000; + pg->vertex_attributes[slot].offset = parameter & 0x7fffffff; +} + +DEF_METHOD(NV097, SET_LOGIC_OP_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_LOGICOP_ENABLE, + parameter); +} + +DEF_METHOD(NV097, SET_LOGIC_OP) +{ + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_LOGICOP, + parameter & 0xF); +} + +DEF_METHOD(NV097, CLEAR_REPORT_VALUE) +{ + d->pgraph.renderer->ops.clear_report_value(d); +} + +DEF_METHOD(NV097, SET_ZPASS_PIXEL_COUNT_ENABLE) +{ + pg->zpass_pixel_count_enable = parameter; +} + +DEF_METHOD(NV097, GET_REPORT) +{ + uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE); + assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); + + d->pgraph.renderer->ops.get_report(d, parameter); +} + +DEF_METHOD_INC(NV097, SET_EYE_DIRECTION) +{ + int slot = (method - NV097_SET_EYE_DIRECTION) / 4; + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_EYED][slot] 
= parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_EYED] = true; +} + +DEF_METHOD(NV097, SET_BEGIN_END) +{ + if (parameter == NV097_SET_BEGIN_END_OP_END) { + if (pg->primitive_mode == PRIM_TYPE_INVALID) { + NV2A_DPRINTF("End without Begin!\n"); + } + nv2a_profile_inc_counter(NV2A_PROF_BEGIN_ENDS); + d->pgraph.renderer->ops.draw_end(d); + pgraph_reset_inline_buffers(pg); + pg->primitive_mode = PRIM_TYPE_INVALID; + } else { + if (pg->primitive_mode != PRIM_TYPE_INVALID) { + NV2A_DPRINTF("Begin without End!\n"); + } + assert(parameter <= NV097_SET_BEGIN_END_OP_POLYGON); + pg->primitive_mode = parameter; + pgraph_reset_inline_buffers(pg); + d->pgraph.renderer->ops.draw_begin(d); + } +} + +DEF_METHOD(NV097, SET_TEXTURE_OFFSET) +{ + int slot = (method - NV097_SET_TEXTURE_OFFSET) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXOFFSET0 + slot * 4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_FORMAT) +{ + int slot = (method - NV097_SET_TEXTURE_FORMAT) / 64; + + bool dma_select = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA) == 2; + bool cubemap = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE); + unsigned int border_source = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE); + unsigned int dimensionality = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY); + unsigned int color_format = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_COLOR); + unsigned int levels = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS); + unsigned int log_width = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U); + unsigned int log_height = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V); + unsigned int log_depth = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P); + + unsigned int reg = NV_PGRAPH_TEXFMT0 + slot * 4; + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_CONTEXT_DMA, dma_select); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE, cubemap); + PG_SET_MASK(reg, 
NV_PGRAPH_TEXFMT0_BORDER_SOURCE, border_source); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_DIMENSIONALITY, dimensionality); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_COLOR, color_format); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS, levels); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_U, log_width); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_V, log_height); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_P, log_depth); + + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_CONTROL0) +{ + int slot = (method - NV097_SET_TEXTURE_CONTROL0) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXCTL0_0 + slot*4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_CONTROL1) +{ + int slot = (method - NV097_SET_TEXTURE_CONTROL1) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXCTL1_0 + slot*4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_FILTER) +{ + int slot = (method - NV097_SET_TEXTURE_FILTER) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXFILTER0 + slot * 4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_IMAGE_RECT) +{ + int slot = (method - NV097_SET_TEXTURE_IMAGE_RECT) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXIMAGERECT0 + slot * 4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_PALETTE) +{ + int slot = (method - NV097_SET_TEXTURE_PALETTE) / 64; + + bool dma_select = + GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_CONTEXT_DMA) == 1; + unsigned int length = + GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_LENGTH); + unsigned int offset = + GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_OFFSET); + + unsigned int reg = NV_PGRAPH_TEXPALETTE0 + slot * 4; + PG_SET_MASK(reg, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA, dma_select); + PG_SET_MASK(reg, NV_PGRAPH_TEXPALETTE0_LENGTH, length); + PG_SET_MASK(reg, NV_PGRAPH_TEXPALETTE0_OFFSET, offset); + + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_BORDER_COLOR) +{ + int slot = (method - 
NV097_SET_TEXTURE_BORDER_COLOR) / 64; + pgraph_reg_w(pg, NV_PGRAPH_BORDERCOLOR0 + slot * 4, parameter); +} + +DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_MAT) +{ + int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_MAT) / 4; + if (slot < 16) { + /* discard */ + return; + } + + slot -= 16; + const int swizzle[4] = { NV_PGRAPH_BUMPMAT00, NV_PGRAPH_BUMPMAT01, + NV_PGRAPH_BUMPMAT11, NV_PGRAPH_BUMPMAT10 }; + pgraph_reg_w(pg, swizzle[slot % 4] + slot / 4, parameter); +} + +DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_SCALE) +{ + int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_SCALE) / 64; + if (slot == 0) { + /* discard */ + return; + } + + slot--; + pgraph_reg_w(pg, NV_PGRAPH_BUMPSCALE1 + slot * 4, parameter); +} + +DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_OFFSET) +{ + int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_OFFSET) / 64; + if (slot == 0) { + /* discard */ + return; + } + + slot--; + pgraph_reg_w(pg, NV_PGRAPH_BUMPOFFSET1 + slot * 4, parameter); +} + +static void pgraph_expand_draw_arrays(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + uint32_t start = pg->draw_arrays_start[pg->draw_arrays_length - 1]; + uint32_t count = pg->draw_arrays_count[pg->draw_arrays_length - 1]; + + /* Render any previously squashed DRAW_ARRAYS calls. This case would be + * triggered if a set of BEGIN+DA+END triplets is followed by the + * BEGIN+DA+ARRAY_ELEMENT+... chain that caused this expansion. 
*/ + if (pg->draw_arrays_length > 1) { + d->pgraph.renderer->ops.flush_draw(d); + pgraph_reset_inline_buffers(pg); + } + assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH); + for (unsigned int i = 0; i < count; i++) { + pg->inline_elements[pg->inline_elements_length++] = start + i; + } + + pgraph_reset_draw_arrays(pg); +} + +void pgraph_check_within_begin_end_block(PGRAPHState *pg) +{ + if (pg->primitive_mode == PRIM_TYPE_INVALID) { + NV2A_DPRINTF("Vertex data being sent outside of begin/end block!\n"); + } +} + +DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT16) +{ + pgraph_check_within_begin_end_block(pg); + + if (pg->draw_arrays_length) { + pgraph_expand_draw_arrays(d); + } + + assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); + pg->inline_elements[pg->inline_elements_length++] = parameter & 0xFFFF; + pg->inline_elements[pg->inline_elements_length++] = parameter >> 16; +} + +DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT32) +{ + pgraph_check_within_begin_end_block(pg); + + if (pg->draw_arrays_length) { + pgraph_expand_draw_arrays(d); + } + + assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); + pg->inline_elements[pg->inline_elements_length++] = parameter; +} + +DEF_METHOD(NV097, DRAW_ARRAYS) +{ + pgraph_check_within_begin_end_block(pg); + + int32_t start = GET_MASK(parameter, NV097_DRAW_ARRAYS_START_INDEX); + int32_t count = GET_MASK(parameter, NV097_DRAW_ARRAYS_COUNT) + 1; + + if (pg->inline_elements_length) { + /* FIXME: Determine HW behavior for overflow case. 
*/ + assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH); + assert(!pg->draw_arrays_prevent_connect); + + for (unsigned int i = 0; i < count; i++) { + pg->inline_elements[pg->inline_elements_length++] = start + i; + } + return; + } + + pg->draw_arrays_min_start = MIN(pg->draw_arrays_min_start, start); + pg->draw_arrays_max_count = MAX(pg->draw_arrays_max_count, start + count); + + assert(pg->draw_arrays_length < ARRAY_SIZE(pg->draw_arrays_start)); + + /* Attempt to connect contiguous primitives */ + if (!pg->draw_arrays_prevent_connect && pg->draw_arrays_length > 0) { + unsigned int last_start = + pg->draw_arrays_start[pg->draw_arrays_length - 1]; + int32_t *last_count = + &pg->draw_arrays_count[pg->draw_arrays_length - 1]; + if (start == (last_start + *last_count)) { + *last_count += count; + return; + } + } + + pg->draw_arrays_start[pg->draw_arrays_length] = start; + pg->draw_arrays_count[pg->draw_arrays_length] = count; + pg->draw_arrays_length++; + pg->draw_arrays_prevent_connect = false; +} + +DEF_METHOD_NON_INC(NV097, INLINE_ARRAY) +{ + pgraph_check_within_begin_end_block(pg); + assert(pg->inline_array_length < NV2A_MAX_BATCH_LENGTH); + pg->inline_array[pg->inline_array_length++] = parameter; +} + +DEF_METHOD_INC(NV097, SET_EYE_VECTOR) +{ + int slot = (method - NV097_SET_EYE_VECTOR) / 4; + pgraph_reg_w(pg, NV_PGRAPH_EYEVEC0 + slot * 4, parameter); +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA2F_M) +{ + int slot = (method - NV097_SET_VERTEX_DATA2F_M) / 4; + unsigned int part = slot % 2; + slot /= 2; + VertexAttribute *attribute = &pg->vertex_attributes[slot]; + pgraph_allocate_inline_buffer_vertices(pg, slot); + attribute->inline_value[part] = *(float*)¶meter; + /* FIXME: Should these really be set to 0.0 and 1.0 ? Conditions? 
*/
+    attribute->inline_value[2] = 0.0f;
+    attribute->inline_value[3] = 1.0f;
+    if ((slot == 0) && (part == 1)) {
+        pgraph_finish_inline_buffer_vertex(pg);
+    }
+}
+
+DEF_METHOD_INC(NV097, SET_VERTEX_DATA4F_M)
+{
+    int slot = (method - NV097_SET_VERTEX_DATA4F_M) / 4;
+    unsigned int part = slot % 4;
+    slot /= 4;
+    VertexAttribute *attribute = &pg->vertex_attributes[slot];
+    pgraph_allocate_inline_buffer_vertices(pg, slot);
+    attribute->inline_value[part] = *(float*)&parameter;
+    /* Attribute 0 (position) completes the inline vertex. */
+    if ((slot == 0) && (part == 3)) {
+        pgraph_finish_inline_buffer_vertex(pg);
+    }
+}
+
+DEF_METHOD_INC(NV097, SET_VERTEX_DATA2S)
+{
+    int slot = (method - NV097_SET_VERTEX_DATA2S) / 4;
+    VertexAttribute *attribute = &pg->vertex_attributes[slot];
+    pgraph_allocate_inline_buffer_vertices(pg, slot);
+    /* Use float literals: inline_value is float, matching sibling methods. */
+    attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF);
+    attribute->inline_value[1] = (float)(int16_t)(parameter >> 16);
+    attribute->inline_value[2] = 0.0f;
+    attribute->inline_value[3] = 1.0f;
+    if (slot == 0) {
+        pgraph_finish_inline_buffer_vertex(pg);
+    }
+}
+
+DEF_METHOD_INC(NV097, SET_VERTEX_DATA4UB)
+{
+    int slot = (method - NV097_SET_VERTEX_DATA4UB) / 4;
+    VertexAttribute *attribute = &pg->vertex_attributes[slot];
+    pgraph_allocate_inline_buffer_vertices(pg, slot);
+    /* Normalize packed bytes with float arithmetic (was double literals). */
+    attribute->inline_value[0] = (parameter & 0xFF) / 255.0f;
+    attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0f;
+    attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0f;
+    attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0f;
+    if (slot == 0) {
+        pgraph_finish_inline_buffer_vertex(pg);
+    }
+}
+
+DEF_METHOD_INC(NV097, SET_VERTEX_DATA4S_M)
+{
+    int slot = (method - NV097_SET_VERTEX_DATA4S_M) / 4;
+    unsigned int part = slot % 2;
+    slot /= 2;
+    VertexAttribute *attribute = &pg->vertex_attributes[slot];
+    pgraph_allocate_inline_buffer_vertices(pg, slot);
+
+    attribute->inline_value[part * 2 + 0] =
+        (float)(int16_t)(parameter & 0xFFFF);
+    attribute->inline_value[part * 2 + 1] = 
(float)(int16_t)(parameter >> 16); + if ((slot == 0) && (part == 1)) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD(NV097, SET_SEMAPHORE_OFFSET) +{ + pgraph_reg_w(pg, NV_PGRAPH_SEMAPHOREOFFSET, parameter); +} + +DEF_METHOD(NV097, BACK_END_WRITE_SEMAPHORE_RELEASE) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + //qemu_mutex_unlock(&d->pgraph.lock); + //qemu_mutex_lock_iothread(); + + uint32_t semaphore_offset = pgraph_reg_r(pg, NV_PGRAPH_SEMAPHOREOFFSET); + + hwaddr semaphore_dma_len; + uint8_t *semaphore_data = (uint8_t*)nv_dma_map(d, pg->dma_semaphore, + &semaphore_dma_len); + assert(semaphore_offset < semaphore_dma_len); + semaphore_data += semaphore_offset; + + stl_le_p((uint32_t*)semaphore_data, parameter); + + //qemu_mutex_lock(&d->pgraph.lock); + //qemu_mutex_unlock_iothread(); +} + +DEF_METHOD(NV097, SET_ZMIN_MAX_CONTROL) +{ + switch (GET_MASK(parameter, NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN)) { + case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CULL: + PG_SET_MASK(NV_PGRAPH_ZCOMPRESSOCCLUDE, + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN, + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CULL); + break; + case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CLAMP: + PG_SET_MASK(NV_PGRAPH_ZCOMPRESSOCCLUDE, + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN, + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP); + break; + default: + /* FIXME: Should raise NV_PGRAPH_NSOURCE_DATA_ERROR_PENDING */ + assert(!"Invalid zclamp value"); + break; + } +} + +DEF_METHOD(NV097, SET_ANTI_ALIASING_CONTROL) +{ + PG_SET_MASK(NV_PGRAPH_ANTIALIASING, NV_PGRAPH_ANTIALIASING_ENABLE, + GET_MASK(parameter, NV097_SET_ANTI_ALIASING_CONTROL_ENABLE)); + // FIXME: Handle the remaining bits (observed values 0xFFFF0000, 0xFFFF0001) +} + +DEF_METHOD(NV097, SET_ZSTENCIL_CLEAR_VALUE) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZSTENCILCLEARVALUE, parameter); +} + +DEF_METHOD(NV097, SET_COLOR_CLEAR_VALUE) +{ + pgraph_reg_w(pg, NV_PGRAPH_COLORCLEARVALUE, parameter); +} + +DEF_METHOD(NV097, CLEAR_SURFACE) +{ + 
d->pgraph.renderer->ops.clear_surface(d, parameter); +} + +DEF_METHOD(NV097, SET_CLEAR_RECT_HORIZONTAL) +{ + pgraph_reg_w(pg, NV_PGRAPH_CLEARRECTX, parameter); +} + +DEF_METHOD(NV097, SET_CLEAR_RECT_VERTICAL) +{ + pgraph_reg_w(pg, NV_PGRAPH_CLEARRECTY, parameter); +} + +DEF_METHOD_INC(NV097, SET_SPECULAR_FOG_FACTOR) +{ + int slot = (method - NV097_SET_SPECULAR_FOG_FACTOR) / 4; + pgraph_reg_w(pg, NV_PGRAPH_SPECFOGFACTOR0 + slot*4, parameter); +} + +DEF_METHOD(NV097, SET_SHADER_CLIP_PLANE_MODE) +{ + pgraph_reg_w(pg, NV_PGRAPH_SHADERCLIPMODE, parameter); +} + +DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_OCW) +{ + int slot = (method - NV097_SET_COMBINER_COLOR_OCW) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINECOLORO0 + slot*4, parameter); +} + +DEF_METHOD(NV097, SET_COMBINER_CONTROL) +{ + pgraph_reg_w(pg, NV_PGRAPH_COMBINECTL, parameter); +} + +DEF_METHOD(NV097, SET_SHADOW_ZSLOPE_THRESHOLD) +{ + pgraph_reg_w(pg, NV_PGRAPH_SHADOWZSLOPETHRESHOLD, parameter); + assert(parameter == 0x7F800000); /* FIXME: Unimplemented */ +} + +DEF_METHOD(NV097, SET_SHADOW_DEPTH_FUNC) +{ + PG_SET_MASK(NV_PGRAPH_SHADOWCTL, NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC, + parameter); +} + +DEF_METHOD(NV097, SET_SHADER_STAGE_PROGRAM) +{ + pgraph_reg_w(pg, NV_PGRAPH_SHADERPROG, parameter); +} + +DEF_METHOD(NV097, SET_DOT_RGBMAPPING) +{ + PG_SET_MASK(NV_PGRAPH_SHADERCTL, 0xFFF, + GET_MASK(parameter, 0xFFF)); +} + +DEF_METHOD(NV097, SET_SHADER_OTHER_STAGE_INPUT) +{ + PG_SET_MASK(NV_PGRAPH_SHADERCTL, 0xFFFF000, + GET_MASK(parameter, 0xFFFF000)); +} + +DEF_METHOD_INC(NV097, SET_TRANSFORM_DATA) +{ + int slot = (method - NV097_SET_TRANSFORM_DATA) / 4; + pg->vertex_state_shader_v0[slot] = parameter; +} + +DEF_METHOD(NV097, LAUNCH_TRANSFORM_PROGRAM) +{ + unsigned int program_start = parameter; + assert(program_start < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + Nv2aVshProgram program; + Nv2aVshParseResult result = nv2a_vsh_parse_program( + &program, + pg->program_data[program_start], + NV2A_MAX_TRANSFORM_PROGRAM_LENGTH - 
program_start); + assert(result == NV2AVPR_SUCCESS); + + Nv2aVshCPUXVSSExecutionState state_linkage; + Nv2aVshExecutionState state = nv2a_vsh_emu_initialize_xss_execution_state( + &state_linkage, (float*)pg->vsh_constants); + memcpy(state_linkage.input_regs, pg->vertex_state_shader_v0, sizeof(pg->vertex_state_shader_v0)); + + nv2a_vsh_emu_execute_track_context_writes(&state, &program, pg->vsh_constants_dirty); + + nv2a_vsh_program_destroy(&program); +} + +DEF_METHOD(NV097, SET_TRANSFORM_EXECUTION_MODE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_MODE, + GET_MASK(parameter, + NV097_SET_TRANSFORM_EXECUTION_MODE_MODE)); + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_RANGE_MODE, + GET_MASK(parameter, + NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE)); +} + +DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_CXT_WRITE_EN) +{ + pg->enable_vertex_program_write = parameter; +} + +DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_LOAD) +{ + assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, parameter); +} + +DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_START) +{ + assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + PG_SET_MASK(NV_PGRAPH_CSV0_C, + NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START, parameter); +} + +DEF_METHOD(NV097, SET_TRANSFORM_CONSTANT_LOAD) +{ + assert(parameter < NV2A_VERTEXSHADER_CONSTANTS); + PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, parameter); +} + +void pgraph_get_clear_color(PGRAPHState *pg, float rgba[4]) +{ + uint32_t clear_color = pgraph_reg_r(pg, NV_PGRAPH_COLORCLEARVALUE); + + float *r = &rgba[0], *g = &rgba[1], *b = &rgba[2], *a = &rgba[3]; + + /* Handle RGB */ + switch(pg->surface_shape.color_format) { + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_O1R5G5B5: + *r = ((clear_color >> 10) & 0x1F) / 31.0f; + *g = ((clear_color >> 5) & 0x1F) / 31.0f; + *b = (clear_color & 0x1F) / 31.0f; + 
break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: + *r = ((clear_color >> 11) & 0x1F) / 31.0f; + *g = ((clear_color >> 5) & 0x3F) / 63.0f; + *b = (clear_color & 0x1F) / 31.0f; + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_O8R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: + *r = ((clear_color >> 16) & 0xFF) / 255.0f; + *g = ((clear_color >> 8) & 0xFF) / 255.0f; + *b = (clear_color & 0xFF) / 255.0f; + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8: + /* Xbox D3D doesn't support clearing those */ + default: + *r = 1.0f; + *g = 0.0f; + *b = 1.0f; + fprintf(stderr, "CLEAR_SURFACE for color_format 0x%x unsupported", + pg->surface_shape.color_format); + assert(false); + break; + } + + /* Handle alpha */ + switch(pg->surface_shape.color_format) { + /* FIXME: CLEAR_SURFACE seems to work like memset, so maybe we + * also have to clear non-alpha bits with alpha value? + * As GL doesn't own those pixels we'd have to do this on + * our own in xbox memory. + */ + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8: + *a = ((clear_color >> 24) & 0x7F) / 127.0f; + assert(false); /* Untested */ + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: + *a = ((clear_color >> 24) & 0xFF) / 255.0f; + break; + default: + *a = 1.0f; + break; + } +} + +void pgraph_get_clear_depth_stencil_value(PGRAPHState *pg, float *depth, + int *stencil) +{ + uint32_t clear_zstencil = + pgraph_reg_r(pg, NV_PGRAPH_ZSTENCILCLEARVALUE); + *stencil = 0; + *depth = 1.0; + + switch (pg->surface_shape.zeta_format) { + case NV097_SET_SURFACE_FORMAT_ZETA_Z16: { + uint16_t z = clear_zstencil & 0xFFFF; + /* FIXME: Remove bit for stencil clear? 
*/ + if (pg->surface_shape.z_format) { + *depth = convert_f16_to_float(z) / f16_max; + } else { + *depth = z / (float)0xFFFF; + } + break; + } + case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: { + *stencil = clear_zstencil & 0xFF; + uint32_t z = clear_zstencil >> 8; + if (pg->surface_shape.z_format) { + *depth = convert_f24_to_float(z) / f24_max; + } else { + *depth = z / (float)0xFFFFFF; + } + break; + } + default: + fprintf(stderr, "Unknown zeta surface format: 0x%x\n", + pg->surface_shape.zeta_format); + assert(false); + break; + } +} + +void pgraph_write_zpass_pixel_cnt_report(NV2AState *d, uint32_t parameter, + uint32_t result) +{ + PGRAPHState *pg = &d->pgraph; + + uint64_t timestamp = 0x0011223344556677; /* FIXME: Update timestamp?! */ + uint32_t done = 0; // FIXME: Check + + hwaddr report_dma_len; + uint8_t *report_data = + (uint8_t *)nv_dma_map(d, pg->dma_report, &report_dma_len); + + hwaddr offset = GET_MASK(parameter, NV097_GET_REPORT_OFFSET); + assert(offset < report_dma_len); + report_data += offset; + + stq_le_p((uint64_t *)&report_data[0], timestamp); + stl_le_p((uint32_t *)&report_data[8], result); + stl_le_p((uint32_t *)&report_data[12], done); + + NV2A_DPRINTF("Report result %d @%" HWADDR_PRIx, result, offset); +} diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h new file mode 100644 index 0000000000..799e879c06 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -0,0 +1,383 @@ +/* + * QEMU Geforce NV2A PGRAPH internal definitions + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_H +#define HW_XBOX_NV2A_PGRAPH_H + +#include "xemu-config.h" +#include "qemu/osdep.h" +#include "qemu/bitmap.h" +#include "qemu/units.h" +#include "qemu/thread.h" +#include "cpu.h" + +#include "shaders.h" +#include "surface.h" +#include "util.h" + +typedef struct NV2AState NV2AState; +typedef struct PGRAPHNullState PGRAPHNullState; +typedef struct PGRAPHGLState PGRAPHGLState; +typedef struct PGRAPHVkState PGRAPHVkState; + +typedef struct VertexAttribute { + bool dma_select; + hwaddr offset; + + /* inline arrays are packed in order? + * Need to pass the offset to converted attributes */ + unsigned int inline_array_offset; + + float inline_value[4]; + + unsigned int format; + unsigned int size; /* size of the data type */ + unsigned int count; /* number of components */ + uint32_t stride; + + bool needs_conversion; + + float *inline_buffer; + bool inline_buffer_populated; +} VertexAttribute; + +typedef struct Surface { + bool draw_dirty; + bool buffer_dirty; + bool write_enabled_cache; + unsigned int pitch; + + hwaddr offset; +} Surface; + +typedef struct KelvinState { + hwaddr object_instance; +} KelvinState; + +typedef struct ContextSurfaces2DState { + hwaddr object_instance; + hwaddr dma_image_source; + hwaddr dma_image_dest; + unsigned int color_format; + unsigned int source_pitch, dest_pitch; + hwaddr source_offset, dest_offset; +} ContextSurfaces2DState; + +typedef struct ImageBlitState { + hwaddr object_instance; + hwaddr context_surfaces; + unsigned int operation; + unsigned int in_x, in_y; + unsigned int out_x, out_y; + unsigned int width, 
height; +} ImageBlitState; + +typedef struct BetaState { + hwaddr object_instance; + uint32_t beta; +} BetaState; + +typedef struct PGRAPHRenderer { + CONFIG_DISPLAY_RENDERER type; + const char *name; + struct { + void (*early_context_init)(void); + void (*init)(NV2AState *d); + void (*init_thread)(NV2AState *d); + void (*finalize)(NV2AState *d); + void (*clear_report_value)(NV2AState *d); + void (*clear_surface)(NV2AState *d, uint32_t parameter); + void (*draw_begin)(NV2AState *d); + void (*draw_end)(NV2AState *d); + void (*flip_stall)(NV2AState *d); + void (*flush_draw)(NV2AState *d); + void (*get_report)(NV2AState *d, uint32_t parameter); + void (*image_blit)(NV2AState *d); + void (*pre_savevm_trigger)(NV2AState *d); + void (*pre_savevm_wait)(NV2AState *d); + void (*pre_shutdown_trigger)(NV2AState *d); + void (*pre_shutdown_wait)(NV2AState *d); + void (*process_pending)(NV2AState *d); + void (*process_pending_reports)(NV2AState *d); + void (*surface_flush)(NV2AState *d); + void (*surface_update)(NV2AState *d, bool upload, bool color_write, bool zeta_write); + void (*set_surface_scale_factor)(NV2AState *d, unsigned int scale); + unsigned int (*get_surface_scale_factor)(NV2AState *d); + int (*get_framebuffer_surface)(NV2AState *d); + } ops; +} PGRAPHRenderer; + +typedef struct PGRAPHState { + QemuMutex lock; + + uint32_t pending_interrupts; + uint32_t enabled_interrupts; + + int frame_time; + int draw_time; + + /* subchannels state we're not sure the location of... 
*/ + ContextSurfaces2DState context_surfaces_2d; + ImageBlitState image_blit; + KelvinState kelvin; + BetaState beta; + + hwaddr dma_color, dma_zeta; + Surface surface_color, surface_zeta; + unsigned int surface_type; + SurfaceShape surface_shape; + SurfaceShape last_surface_shape; + + struct { + int clip_x; + int clip_width; + int clip_y; + int clip_height; + int width; + int height; + } surface_binding_dim; // FIXME: Refactor + + hwaddr dma_a, dma_b; + bool texture_dirty[NV2A_MAX_TEXTURES]; + + bool texture_matrix_enable[NV2A_MAX_TEXTURES]; + + hwaddr dma_state; + hwaddr dma_notifies; + hwaddr dma_semaphore; + + hwaddr dma_report; + hwaddr report_offset; + bool zpass_pixel_count_enable; + + hwaddr dma_vertex_a, dma_vertex_b; + + uint32_t primitive_mode; + + bool enable_vertex_program_write; // FIXME: Not used anywhere??? + + uint32_t vertex_state_shader_v0[4]; + uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE]; + bool program_data_dirty; + + uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; + bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS]; + + /* lighting constant arrays */ + uint32_t ltctxa[NV2A_LTCTXA_COUNT][4]; + bool ltctxa_dirty[NV2A_LTCTXA_COUNT]; + uint32_t ltctxb[NV2A_LTCTXB_COUNT][4]; + bool ltctxb_dirty[NV2A_LTCTXB_COUNT]; + uint32_t ltc1[NV2A_LTC1_COUNT][4]; + bool ltc1_dirty[NV2A_LTC1_COUNT]; + + float material_alpha; + + // should figure out where these are in lighting context + float light_infinite_half_vector[NV2A_MAX_LIGHTS][3]; + float light_infinite_direction[NV2A_MAX_LIGHTS][3]; + float light_local_position[NV2A_MAX_LIGHTS][3]; + float light_local_attenuation[NV2A_MAX_LIGHTS][3]; + + float point_params[8]; + + VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES]; + uint16_t compressed_attrs; + uint16_t uniform_attrs; + uint16_t swizzle_attrs; + + unsigned int inline_array_length; + uint32_t inline_array[NV2A_MAX_BATCH_LENGTH]; + + unsigned int inline_elements_length; + uint32_t 
inline_elements[NV2A_MAX_BATCH_LENGTH]; + + unsigned int inline_buffer_length; + + unsigned int draw_arrays_length; + unsigned int draw_arrays_min_start; + unsigned int draw_arrays_max_count; + /* FIXME: Unknown size, possibly endless, 1250 will do for now */ + /* Keep in sync with size used in nv2a.c */ + int32_t draw_arrays_start[1250]; + int32_t draw_arrays_count[1250]; + bool draw_arrays_prevent_connect; + + uint32_t regs_[0x2000]; + DECLARE_BITMAP(regs_dirty, 0x2000 / sizeof(uint32_t)); + + bool clearing; + bool waiting_for_nop; + bool waiting_for_flip; + bool waiting_for_context_switch; + + bool flush_pending; + QemuEvent flush_complete; + + bool sync_pending; + QemuEvent sync_complete; + + unsigned int surface_scale_factor; + uint8_t *scale_buf; + + const PGRAPHRenderer *renderer; + union { + PGRAPHNullState *null_renderer_state; + PGRAPHGLState *gl_renderer_state; + PGRAPHVkState *vk_renderer_state; + }; +} PGRAPHState; + +void pgraph_init(NV2AState *d); +void pgraph_init_thread(NV2AState *d); +void pgraph_destroy(PGRAPHState *pg); +void pgraph_context_switch(NV2AState *d, unsigned int channel_id); +int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method, + uint32_t parameter, uint32_t *parameters, + size_t num_words_available, size_t max_lookahead_words, + bool inc); +void pgraph_check_within_begin_end_block(PGRAPHState *pg); + +void *pfifo_thread(void *arg); +void pfifo_kick(NV2AState *d); + +void pgraph_renderer_register(const PGRAPHRenderer *renderer); + +// FIXME: Move from here + +extern NV2AState *g_nv2a; + +// FIXME: Add new function pgraph_is_texture_sampler_active() + +static inline uint32_t pgraph_reg_r(PGRAPHState *pg, unsigned int r) +{ + assert(r % 4 == 0); + return pg->regs_[r]; +} + +static inline void pgraph_reg_w(PGRAPHState *pg, unsigned int r, uint32_t v) +{ + assert(r % 4 == 0); + if (pg->regs_[r] != v) { + bitmap_set(pg->regs_dirty, r / sizeof(uint32_t), 1); + } + pg->regs_[r] = v; +} + +void 
pgraph_clear_dirty_reg_map(PGRAPHState *pg); + +static inline bool pgraph_is_reg_dirty(PGRAPHState *pg, unsigned int reg) +{ + return test_bit(reg / sizeof(uint32_t), pg->regs_dirty); +} + +static inline bool pgraph_is_texture_stage_active(PGRAPHState *pg, unsigned int stage) +{ + assert(stage < NV2A_MAX_TEXTURES); + uint32_t mode = (pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG) >> (stage * 5)) & 0x1F; + return mode != 0 && mode != 4;// && mode != 0x11 && mode != 0x0a && mode != 0x09 && mode != 5; +} + +static inline bool pgraph_is_texture_enabled(PGRAPHState *pg, int texture_idx) +{ + uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + texture_idx*4); + return // pgraph_is_texture_stage_active(pg, texture_idx) && + GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE); +} + +static inline bool pgraph_is_texture_format_compressed(PGRAPHState *pg, int color_format) +{ + return color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 || + color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8 || + color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8; +} + +static inline bool pgraph_color_write_enabled(PGRAPHState *pg) +{ + return pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & ( + NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE + | NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE + | NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE + | NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE); +} + +static inline bool pgraph_zeta_write_enabled(PGRAPHState *pg) +{ + return pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & ( + NV_PGRAPH_CONTROL_0_ZWRITEENABLE + | NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE); +} + +static inline void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, + unsigned int *width, + unsigned int *height) +{ + switch (pg->surface_shape.anti_aliasing) { + case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1: + break; + case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2: + if (width) { *width *= 2; } + break; + case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4: + if (width) { *width *= 
2; } + if (height) { *height *= 2; } + break; + default: + assert(false); + break; + } +} + +static inline void pgraph_apply_scaling_factor(PGRAPHState *pg, + unsigned int *width, + unsigned int *height) +{ + *width *= pg->surface_scale_factor; + *height *= pg->surface_scale_factor; +} + +void pgraph_get_clear_color(PGRAPHState *pg, float rgba[4]); +void pgraph_get_clear_depth_stencil_value(PGRAPHState *pg, float *depth, int *stencil); + +/* Vertex */ +void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr); +void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg); +void pgraph_reset_inline_buffers(PGRAPHState *pg); +void pgraph_reset_draw_arrays(PGRAPHState *pg); +void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data); + +/* RDI */ +uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select, + unsigned int address); +void pgraph_rdi_write(PGRAPHState *pg, unsigned int select, + unsigned int address, uint32_t val); + +static inline void pgraph_argb_pack32_to_rgba_float(uint32_t argb, float *rgba) +{ + rgba[0] = ((argb >> 16) & 0xFF) / 255.0f; /* red */ + rgba[1] = ((argb >> 8) & 0xFF) / 255.0f; /* green */ + rgba[2] = (argb & 0xFF) / 255.0f; /* blue */ + rgba[3] = ((argb >> 24) & 0xFF) / 255.0f; /* alpha */ +} + +void pgraph_write_zpass_pixel_cnt_report(NV2AState *d, uint32_t parameter, uint32_t result); + +#endif diff --git a/hw/xbox/nv2a/pgraph/profile.c b/hw/xbox/nv2a/pgraph/profile.c new file mode 100644 index 0000000000..69a1b5bfbd --- /dev/null +++ b/hw/xbox/nv2a/pgraph/profile.c @@ -0,0 +1,74 @@ +/* + * QEMU Geforce NV2A profiling helpers + * + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "../nv2a_int.h" + +NV2AStats g_nv2a_stats; + +void nv2a_profile_increment(void) +{ + int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + const int64_t fps_update_interval = 250000; + g_nv2a_stats.last_flip_time = now; + + static int64_t frame_count = 0; + frame_count++; + + static int64_t ts = 0; + int64_t delta = now - ts; + if (delta >= fps_update_interval) { + g_nv2a_stats.increment_fps = frame_count * 1000000 / delta; + ts = now; + frame_count = 0; + } +} + +void nv2a_profile_flip_stall(void) +{ + int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + int64_t render_time = (now-g_nv2a_stats.last_flip_time)/1000; + + g_nv2a_stats.frame_working.mspf = render_time; + g_nv2a_stats.frame_history[g_nv2a_stats.frame_ptr] = + g_nv2a_stats.frame_working; + g_nv2a_stats.frame_ptr = + (g_nv2a_stats.frame_ptr + 1) % NV2A_PROF_NUM_FRAMES; + g_nv2a_stats.frame_count++; + memset(&g_nv2a_stats.frame_working, 0, sizeof(g_nv2a_stats.frame_working)); +} + +const char *nv2a_profile_get_counter_name(unsigned int cnt) +{ + const char *default_names[NV2A_PROF__COUNT] = { + #define _X(x) stringify(x), + NV2A_PROF_COUNTERS_XMAC + #undef _X + }; + + assert(cnt < NV2A_PROF__COUNT); + return default_names[cnt] + 10; /* 'NV2A_PROF_' */ +} + +int nv2a_profile_get_counter_value(unsigned int cnt) +{ + assert(cnt < NV2A_PROF__COUNT); + unsigned int idx = (g_nv2a_stats.frame_ptr + NV2A_PROF_NUM_FRAMES - 1) % + NV2A_PROF_NUM_FRAMES; + return g_nv2a_stats.frame_history[idx].counters[cnt]; +} diff --git a/hw/xbox/nv2a/psh.h b/hw/xbox/nv2a/pgraph/psh.h similarity index 96% rename from 
hw/xbox/nv2a/psh.h rename to hw/xbox/nv2a/pgraph/psh.h index 65ef4e43a2..6232a2834a 100644 --- a/hw/xbox/nv2a/psh.h +++ b/hw/xbox/nv2a/pgraph/psh.h @@ -20,7 +20,8 @@ #ifndef HW_NV2A_PSH_H #define HW_NV2A_PSH_H -#include "shaders_common.h" +#include +#include enum PshAlphaFunc { ALPHA_FUNC_NEVER, @@ -51,6 +52,8 @@ enum ConvolutionFilter { }; typedef struct PshState { + bool vulkan; + /* fragment shader - register combiner stuff */ uint32_t combiner_control; uint32_t shader_stage_program; @@ -67,6 +70,7 @@ typedef struct PshState { bool compare_mode[4][4]; bool alphakill[4]; enum ConvolutionFilter conv_tex[4]; + bool tex_x8y24[4]; float border_logical_size[4][3]; float border_inv_real_size[4][3]; @@ -82,6 +86,4 @@ typedef struct PshState { bool smooth_shading; } PshState; -MString *psh_translate(const PshState state); - #endif diff --git a/hw/xbox/nv2a/pgraph/rdi.c b/hw/xbox/nv2a/pgraph/rdi.c new file mode 100644 index 0000000000..297c7a67c0 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/rdi.c @@ -0,0 +1,60 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "../nv2a_int.h" + +uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select, + unsigned int address) +{ + uint32_t r = 0; + switch(select) { + case RDI_INDEX_VTX_CONSTANTS0: + case RDI_INDEX_VTX_CONSTANTS1: + assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); + r = pg->vsh_constants[address / 4][3 - address % 4]; + break; + default: + fprintf(stderr, "nv2a: unknown rdi read select 0x%x address 0x%x\n", + select, address); + assert(false); + break; + } + return r; +} + +void pgraph_rdi_write(PGRAPHState *pg, unsigned int select, + unsigned int address, uint32_t val) +{ + switch(select) { + case RDI_INDEX_VTX_CONSTANTS0: + case RDI_INDEX_VTX_CONSTANTS1: + assert(false); /* Untested */ + assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); + pg->vsh_constants_dirty[address / 4] |= + (val != pg->vsh_constants[address / 4][3 - address % 4]); + pg->vsh_constants[address / 4][3 - address % 4] = val; + break; + default: + NV2A_DPRINTF("unknown rdi write select 0x%x, address 0x%x, val 0x%08x\n", + select, address, val); + break; + } +} diff --git a/hw/xbox/nv2a/s3tc.c b/hw/xbox/nv2a/pgraph/s3tc.c similarity index 71% rename from hw/xbox/nv2a/s3tc.c rename to hw/xbox/nv2a/pgraph/s3tc.c index 454cc43aee..affd058e66 100644 --- a/hw/xbox/nv2a/s3tc.c +++ b/hw/xbox/nv2a/pgraph/s3tc.c @@ -1,5 +1,5 @@ /* - * QEMU texture decompression routines + * S3TC Texture Decompression * * Copyright (c) 2020 Wilhelm Kovatch * @@ -25,13 +25,9 @@ #include "qemu/osdep.h" #include "s3tc.h" -static inline void decode_bc1_colors(uint16_t c0, - uint16_t c1, - uint8_t r[4], - uint8_t g[4], - uint8_t b[4], - uint8_t a[16], - bool transparent) +static void decode_bc1_colors(uint16_t c0, uint16_t c1, uint8_t r[4], + uint8_t g[4], uint8_t b[4], uint8_t a[16], + bool transparent) { r[0] = ((c0 & 0xF800) >> 8) * 0xFF / 0xF8, g[0] = ((c0 & 0x07E0) >> 3) * 0xFF / 0xFC, @@ -66,15 +62,10 @@ static inline void decode_bc1_colors(uint16_t c0, } } -static inline void 
write_block_to_texture(uint8_t *converted_data, - uint32_t indices, - int i, int j, int width, - int z_pos_factor, - uint8_t r[4], - uint8_t g[4], - uint8_t b[4], - uint8_t a[16], - bool separate_alpha) +static void write_block_to_texture(uint8_t *converted_data, uint32_t indices, + int i, int j, int width, int z_pos_factor, + uint8_t r[4], uint8_t g[4], uint8_t b[4], + uint8_t a[16], bool separate_alpha) { int x0 = i * 4, y0 = j * 4; @@ -89,16 +80,18 @@ static inline void write_block_to_texture(uint8_t *converted_data, int xy_index = y_index + x - x0; uint8_t index = (indices >> 2 * xy_index) & 0x03; uint8_t alpha_index = separate_alpha ? xy_index : index; - uint32_t color = (r[index] << 24) | (g[index] << 16) | (b[index] << 8) | a[alpha_index]; - *(uint32_t*)(converted_data + (z_plus_y_pos_factor + x) * 4) = color; + uint8_t *p = converted_data + (z_plus_y_pos_factor + x) * 4; + *p++ = r[index]; + *p++ = g[index]; + *p++ = b[index]; + *p++ = a[alpha_index]; } } } -static inline void decompress_dxt1_block(const uint8_t block_data[8], - uint8_t *converted_data, - int i, int j, int width, - int z_pos_factor) +static void decompress_dxt1_block(const uint8_t block_data[8], + uint8_t *converted_data, int i, int j, + int width, int z_pos_factor) { uint16_t c0 = ((uint16_t*)block_data)[0], c1 = ((uint16_t*)block_data)[1]; @@ -111,10 +104,9 @@ static inline void decompress_dxt1_block(const uint8_t block_data[8], r, g, b, a, false); } -static inline void decompress_dxt3_block(const uint8_t block_data[16], - uint8_t *converted_data, - int i, int j, int width, - int z_pos_factor) +static void decompress_dxt3_block(const uint8_t block_data[16], + uint8_t *converted_data, int i, int j, + int width, int z_pos_factor) { uint16_t c0 = ((uint16_t*)block_data)[4], c1 = ((uint16_t*)block_data)[5]; @@ -132,10 +124,9 @@ static inline void decompress_dxt3_block(const uint8_t block_data[16], r, g, b, a, true); } -static inline void decompress_dxt5_block(const uint8_t block_data[16], - 
uint8_t *converted_data, - int i, int j, int width, - int z_pos_factor) +static void decompress_dxt5_block(const uint8_t block_data[16], + uint8_t *converted_data, int i, int j, + int width, int z_pos_factor) { uint16_t c0 = ((uint16_t*)block_data)[4], c1 = ((uint16_t*)block_data)[5]; @@ -173,11 +164,9 @@ static inline void decompress_dxt5_block(const uint8_t block_data[16], r, g, b, a, true); } -uint8_t *decompress_3d_texture_data(GLint color_format, - const uint8_t *data, - unsigned int width, - unsigned int height, - unsigned int depth) +uint8_t *s3tc_decompress_3d(enum S3TC_DECOMPRESS_FORMAT color_format, + const uint8_t *data, unsigned int width, + unsigned int height, unsigned int depth) { assert((width > 0) && (width % 4 == 0)); assert((height > 0) && (height % 4 == 0)); @@ -196,13 +185,13 @@ uint8_t *decompress_3d_texture_data(GLint color_format, int sub_block_index = block_index * block_depth + slice; int z_pos_factor = (k * block_depth + slice) * width * height; - if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + if (color_format == S3TC_DECOMPRESS_FORMAT_DXT1) { decompress_dxt1_block(data + 8 * sub_block_index, converted_data, i, j, width, z_pos_factor); - } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) { + } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT3) { decompress_dxt3_block(data + 16 * sub_block_index, converted_data, i, j, width, z_pos_factor); - } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) { + } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT5) { decompress_dxt5_block(data + 16 * sub_block_index, converted_data, i, j, width, z_pos_factor); } else { @@ -216,8 +205,9 @@ uint8_t *decompress_3d_texture_data(GLint color_format, return converted_data; } -uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data, - unsigned int width, unsigned int height) +uint8_t *s3tc_decompress_2d(enum S3TC_DECOMPRESS_FORMAT color_format, + const uint8_t *data, unsigned int width, + unsigned int height) 
{ assert((width > 0) && (width % 4 == 0)); assert((height > 0) && (height % 4 == 0)); @@ -226,13 +216,13 @@ uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data, for (int j = 0; j < num_blocks_y; j++) { for (int i = 0; i < num_blocks_x; i++) { int block_index = j * num_blocks_x + i; - if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + if (color_format == S3TC_DECOMPRESS_FORMAT_DXT1) { decompress_dxt1_block(data + 8 * block_index, converted_data, i, j, width, 0); - } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) { + } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT3) { decompress_dxt3_block(data + 16 * block_index, converted_data, i, j, width, 0); - } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) { + } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT5) { decompress_dxt5_block(data + 16 * block_index, converted_data, i, j, width, 0); } else { diff --git a/hw/xbox/nv2a/s3tc.h b/hw/xbox/nv2a/pgraph/s3tc.h similarity index 63% rename from hw/xbox/nv2a/s3tc.h rename to hw/xbox/nv2a/pgraph/s3tc.h index 87dad0d3c4..6a10074e74 100644 --- a/hw/xbox/nv2a/s3tc.h +++ b/hw/xbox/nv2a/pgraph/s3tc.h @@ -1,5 +1,5 @@ /* - * QEMU texture decompression routines + * S3TC Texture Decompression * * Copyright (c) 2020 Wilhelm Kovatch * @@ -22,18 +22,23 @@ * THE SOFTWARE. 
*/ -#ifndef S3TC_H -#define S3TC_H +#ifndef HW_XBOX_NV2A_PGRAPH_S3TC_H +#define HW_XBOX_NV2A_PGRAPH_S3TC_H -#include "gl/gloffscreen.h" +#include -uint8_t *decompress_3d_texture_data(GLint color_format, - const uint8_t *data, - unsigned int width, - unsigned int height, - unsigned int depth); +enum S3TC_DECOMPRESS_FORMAT { + S3TC_DECOMPRESS_FORMAT_DXT1, + S3TC_DECOMPRESS_FORMAT_DXT3, + S3TC_DECOMPRESS_FORMAT_DXT5, +}; -uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data, - unsigned int width, unsigned int height); +uint8_t *s3tc_decompress_3d(enum S3TC_DECOMPRESS_FORMAT color_format, + const uint8_t *data, unsigned int width, + unsigned int height, unsigned int depth); + +uint8_t *s3tc_decompress_2d(enum S3TC_DECOMPRESS_FORMAT color_format, + const uint8_t *data, unsigned int width, + unsigned int height); #endif diff --git a/hw/xbox/nv2a/pgraph/shaders.c b/hw/xbox/nv2a/pgraph/shaders.c new file mode 100644 index 0000000000..82737b44f4 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/shaders.c @@ -0,0 +1,295 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/debug.h" +#include "texture.h" +#include "pgraph.h" +#include "shaders.h" + +ShaderState pgraph_get_shader_state(PGRAPHState *pg) +{ + bool vertex_program = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_MODE) == 2; + + bool fixed_function = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_MODE) == 0; + + int program_start = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), + NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START); + + pg->program_data_dirty = false; + + ShaderState state; + + // We will hash it, so make sure any padding is zerod + memset(&state, 0, sizeof(ShaderState)); + + state.vulkan = pg->renderer->type == CONFIG_DISPLAY_RENDERER_VULKAN; + state.surface_scale_factor = pg->surface_scale_factor; + + state.compressed_attrs = pg->compressed_attrs; + state.uniform_attrs = pg->uniform_attrs; + state.swizzle_attrs = pg->swizzle_attrs; + + /* register combiner stuff */ + state.psh.vulkan = state.vulkan; + state.psh.window_clip_exclusive = + pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE; + state.psh.combiner_control = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL); + state.psh.shader_stage_program = pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG); + state.psh.other_stage_input = pgraph_reg_r(pg, NV_PGRAPH_SHADERCTL); + state.psh.final_inputs_0 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG0); + state.psh.final_inputs_1 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG1); + + state.psh.alpha_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE; + state.psh.alpha_func = (enum PshAlphaFunc)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), NV_PGRAPH_CONTROL_0_ALPHAFUNC); + + state.psh.point_sprite = pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE; + + state.psh.shadow_depth_func = (enum PshShadowDepthFunc)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_SHADOWCTL), NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC); + + state.fixed_function = fixed_function; + + /* 
fixed function stuff */ + if (fixed_function) { + state.skinning = (enum VshSkinning)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_SKIN); + state.lighting = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_LIGHTING); + state.normalization = + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C) & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE; + + /* color material */ + state.emission_src = (enum MaterialColorSource)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_EMISSION); + state.ambient_src = (enum MaterialColorSource)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_AMBIENT); + state.diffuse_src = (enum MaterialColorSource)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_DIFFUSE); + state.specular_src = (enum MaterialColorSource)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_SPECULAR); + } + + /* vertex program stuff */ + state.vertex_program = vertex_program, + state.z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; + + state.point_params_enable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_POINTPARAMSENABLE); + state.point_size = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_POINTSIZE), NV097_SET_POINT_SIZE_V) / 8.0f; + if (state.point_params_enable) { + for (int i = 0; i < 8; i++) { + state.point_params[i] = pg->point_params[i]; + } + } + + /* geometry shader stuff */ + state.primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode; + state.polygon_front_mode = (enum ShaderPolygonMode)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_FRONTFACEMODE); + state.polygon_back_mode = (enum ShaderPolygonMode)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_BACKFACEMODE); + + state.smooth_shading = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), + NV_PGRAPH_CONTROL_3_SHADEMODE) == + NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH; + state.psh.smooth_shading = state.smooth_shading; + + 
state.program_length = 0; + + if (vertex_program) { + // copy in vertex program tokens + for (int i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH; + i++) { + uint32_t *cur_token = (uint32_t *)&pg->program_data[i]; + memcpy(&state.program_data[state.program_length], cur_token, + VSH_TOKEN_SIZE * sizeof(uint32_t)); + state.program_length++; + + if (vsh_get_field(cur_token, FLD_FINAL)) { + break; + } + } + } + + /* Texgen */ + for (int i = 0; i < 4; i++) { + unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B; + for (int j = 0; j < 4; j++) { + unsigned int masks[] = { + (i % 2) ? NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S, + (i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T, + (i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R, + (i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q + }; + state.texgen[i][j] = + (enum VshTexgen)GET_MASK(pgraph_reg_r(pg, reg), masks[j]); + } + } + + /* Fog */ + state.fog_enable = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3) & NV_PGRAPH_CONTROL_3_FOGENABLE; + if (state.fog_enable) { + /*FIXME: Use CSV0_D? */ + state.fog_mode = (enum VshFogMode)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), NV_PGRAPH_CONTROL_3_FOG_MODE); + state.foggen = (enum VshFoggen)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_FOGGENMODE); + } else { + /* FIXME: Do we still pass the fogmode? 
*/ + state.fog_mode = (enum VshFogMode)0; + state.foggen = (enum VshFoggen)0; + } + + /* Texture matrices */ + for (int i = 0; i < 4; i++) { + state.texture_matrix_enable[i] = pg->texture_matrix_enable[i]; + } + + /* Lighting */ + if (state.lighting) { + for (int i = 0; i < NV2A_MAX_LIGHTS; i++) { + state.light[i] = (enum VshLight)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2)); + } + } + + /* Copy content of enabled combiner stages */ + int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF; + for (int i = 0; i < num_stages; i++) { + state.psh.rgb_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4); + state.psh.rgb_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4); + state.psh.alpha_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4); + state.psh.alpha_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4); + // constant_0[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4); + // constant_1[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4); + } + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + state.psh.compare_mode[i][j] = + (pgraph_reg_r(pg, NV_PGRAPH_SHADERCLIPMODE) >> (4 * i + j)) & 1; + } + + uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i * 4); + bool enabled = pgraph_is_texture_stage_active(pg, i) && + (ctl_0 & NV_PGRAPH_TEXCTL0_0_ENABLE); + if (!enabled) { + continue; + } + + state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN; + + uint32_t tex_fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i * 4); + unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR); + BasicColorFormatInfo f = kelvin_color_format_info_map[color_format]; + state.psh.rect_tex[i] = f.linear; + state.psh.tex_x8y24[i] = color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED || + color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT; + + uint32_t border_source = + GET_MASK(tex_fmt, 
NV_PGRAPH_TEXFMT0_BORDER_SOURCE); + bool cubemap = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); + state.psh.border_logical_size[i][0] = 0.0f; + state.psh.border_logical_size[i][1] = 0.0f; + state.psh.border_logical_size[i][2] = 0.0f; + if (border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) { + if (!f.linear && !cubemap) { + // The actual texture will be (at least) double the reported + // size and shifted by a 4 texel border but texture coordinates + // will still be relative to the reported size. + unsigned int reported_width = + 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); + unsigned int reported_height = + 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V); + unsigned int reported_depth = + 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); + + state.psh.border_logical_size[i][0] = reported_width; + state.psh.border_logical_size[i][1] = reported_height; + state.psh.border_logical_size[i][2] = reported_depth; + + if (reported_width < 8) { + state.psh.border_inv_real_size[i][0] = 0.0625f; + } else { + state.psh.border_inv_real_size[i][0] = + 1.0f / (reported_width * 2.0f); + } + if (reported_height < 8) { + state.psh.border_inv_real_size[i][1] = 0.0625f; + } else { + state.psh.border_inv_real_size[i][1] = + 1.0f / (reported_height * 2.0f); + } + if (reported_depth < 8) { + state.psh.border_inv_real_size[i][2] = 0.0625f; + } else { + state.psh.border_inv_real_size[i][2] = + 1.0f / (reported_depth * 2.0f); + } + } else { + NV2A_UNIMPLEMENTED( + "Border source texture with linear %d cubemap %d", f.linear, + cubemap); + } + } + + /* Keep track of whether texture data has been loaded as signed + * normalized integers or not. This dictates whether or not we will need + * to re-map in fragment shader for certain texture modes (e.g. + * bumpenvmap). + * + * FIXME: When signed texture data is loaded as unsigned and remapped in + * fragment shader, there may be interpolation artifacts. Fix this to + * support signed textures more appropriately. 
+ */ +#if 0 // FIXME + state.psh.snorm_tex[i] = (f.gl_internal_format == GL_RGB8_SNORM) + || (f.gl_internal_format == GL_RG8_SNORM); +#endif + state.psh.shadow_map[i] = f.depth; + + uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i * 4); + unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); + enum ConvolutionFilter kernel = CONVOLUTION_FILTER_DISABLED; + /* FIXME: We do not distinguish between min and mag when + * performing convolution. Just use it if specified for min (common AA + * case). + */ + if (min_filter == NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0) { + int k = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL); + assert(k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_QUINCUNX || + k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_GAUSSIAN_3); + kernel = (enum ConvolutionFilter)k; + } + + state.psh.conv_tex[i] = kernel; + } + + return state; +} diff --git a/hw/xbox/nv2a/shaders.h b/hw/xbox/nv2a/pgraph/shaders.h similarity index 56% rename from hw/xbox/nv2a/shaders.h rename to hw/xbox/nv2a/pgraph/shaders.h index 0362da1099..842658f808 100644 --- a/hw/xbox/nv2a/shaders.h +++ b/hw/xbox/nv2a/pgraph/shaders.h @@ -18,17 +18,14 @@ * License along with this library; if not, see . 
*/ -#ifndef HW_NV2A_SHADERS_H -#define HW_NV2A_SHADERS_H +#ifndef HW_XBOX_NV2A_PGRAPH_SHADERS_H +#define HW_XBOX_NV2A_PGRAPH_SHADERS_H -#include "qemu/thread.h" -#include "qapi/qmp/qstring.h" -#include "gl/gloffscreen.h" +#include +#include "hw/xbox/nv2a/nv2a_regs.h" -#include "nv2a_regs.h" #include "vsh.h" #include "psh.h" -#include "lru.h" enum ShaderPrimitiveMode { PRIM_TYPE_INVALID, @@ -57,10 +54,13 @@ enum MaterialColorSource { }; typedef struct ShaderState { + bool vulkan; unsigned int surface_scale_factor; PshState psh; uint16_t compressed_attrs; + uint16_t uniform_attrs; + uint16_t swizzle_attrs; bool texture_matrix_enable[4]; enum VshTexgen texgen[4][4]; @@ -101,61 +101,8 @@ typedef struct ShaderState { bool smooth_shading; } ShaderState; -typedef struct ShaderBinding { - GLuint gl_program; - GLenum gl_primitive_mode; - - GLint psh_constant_loc[9][2]; - GLint alpha_ref_loc; - - GLint bump_mat_loc[NV2A_MAX_TEXTURES]; - GLint bump_scale_loc[NV2A_MAX_TEXTURES]; - GLint bump_offset_loc[NV2A_MAX_TEXTURES]; - GLint tex_scale_loc[NV2A_MAX_TEXTURES]; - - GLint surface_size_loc; - GLint clip_range_loc; - - GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS]; - uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; - - GLint inv_viewport_loc; - GLint ltctxa_loc[NV2A_LTCTXA_COUNT]; - GLint ltctxb_loc[NV2A_LTCTXB_COUNT]; - GLint ltc1_loc[NV2A_LTC1_COUNT]; - - GLint fog_color_loc; - GLint fog_param_loc[2]; - GLint light_infinite_half_vector_loc[NV2A_MAX_LIGHTS]; - GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS]; - GLint light_local_position_loc[NV2A_MAX_LIGHTS]; - GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS]; - - GLint clip_region_loc[8]; - - GLint material_alpha_loc; -} ShaderBinding; - -typedef struct ShaderLruNode { - LruNode node; - bool cached; - void *program; - size_t program_size; - GLenum program_format; - ShaderState state; - ShaderBinding *binding; - QemuThread *save_thread; -} ShaderLruNode; - typedef struct PGRAPHState PGRAPHState; -GLenum 
get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode); -void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state); -ShaderBinding *generate_shaders(const ShaderState *state); - -void shader_cache_init(PGRAPHState *pg); -void shader_write_cache_reload_list(PGRAPHState *pg); -bool shader_load_from_memory(ShaderLruNode *snode); -void shader_cache_to_disk(ShaderLruNode *snode); +ShaderState pgraph_get_shader_state(PGRAPHState *pg); #endif diff --git a/hw/xbox/nv2a/pgraph/surface.h b/hw/xbox/nv2a/pgraph/surface.h new file mode 100644 index 0000000000..d51bc04ea4 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/surface.h @@ -0,0 +1,35 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_SURFACE_H +#define HW_XBOX_NV2A_PGRAPH_SURFACE_H + +typedef struct SurfaceShape { + unsigned int z_format; + unsigned int color_format; + unsigned int zeta_format; + unsigned int log_width, log_height; + unsigned int clip_x, clip_y; + unsigned int clip_width, clip_height; + unsigned int anti_aliasing; +} SurfaceShape; + +#endif diff --git a/hw/xbox/nv2a/swizzle.c b/hw/xbox/nv2a/pgraph/swizzle.c similarity index 100% rename from hw/xbox/nv2a/swizzle.c rename to hw/xbox/nv2a/pgraph/swizzle.c diff --git a/hw/xbox/nv2a/swizzle.h b/hw/xbox/nv2a/pgraph/swizzle.h similarity index 94% rename from hw/xbox/nv2a/swizzle.h rename to hw/xbox/nv2a/pgraph/swizzle.h index 21889b39cf..78ff0740a4 100644 --- a/hw/xbox/nv2a/swizzle.h +++ b/hw/xbox/nv2a/pgraph/swizzle.h @@ -18,8 +18,10 @@ * License along with this library; if not, see . */ -#ifndef HW_XBOX_SWIZZLE_H -#define HW_XBOX_SWIZZLE_H +#ifndef HW_XBOX_NV2A_PGRAPH_SWIZZLE_H +#define HW_XBOX_NV2A_PGRAPH_SWIZZLE_H + +#include void swizzle_box( const uint8_t *src_buf, diff --git a/hw/xbox/nv2a/pgraph/texture.c b/hw/xbox/nv2a/pgraph/texture.c new file mode 100644 index 0000000000..e5350ea8d4 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/texture.c @@ -0,0 +1,405 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "texture.h" +#include "util.h" + +const BasicColorFormatInfo kelvin_color_format_info_map[66] = { + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = { 1, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = { 1, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = { 4, false }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = { 1, false }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = { 4, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = { 1, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = { 1, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = { 1, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = { 4, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = { 1, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = { 2, true }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = { 2, false }, + 
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = { 2, false }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = { 2, true }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = { 2, false, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = { 4, true, + true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = { 4, true, + true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = { 2, true, + true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = { 2, true, + true }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = { 4, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = { 4, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = { 4, true }, +}; + +hwaddr pgraph_get_texture_phys_addr(PGRAPHState *pg, int texture_idx) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + int i = texture_idx; + + uint32_t fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i*4); + unsigned int dma_select = + GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA); + + hwaddr offset = pgraph_reg_r(pg, NV_PGRAPH_TEXOFFSET0 + i*4); + + hwaddr dma_len; + uint8_t *texture_data; + if (dma_select) { + texture_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &dma_len); + } else { + texture_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &dma_len); + } + assert(offset < dma_len); + texture_data += offset; + + return texture_data - d->vram_ptr; +} + +hwaddr pgraph_get_texture_palette_phys_addr_length(PGRAPHState *pg, int texture_idx, size_t *length) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + int i = texture_idx; + + uint32_t palette = pgraph_reg_r(pg, NV_PGRAPH_TEXPALETTE0 + i*4); + 
bool palette_dma_select = + GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA); + unsigned int palette_length_index = + GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH); + unsigned int palette_offset = + palette & NV_PGRAPH_TEXPALETTE0_OFFSET; + + unsigned int palette_length = 0; + switch (palette_length_index) { + case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break; + case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break; + case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break; + case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break; + default: assert(false); break; + } + if (length) { + *length = palette_length; + } + + hwaddr palette_dma_len; + uint8_t *palette_data; + if (palette_dma_select) { + palette_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &palette_dma_len); + } else { + palette_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &palette_dma_len); + } + assert(palette_offset < palette_dma_len); + palette_data += palette_offset; + + return palette_data - d->vram_ptr; +} + +size_t pgraph_get_texture_length(PGRAPHState *pg, TextureShape *shape) +{ + BasicColorFormatInfo f = kelvin_color_format_info_map[shape->color_format]; + size_t length = 0; + + if (f.linear) { + assert(shape->cubemap == false); + assert(shape->dimensionality == 2); + length = shape->height * shape->pitch; + } else { + if (shape->dimensionality >= 2) { + unsigned int w = shape->width, h = shape->height; + int level; + if (!pgraph_is_texture_format_compressed(pg, shape->color_format)) { + for (level = 0; level < shape->levels; level++) { + w = MAX(w, 1); + h = MAX(h, 1); + length += w * h * f.bytes_per_pixel; + w /= 2; + h /= 2; + } + } else { + /* Compressed textures are a bit different */ + unsigned int block_size = + shape->color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ? 
+ 8 : 16; + for (level = 0; level < shape->levels; level++) { + w = MAX(w, 1); + h = MAX(h, 1); + unsigned int phys_w = (w + 3) & ~3, + phys_h = (h + 3) & ~3; + length += phys_w/4 * phys_h/4 * block_size; + w /= 2; + h /= 2; + } + } + if (shape->cubemap) { + assert(shape->dimensionality == 2); + length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1); + length *= 6; + } + if (shape->dimensionality >= 3) { + length *= shape->depth; + } + } + } + + return length; +} + +TextureShape pgraph_get_texture_shape(PGRAPHState *pg, int texture_idx) +{ + int i = texture_idx; + + uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i*4); + uint32_t ctl_1 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL1_0 + i*4); + uint32_t fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i*4); + +#if DEBUG_NV2A + uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i*4); + uint32_t address = pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + i*4); +#endif + + unsigned int min_mipmap_level = + GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP); + unsigned int max_mipmap_level = + GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP); + + unsigned int pitch = + GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH); + + bool cubemap = + GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); + unsigned int dimensionality = + GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY); + + int tex_mode = (pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG) >> (texture_idx * 5)) & 0x1F; + if (tex_mode == 0x02) { + assert(pgraph_is_texture_enabled(pg, texture_idx)); + // assert(state.dimensionality == 3); + + // OVERRIDE + // dimensionality = 3; + } + + unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR); + unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS); + unsigned int log_width = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); + unsigned int log_height = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V); + unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); + + unsigned int 
rect_width = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_TEXIMAGERECT0 + i*4), + NV_PGRAPH_TEXIMAGERECT0_WIDTH); + unsigned int rect_height = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_TEXIMAGERECT0 + i*4), + NV_PGRAPH_TEXIMAGERECT0_HEIGHT); +#ifdef DEBUG_NV2A + unsigned int lod_bias = + GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS); +#endif + unsigned int border_source = GET_MASK(fmt, + NV_PGRAPH_TEXFMT0_BORDER_SOURCE); + + NV2A_DPRINTF(" texture %d is format 0x%x, " + "off 0x%" HWADDR_PRIx " (r %d, %d or %d, %d, %d; %d%s)," + " filter %x %x, levels %d-%d %d bias %d\n", + i, color_format, address, + rect_width, rect_height, + 1 << log_width, 1 << log_height, 1 << log_depth, + pitch, + cubemap ? "; cubemap" : "", + GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN), + GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG), + min_mipmap_level, max_mipmap_level, levels, + lod_bias); + + assert(color_format < ARRAY_SIZE(kelvin_color_format_info_map)); + BasicColorFormatInfo f = kelvin_color_format_info_map[color_format]; + if (f.bytes_per_pixel == 0) { + fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n", + color_format); + abort(); + } + + unsigned int width, height, depth; + if (f.linear) { + assert(dimensionality == 2); + width = rect_width; + height = rect_height; + depth = 1; + } else { + width = 1 << log_width; + height = 1 << log_height; + depth = 1 << log_depth; + pitch = 0; + + levels = MIN(levels, max_mipmap_level + 1); + + /* Discard mipmap levels that would be smaller than 1x1. + * FIXME: Is this actually needed? + * + * >> Level 0: 32 x 4 + * Level 1: 16 x 2 + * Level 2: 8 x 1 + * Level 3: 4 x 1 + * Level 4: 2 x 1 + * Level 5: 1 x 1 + */ + levels = MIN(levels, MAX(log_width, log_height) + 1); + assert(levels > 0); + + if (dimensionality == 3) { + /* FIXME: What about 3D mipmaps? */ + if (log_width < 2 || log_height < 2) { + /* Base level is smaller than 4x4... 
*/ + levels = 1; + } else { + levels = MIN(levels, MIN(log_width, log_height) - 1); + } + } + min_mipmap_level = MIN(levels-1, min_mipmap_level); + max_mipmap_level = MIN(levels-1, max_mipmap_level); + } + + TextureShape shape; + + // We will hash it, so make sure any padding is zero + memset(&shape, 0, sizeof(shape)); + + shape.cubemap = cubemap; + shape.dimensionality = dimensionality; + shape.color_format = color_format; + shape.levels = levels; + shape.width = width; + shape.height = height; + shape.depth = depth; + shape.min_mipmap_level = min_mipmap_level; + shape.max_mipmap_level = max_mipmap_level; + shape.pitch = pitch; + shape.border = border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR; + return shape; +} + +uint8_t *pgraph_convert_texture_data(const TextureShape s, const uint8_t *data, + const uint8_t *palette_data, + unsigned int width, unsigned int height, + unsigned int depth, unsigned int row_pitch, + unsigned int slice_pitch, + size_t *converted_size) +{ + size_t size = 0; + uint8_t *converted_data; + + if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8) { + size = width * height * depth * 4; + converted_data = g_malloc(size); + const uint8_t *src = data; + uint32_t *dst = (uint32_t *)converted_data; + for (int z = 0; z < depth; z++) { + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uint8_t index = src[y * row_pitch + x]; + uint32_t color = *(uint32_t *)(palette_data + index * 4); + *dst++ = color; + } + } + src += slice_pitch; + } + } else if (s.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 || + s.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) { + // TODO: Investigate whether a non-1 depth is possible. + // Generally the hardware asserts when attempting to use volumetric + // textures in linear formats. 
+ assert(depth == 1); /* FIXME */ + // FIXME: only valid if control0 register allows for colorspace + // conversion + size = width * height * 4; + converted_data = g_malloc(size); + uint8_t *pixel = converted_data; + for (int y = 0; y < height; y++) { + const uint8_t *line = &data[y * row_pitch * depth]; + for (int x = 0; x < width; x++, pixel += 4) { + if (s.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) { + convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], + &pixel[2]); + } else { + convert_uyvy_to_rgb(line, x, &pixel[0], &pixel[1], + &pixel[2]); + } + pixel[3] = 255; + } + } + } else if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) { + assert(depth == 1); /* FIXME */ + size = width * height * 3; + converted_data = g_malloc(size); + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uint16_t rgb655 = *(uint16_t *)(data + y * row_pitch + x * 2); + int8_t *pixel = (int8_t *)&converted_data[(y * width + x) * 3]; + /* Maps 5 bit G and B signed value range to 8 bit + * signed values. R is probably unsigned. 
+ */ + rgb655 ^= (1 << 9) | (1 << 4); + pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F; + pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80; + pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80; + } + } + } else { + return NULL; + } + + if (converted_size) { + *converted_size = size; + } + return converted_data; +} diff --git a/hw/xbox/nv2a/pgraph/texture.h b/hw/xbox/nv2a/pgraph/texture.h new file mode 100644 index 0000000000..4c9818ca3c --- /dev/null +++ b/hw/xbox/nv2a/pgraph/texture.h @@ -0,0 +1,67 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_TEXTURE_H +#define HW_XBOX_NV2A_PGRAPH_TEXTURE_H + +#include "qemu/osdep.h" +#include "cpu.h" + +#include +#include + +#include "hw/xbox/nv2a/nv2a_regs.h" + +typedef struct PGRAPHState PGRAPHState; + +typedef struct TextureShape { + bool cubemap; + unsigned int dimensionality; + unsigned int color_format; + unsigned int levels; + unsigned int width, height, depth; + bool border; + + unsigned int min_mipmap_level, max_mipmap_level; + unsigned int pitch; +} TextureShape; + +typedef struct BasicColorFormatInfo { + unsigned int bytes_per_pixel; + bool linear; + bool depth; +} BasicColorFormatInfo; + +extern const BasicColorFormatInfo kelvin_color_format_info_map[66]; + +uint8_t *pgraph_convert_texture_data(const TextureShape s, const uint8_t *data, + const uint8_t *palette_data, + unsigned int width, unsigned int height, + unsigned int depth, unsigned int row_pitch, + unsigned int slice_pitch, + size_t *converted_size); + +hwaddr pgraph_get_texture_phys_addr(PGRAPHState *pg, int texture_idx); +hwaddr pgraph_get_texture_palette_phys_addr_length(PGRAPHState *pg, int texture_idx, size_t *length); +TextureShape pgraph_get_texture_shape(PGRAPHState *pg, int texture_idx); +size_t pgraph_get_texture_length(PGRAPHState *pg, TextureShape *shape); + +#endif diff --git a/hw/xbox/nv2a/gl/gloffscreen_common.c b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/common.c similarity index 100% rename from hw/xbox/nv2a/gl/gloffscreen_common.c rename to hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/common.c diff --git a/hw/xbox/nv2a/gl/gloffscreen.h b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/gloffscreen.h similarity index 100% rename from hw/xbox/nv2a/gl/gloffscreen.h rename to hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/gloffscreen.h diff --git a/hw/xbox/nv2a/gl/gloffscreen_sdl.c b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/sdl.c similarity index 98% rename from hw/xbox/nv2a/gl/gloffscreen_sdl.c rename to hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/sdl.c index 
2221067ddd..277694cc50 100644 --- a/hw/xbox/nv2a/gl/gloffscreen_sdl.c +++ b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/sdl.c @@ -1,7 +1,7 @@ /* * Offscreen OpenGL abstraction layer -- SDL based * - * Copyright (c) 2018-2021 Matt Borgerson + * Copyright (c) 2018-2024 Matt Borgerson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/hw/xbox/nv2a/thirdparty/meson.build b/hw/xbox/nv2a/pgraph/thirdparty/meson.build similarity index 62% rename from hw/xbox/nv2a/thirdparty/meson.build rename to hw/xbox/nv2a/pgraph/thirdparty/meson.build index ec4068a77c..d0139f1763 100644 --- a/hw/xbox/nv2a/thirdparty/meson.build +++ b/hw/xbox/nv2a/pgraph/thirdparty/meson.build @@ -10,3 +10,9 @@ libnv2a_vsh_cpu = static_library('nv2a_vsh_cpu', include_directories: ['.', 'nv2a_vsh_cpu/src']) nv2a_vsh_cpu = declare_dependency(link_with: libnv2a_vsh_cpu, include_directories: ['nv2a_vsh_cpu/src']) + +libgloffscreen = static_library('libgloffscreen', + sources: files('gloffscreen/common.c', 'gloffscreen/sdl.c'), + dependencies: sdl) +gloffscreen = declare_dependency(link_with: libgloffscreen, + include_directories: ['gloffscreen']) diff --git a/hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu b/hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu similarity index 100% rename from hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu rename to hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu diff --git a/hw/xbox/nv2a/pgraph/util.h b/hw/xbox/nv2a/pgraph/util.h new file mode 100644 index 0000000000..c8a28d3c0d --- /dev/null +++ b/hw/xbox/nv2a/pgraph/util.h @@ -0,0 +1,86 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either 
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_UTIL_H
+#define HW_XBOX_NV2A_PGRAPH_UTIL_H
+
+static const float f16_max = 511.9375f;
+static const float f24_max = 1.0E30;
+
+/* 16 bit to [0.0, F16_MAX = 511.9375] */
+static inline
+float convert_f16_to_float(uint16_t f16) {
+    if (f16 == 0x0000) { return 0.0; }
+    uint32_t i = (f16 << 11) + 0x3C000000;
+    union { uint32_t u; float f; } v = { .u = i }; return v.f; /* union pun is defined in C; *(float*)&i violates strict aliasing */
+}
+
+/* 24 bit to [0.0, F24_MAX] */
+static inline
+float convert_f24_to_float(uint32_t f24) {
+    assert(!(f24 >> 24));
+    f24 &= 0xFFFFFF;
+    if (f24 == 0x000000) { return 0.0; }
+    uint32_t i = f24 << 7;
+    union { uint32_t u; float f; } v = { .u = i }; return v.f; /* union pun is defined in C; *(float*)&i violates strict aliasing */
+}
+
+static inline
+uint8_t cliptobyte(int x)
+{
+    return (uint8_t)((x < 0) ? 0 : ((x > 255) ?
255 : x)); +} + +static inline +void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, + uint8_t *r, uint8_t *g, uint8_t* b) { + int c, d, e; + c = (int)line[ix * 2] - 16; + if (ix % 2) { + d = (int)line[ix * 2 - 1] - 128; + e = (int)line[ix * 2 + 1] - 128; + } else { + d = (int)line[ix * 2 + 1] - 128; + e = (int)line[ix * 2 + 3] - 128; + } + *r = cliptobyte((298 * c + 409 * e + 128) >> 8); + *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); + *b = cliptobyte((298 * c + 516 * d + 128) >> 8); +} + +static inline +void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix, + uint8_t *r, uint8_t *g, uint8_t* b) { + int c, d, e; + c = (int)line[ix * 2 + 1] - 16; + if (ix % 2) { + d = (int)line[ix * 2 - 2] - 128; + e = (int)line[ix * 2 + 0] - 128; + } else { + d = (int)line[ix * 2 + 0] - 128; + e = (int)line[ix * 2 + 2] - 128; + } + *r = cliptobyte((298 * c + 409 * e + 128) >> 8); + *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); + *b = cliptobyte((298 * c + 516 * d + 128) >> 8); +} + +#endif diff --git a/hw/xbox/nv2a/pgraph/vertex.c b/hw/xbox/nv2a/pgraph/vertex.c new file mode 100644 index 0000000000..47f7cb5688 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vertex.c @@ -0,0 +1,131 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" + +void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data) +{ + assert(attr->count <= 4); + attr->inline_value[0] = 0.0f; + attr->inline_value[1] = 0.0f; + attr->inline_value[2] = 0.0f; + attr->inline_value[3] = 1.0f; + + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: + for (uint32_t i = 0; i < attr->count; ++i) { + attr->inline_value[i] = (float)data[i] / 255.0f; + } + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: { + const int16_t *val = (const int16_t *) data; + for (uint32_t i = 0; i < attr->count; ++i, ++val) { + attr->inline_value[i] = MAX(-1.0f, (float) *val / 32767.0f); + } + break; + } + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + memcpy(attr->inline_value, data, attr->size * attr->count); + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: { + const int16_t *val = (const int16_t *) data; + for (uint32_t i = 0; i < attr->count; ++i, ++val) { + attr->inline_value[i] = (float)*val; + } + break; + } + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: { + /* 3 signed, normalized components packed in 32-bits. 
(11,11,10) */ + const int32_t val = *(const int32_t *)data; + int32_t x = val & 0x7FF; + if (x & 0x400) { + x |= 0xFFFFF800; + } + int32_t y = (val >> 11) & 0x7FF; + if (y & 0x400) { + y |= 0xFFFFF800; + } + int32_t z = (val >> 22) & 0x7FF; + if (z & 0x200) { + z |= 0xFFFFFC00; + } + + attr->inline_value[0] = MAX(-1.0f, (float)x / 1023.0f); + attr->inline_value[1] = MAX(-1.0f, (float)y / 1023.0f); + attr->inline_value[2] = MAX(-1.0f, (float)z / 511.0f); + break; + } + default: + fprintf(stderr, "Unknown vertex attribute type: for format 0x%x\n", + attr->format); + assert(!"Unsupported attribute type"); + break; + } +} + +void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr) +{ + VertexAttribute *attribute = &pg->vertex_attributes[attr]; + + if (attribute->inline_buffer_populated || pg->inline_buffer_length == 0) { + return; + } + + /* Now upload the previous attribute value */ + attribute->inline_buffer_populated = true; + for (int i = 0; i < pg->inline_buffer_length; i++) { + memcpy(&attribute->inline_buffer[i * 4], attribute->inline_value, + sizeof(float) * 4); + } +} + +void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg) +{ + pgraph_check_within_begin_end_block(pg); + assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH); + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attribute = &pg->vertex_attributes[i]; + if (attribute->inline_buffer_populated) { + memcpy(&attribute->inline_buffer[pg->inline_buffer_length * 4], + attribute->inline_value, sizeof(float) * 4); + } + } + + pg->inline_buffer_length++; +} + +void pgraph_reset_inline_buffers(PGRAPHState *pg) +{ + pg->inline_elements_length = 0; + pg->inline_array_length = 0; + pg->inline_buffer_length = 0; + pgraph_reset_draw_arrays(pg); +} + +void pgraph_reset_draw_arrays(PGRAPHState *pg) +{ + pg->draw_arrays_length = 0; + pg->draw_arrays_min_start = -1; + pg->draw_arrays_max_count = 0; + pg->draw_arrays_prevent_connect = false; +} diff --git 
a/hw/xbox/nv2a/pgraph/vk/blit.c b/hw/xbox/nv2a/pgraph/vk/blit.c new file mode 100644 index 0000000000..e4529a3c58 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/blit.c @@ -0,0 +1,177 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "renderer.h" + +void pgraph_vk_image_blit(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d; + ImageBlitState *image_blit = &pg->image_blit; + BetaState *beta = &pg->beta; + + pgraph_vk_surface_update(d, false, true, true); + + assert(context_surfaces->object_instance == image_blit->context_surfaces); + + unsigned int bytes_per_pixel; + switch (context_surfaces->color_format) { + case NV062_SET_COLOR_FORMAT_LE_Y8: + bytes_per_pixel = 1; + break; + case NV062_SET_COLOR_FORMAT_LE_R5G6B5: + bytes_per_pixel = 2; + break; + case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_Y32: + bytes_per_pixel = 4; + break; + default: + fprintf(stderr, "Unknown blit surface format: 0x%x\n", + context_surfaces->color_format); + assert(false); + break; + } + + hwaddr source_dma_len, dest_dma_len; + + uint8_t *source = (uint8_t *)nv_dma_map( + d, context_surfaces->dma_image_source, &source_dma_len); + assert(context_surfaces->source_offset < source_dma_len); + source += context_surfaces->source_offset; + + uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest, + &dest_dma_len); + assert(context_surfaces->dest_offset < dest_dma_len); + dest += context_surfaces->dest_offset; + + hwaddr source_addr = source - d->vram_ptr; + hwaddr dest_addr = dest - d->vram_ptr; + + SurfaceBinding *surf_src = pgraph_vk_surface_get(d, source_addr); + if (surf_src) { + pgraph_vk_surface_download_if_dirty(d, surf_src); + } + + SurfaceBinding *surf_dest = pgraph_vk_surface_get(d, dest_addr); + if (surf_dest) { + if (image_blit->height < surf_dest->height || + image_blit->width < surf_dest->width) { + pgraph_vk_surface_download_if_dirty(d, surf_dest); + } else { + // The blit will completely replace the surface so any pending + // download should be 
discarded. + surf_dest->download_pending = false; + surf_dest->draw_dirty = false; + } + surf_dest->upload_pending = true; + pg->draw_time++; + } + + hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch + + image_blit->in_x * bytes_per_pixel; + hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch + + image_blit->out_x * bytes_per_pixel; + + hwaddr source_size = + (image_blit->height - 1) * context_surfaces->source_pitch + + image_blit->width * bytes_per_pixel; + hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch + + image_blit->width * bytes_per_pixel; + + /* FIXME: What does hardware do in this case? */ + assert(source_addr + source_offset + source_size <= + memory_region_size(d->vram)); + assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram)); + + uint8_t *source_row = source + source_offset; + uint8_t *dest_row = dest + dest_offset; + + if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) { + // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY"); + for (unsigned int y = 0; y < image_blit->height; y++) { + memmove(dest_row, source_row, image_blit->width * bytes_per_pixel); + source_row += context_surfaces->source_pitch; + dest_row += context_surfaces->dest_pitch; + } + } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) { + // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND"); + uint32_t max_beta_mult = 0x7f80; + uint32_t beta_mult = beta->beta >> 16; + uint32_t inv_beta_mult = max_beta_mult - beta_mult; + for (unsigned int y = 0; y < image_blit->height; y++) { + for (unsigned int x = 0; x < image_blit->width; x++) { + for (unsigned int ch = 0; ch < 3; ch++) { + uint32_t a = source_row[x * 4 + ch] * beta_mult; + uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult; + dest_row[x * 4 + ch] = (a + b) / max_beta_mult; + } + } + source_row += context_surfaces->source_pitch; + dest_row += context_surfaces->dest_pitch; + } + } else { + fprintf(stderr, "Unknown blit 
operation: 0x%x\n", + image_blit->operation); + assert(false && "Unknown blit operation"); + } + + NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr); + + bool needs_alpha_patching; + uint8_t alpha_override; + switch (context_surfaces->color_format) { + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: + needs_alpha_patching = true; + alpha_override = 0xff; + break; + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: + needs_alpha_patching = true; + alpha_override = 0; + break; + default: + needs_alpha_patching = false; + alpha_override = 0; + } + + if (needs_alpha_patching) { + dest_row = dest + dest_offset; + for (unsigned int y = 0; y < image_blit->height; y++) { + for (unsigned int x = 0; x < image_blit->width; x++) { + dest_row[x * 4 + 3] = alpha_override; + } + dest_row += context_surfaces->dest_pitch; + } + } + + dest_addr += dest_offset; + memory_region_set_client_dirty(d->vram, dest_addr, dest_size, + DIRTY_MEMORY_VGA); + memory_region_set_client_dirty(d->vram, dest_addr, dest_size, + DIRTY_MEMORY_NV2A_TEX); +} diff --git a/hw/xbox/nv2a/pgraph/vk/buffer.c b/hw/xbox/nv2a/pgraph/vk/buffer.c new file mode 100644 index 0000000000..440f8ae56e --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/buffer.c @@ -0,0 +1,206 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "renderer.h" +#include + +static void create_buffer(PGRAPHState *pg, StorageBuffer *buffer) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkBufferCreateInfo buffer_create_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = buffer->buffer_size, + .usage = buffer->usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + VK_CHECK(vmaCreateBuffer(r->allocator, &buffer_create_info, + &buffer->alloc_info, &buffer->buffer, + &buffer->allocation, NULL)); +} + +static void destroy_buffer(PGRAPHState *pg, StorageBuffer *buffer) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vmaDestroyBuffer(r->allocator, buffer->buffer, buffer->allocation); + buffer->buffer = VK_NULL_HANDLE; + buffer->allocation = VK_NULL_HANDLE; +} + +void pgraph_vk_init_buffers(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + // FIXME: Profile buffer sizes + + VmaAllocationCreateInfo host_alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST, + .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + }; + VmaAllocationCreateInfo device_alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + + r->storage_buffers[BUFFER_STAGING_DST] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .buffer_size = 4096 * 4096 * 4, + }; + + r->storage_buffers[BUFFER_STAGING_SRC] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .buffer_size = r->storage_buffers[BUFFER_STAGING_DST].buffer_size, + }; + + r->storage_buffers[BUFFER_COMPUTE_DST] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .buffer_size = (1024 * 10) * (1024 * 10) * 8, + }; + + r->storage_buffers[BUFFER_COMPUTE_SRC] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + 
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .buffer_size = r->storage_buffers[BUFFER_COMPUTE_DST].buffer_size, + }; + + r->storage_buffers[BUFFER_INDEX] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + .buffer_size = sizeof(pg->inline_elements) * 100, + }; + + r->storage_buffers[BUFFER_INDEX_STAGING] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .buffer_size = r->storage_buffers[BUFFER_INDEX].buffer_size, + }; + + // FIXME: Don't assume that we can render with host mapped buffer + r->storage_buffers[BUFFER_VERTEX_RAM] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .buffer_size = memory_region_size(d->vram), + }; + + r->bitmap_size = memory_region_size(d->vram) / 4096; + r->uploaded_bitmap = bitmap_new(r->bitmap_size); + bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size); + + r->storage_buffers[BUFFER_VERTEX_INLINE] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .buffer_size = NV2A_VERTEXSHADER_ATTRIBUTES * NV2A_MAX_BATCH_LENGTH * + 4 * sizeof(float) * 10, + }; + + r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .buffer_size = r->storage_buffers[BUFFER_VERTEX_INLINE].buffer_size, + }; + + r->storage_buffers[BUFFER_UNIFORM] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .buffer_size = 8 * 1024 * 1024, + }; + + r->storage_buffers[BUFFER_UNIFORM_STAGING] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .buffer_size = r->storage_buffers[BUFFER_UNIFORM].buffer_size, + }; + + for (int i = 0; i < BUFFER_COUNT; 
i++) { + create_buffer(pg, &r->storage_buffers[i]); + } + + // FIXME: Add fallback path for device using host mapped memory + + int buffers_to_map[] = { BUFFER_VERTEX_RAM, + BUFFER_INDEX_STAGING, + BUFFER_VERTEX_INLINE_STAGING, + BUFFER_UNIFORM_STAGING }; + + for (int i = 0; i < ARRAY_SIZE(buffers_to_map); i++) { + VK_CHECK(vmaMapMemory( + r->allocator, r->storage_buffers[buffers_to_map[i]].allocation, + (void **)&r->storage_buffers[buffers_to_map[i]].mapped)); + } +} + +void pgraph_vk_finalize_buffers(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + for (int i = 0; i < BUFFER_COUNT; i++) { + if (r->storage_buffers[i].mapped) { + vmaUnmapMemory(r->allocator, r->storage_buffers[i].allocation); + } + destroy_buffer(pg, &r->storage_buffers[i]); + } + + g_free(r->uploaded_bitmap); + r->uploaded_bitmap = NULL; +} + +bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index, + VkDeviceSize size, + VkDeviceAddress alignment) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + StorageBuffer *b = &r->storage_buffers[index]; + return (ROUND_UP(b->buffer_offset, alignment) + size) <= b->buffer_size; +} + +VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data, + VkDeviceSize *sizes, size_t count, + VkDeviceAddress alignment) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDeviceSize total_size = 0; + for (int i = 0; i < count; i++) { + total_size += sizes[i]; + } + assert(pgraph_vk_buffer_has_space_for(pg, index, total_size, alignment)); + + StorageBuffer *b = &r->storage_buffers[index]; + VkDeviceSize starting_offset = ROUND_UP(b->buffer_offset, alignment); + + assert(b->mapped); + + for (int i = 0; i < count; i++) { + b->buffer_offset = ROUND_UP(b->buffer_offset, alignment); + memcpy(b->mapped + b->buffer_offset, data[i], sizes[i]); + b->buffer_offset += sizes[i]; + } + + return starting_offset; +} diff --git a/hw/xbox/nv2a/pgraph/vk/command.c b/hw/xbox/nv2a/pgraph/vk/command.c new file mode 
100644 index 0000000000..0e9fc9a2ee --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/command.c @@ -0,0 +1,119 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "renderer.h" + +static void create_command_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + QueueFamilyIndices indices = + pgraph_vk_find_queue_families(r->physical_device); + + VkCommandPoolCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = indices.queue_family, + }; + VK_CHECK( + vkCreateCommandPool(r->device, &create_info, NULL, &r->command_pool)); +} + +static void destroy_command_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyCommandPool(r->device, r->command_pool, NULL); +} + +static void create_command_buffers(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkCommandBufferAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = r->command_pool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = ARRAY_SIZE(r->command_buffers), + }; + VK_CHECK( + vkAllocateCommandBuffers(r->device, &alloc_info, r->command_buffers)); + + r->command_buffer = r->command_buffers[0]; + 
r->aux_command_buffer = r->command_buffers[1]; +} + +static void destroy_command_buffers(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkFreeCommandBuffers(r->device, r->command_pool, + ARRAY_SIZE(r->command_buffers), r->command_buffers); + + r->command_buffer = VK_NULL_HANDLE; + r->aux_command_buffer = VK_NULL_HANDLE; +} + +VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(!r->in_aux_command_buffer); + r->in_aux_command_buffer = true; + + VkCommandBufferBeginInfo begin_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + VK_CHECK(vkBeginCommandBuffer(r->aux_command_buffer, &begin_info)); + + return r->aux_command_buffer; +} + +void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->in_aux_command_buffer); + + VK_CHECK(vkEndCommandBuffer(cmd)); + + VkSubmitInfo submit_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &cmd, + }; + VK_CHECK(vkQueueSubmit(r->queue, 1, &submit_info, VK_NULL_HANDLE)); + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_AUX); + VK_CHECK(vkQueueWaitIdle(r->queue)); + + r->in_aux_command_buffer = false; +} + +void pgraph_vk_init_command_buffers(PGRAPHState *pg) +{ + create_command_pool(pg); + create_command_buffers(pg); +} + +void pgraph_vk_finalize_command_buffers(PGRAPHState *pg) +{ + destroy_command_buffers(pg); + destroy_command_pool(pg); +} \ No newline at end of file diff --git a/hw/xbox/nv2a/pgraph/vk/constants.h b/hw/xbox/nv2a/pgraph/vk/constants.h new file mode 100644 index 0000000000..9ae8ba6dd4 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/constants.h @@ -0,0 +1,418 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify 
it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H +#define HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H + +#include "hw/xbox/nv2a/nv2a_regs.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include + +static const VkFilter pgraph_texture_min_filter_vk_map[] = { + 0, + VK_FILTER_NEAREST, + VK_FILTER_LINEAR, + VK_FILTER_NEAREST, + VK_FILTER_LINEAR, + VK_FILTER_NEAREST, + VK_FILTER_LINEAR, + VK_FILTER_LINEAR, +}; + +static const VkFilter pgraph_texture_mag_filter_vk_map[] = { + 0, + VK_FILTER_NEAREST, + VK_FILTER_LINEAR, + 0, + VK_FILTER_LINEAR /* TODO: Convolution filter... 
*/ +}; + +static const VkSamplerAddressMode pgraph_texture_addr_vk_map[] = { + 0, + VK_SAMPLER_ADDRESS_MODE_REPEAT, + VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */ +}; + +static const VkBlendFactor pgraph_blend_factor_vk_map[] = { + VK_BLEND_FACTOR_ZERO, + VK_BLEND_FACTOR_ONE, + VK_BLEND_FACTOR_SRC_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, + VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, + VK_BLEND_FACTOR_DST_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, + VK_BLEND_FACTOR_SRC_ALPHA_SATURATE, + 0, + VK_BLEND_FACTOR_CONSTANT_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + VK_BLEND_FACTOR_CONSTANT_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, +}; + +static const VkBlendOp pgraph_blend_equation_vk_map[] = { + VK_BLEND_OP_SUBTRACT, + VK_BLEND_OP_REVERSE_SUBTRACT, + VK_BLEND_OP_ADD, + VK_BLEND_OP_MIN, + VK_BLEND_OP_MAX, + VK_BLEND_OP_REVERSE_SUBTRACT, + VK_BLEND_OP_ADD, +}; + +/* FIXME +static const GLenum pgraph_blend_logicop_map[] = { + GL_CLEAR, + GL_AND, + GL_AND_REVERSE, + GL_COPY, + GL_AND_INVERTED, + GL_NOOP, + GL_XOR, + GL_OR, + GL_NOR, + GL_EQUIV, + GL_INVERT, + GL_OR_REVERSE, + GL_COPY_INVERTED, + GL_OR_INVERTED, + GL_NAND, + GL_SET, +}; +*/ + +static const VkCullModeFlags pgraph_cull_face_vk_map[] = { + 0, + VK_CULL_MODE_FRONT_BIT, + VK_CULL_MODE_BACK_BIT, + VK_CULL_MODE_FRONT_AND_BACK, +}; + +static const VkCompareOp pgraph_depth_func_vk_map[] = { + VK_COMPARE_OP_NEVER, + VK_COMPARE_OP_LESS, + VK_COMPARE_OP_EQUAL, + VK_COMPARE_OP_LESS_OR_EQUAL, + VK_COMPARE_OP_GREATER, + VK_COMPARE_OP_NOT_EQUAL, + VK_COMPARE_OP_GREATER_OR_EQUAL, + VK_COMPARE_OP_ALWAYS, +}; + +static const VkCompareOp pgraph_stencil_func_vk_map[] = { + VK_COMPARE_OP_NEVER, + VK_COMPARE_OP_LESS, + VK_COMPARE_OP_EQUAL, + VK_COMPARE_OP_LESS_OR_EQUAL, + 
VK_COMPARE_OP_GREATER, + VK_COMPARE_OP_NOT_EQUAL, + VK_COMPARE_OP_GREATER_OR_EQUAL, + VK_COMPARE_OP_ALWAYS, +}; + +static const VkStencilOp pgraph_stencil_op_vk_map[] = { + 0, + VK_STENCIL_OP_KEEP, + VK_STENCIL_OP_ZERO, + VK_STENCIL_OP_REPLACE, + VK_STENCIL_OP_INCREMENT_AND_CLAMP, + VK_STENCIL_OP_DECREMENT_AND_CLAMP, + VK_STENCIL_OP_INVERT, + VK_STENCIL_OP_INCREMENT_AND_WRAP, + VK_STENCIL_OP_DECREMENT_AND_WRAP, +}; + +static const VkPolygonMode pgraph_polygon_mode_vk_map[] = { + [POLY_MODE_FILL] = VK_POLYGON_MODE_FILL, + [POLY_MODE_POINT] = VK_POLYGON_MODE_POINT, + [POLY_MODE_LINE] = VK_POLYGON_MODE_LINE, +}; + +typedef struct VkColorFormatInfo { + VkFormat vk_format; + VkComponentMapping component_map; +} VkColorFormatInfo; + +static const VkColorFormatInfo kelvin_color_format_vk_map[66] = { + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = { + VK_FORMAT_A4R4G4B4_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = { + VK_FORMAT_R5G6B5_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = { + VK_FORMAT_B8G8R8A8_UNORM, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = { + VK_FORMAT_B8G8R8A8_UNORM, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = { + 
VK_FORMAT_B8G8R8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = { + VK_FORMAT_R5G6B5_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = { + VK_FORMAT_B8G8R8A8_UNORM, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = { + VK_FORMAT_A4R4G4B4_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = { + VK_FORMAT_B8G8R8A8_UNORM, + { VK_COMPONENT_SWIZZLE_IDENTITY, 
VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = { + VK_FORMAT_R8G8B8_SNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + + /* Additional information is passed to the pixel shader via the swizzle: + * RED: The depth value. 
+ * GREEN: 0 for 16-bit, 1 for 24 bit + * BLUE: 0 for fixed, 1 for float + */ + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = { + VK_FORMAT_R16_UNORM, // FIXME + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = { + // FIXME + // {GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}}, + VK_FORMAT_R32_UINT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = { + // FIXME + // {GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}}, + VK_FORMAT_R32_UINT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = { + VK_FORMAT_R16_UNORM, // FIXME + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = { + VK_FORMAT_R16_SFLOAT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = { + VK_FORMAT_R16_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = { + VK_FORMAT_R8G8B8A8_UNORM, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = { + VK_FORMAT_R8G8B8A8_UNORM, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = { + VK_FORMAT_R8G8B8A8_UNORM, + { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R } + }, + 
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = { + VK_FORMAT_R8G8B8A8_UNORM, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = { + VK_FORMAT_R8G8B8A8_UNORM, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = { + VK_FORMAT_R8G8B8A8_UNORM, + { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R } + }, +}; + +typedef struct BasicSurfaceFormatInfo { + unsigned int bytes_per_pixel; +} BasicSurfaceFormatInfo; + +typedef struct SurfaceFormatInfo { + unsigned int host_bytes_per_pixel; + VkFormat vk_format; + VkImageUsageFlags usage; + VkImageAspectFlags aspect; +} SurfaceFormatInfo; + +static const BasicSurfaceFormatInfo kelvin_surface_color_format_map[] = { + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] = { 2 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = { 2 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = { 4 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = { 4 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = { 1 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = { 2 }, +}; + +static const SurfaceFormatInfo kelvin_surface_color_format_vk_map[] = { + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] = + { + // FIXME: Force alpha to zero + 2, + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = + { + 2, + VK_FORMAT_R5G6B5_UNORM_PACK16, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = + { + // FIXME: Force alpha to zero + 4, + VK_FORMAT_B8G8R8A8_UNORM, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = + { + 4, + VK_FORMAT_B8G8R8A8_UNORM, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + 
[NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = + { + // FIXME: Map channel color + 1, + VK_FORMAT_R8_UNORM, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = + { + // FIXME: Map channel color + 2, + VK_FORMAT_R8G8_UNORM, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, +}; + +static const BasicSurfaceFormatInfo kelvin_surface_zeta_format_map[] = { + [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = { 2 }, + [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = { 4 }, +}; + +// FIXME: Actually support stored float format + +static const SurfaceFormatInfo zeta_d16 = { + 2, + VK_FORMAT_D16_UNORM, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_DEPTH_BIT, +}; + +static const SurfaceFormatInfo zeta_d32_sfloat_s8_uint = { + 8, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, +}; + +static const SurfaceFormatInfo zeta_d24_unorm_s8_uint = { + 4, + VK_FORMAT_D24_UNORM_S8_UINT, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, +}; + +#endif diff --git a/hw/xbox/nv2a/pgraph/vk/debug.c b/hw/xbox/nv2a/pgraph/vk/debug.c new file mode 100644 index 0000000000..a8cb08c4a2 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/debug.c @@ -0,0 +1,59 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "renderer.h" +#include "debug.h" + +#ifndef _WIN32 +#include +#endif + +#ifdef CONFIG_RENDERDOC +#pragma GCC diagnostic ignored "-Wstrict-prototypes" +#include "thirdparty/renderdoc_app.h" +#endif + +int nv2a_vk_dgroup_indent = 0; + +void pgraph_vk_debug_init(void) +{ +#ifdef CONFIG_RENDERDOC + nv2a_dbg_renderdoc_init(); +#endif +} + +void pgraph_vk_debug_frame_terminator(void) +{ +#ifdef CONFIG_RENDERDOC + if (nv2a_dbg_renderdoc_available()) { + RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api(); + + PGRAPHVkState *r = g_nv2a->pgraph.vk_renderer_state; + if (rdoc_api->IsTargetControlConnected()) { + if (rdoc_api->IsFrameCapturing()) { + rdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0); + } + if (renderdoc_capture_frames > 0) { + rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0); + --renderdoc_capture_frames; + } + } + } +#endif +} diff --git a/hw/xbox/nv2a/pgraph/vk/debug.h b/hw/xbox/nv2a/pgraph/vk/debug.h new file mode 100644 index 0000000000..62cd63e592 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/debug.h @@ -0,0 +1,61 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_VK_DEBUG_H +#define HW_XBOX_NV2A_PGRAPH_VK_DEBUG_H + +#define DEBUG_VK 0 + +extern int nv2a_vk_dgroup_indent; + +#define NV2A_VK_XDPRINTF(x, fmt, ...) \ + do { \ + if (x) { \ + for (int i = 0; i < nv2a_vk_dgroup_indent; i++) \ + fprintf(stderr, " "); \ + fprintf(stderr, fmt "\n", ##__VA_ARGS__); \ + } \ + } while (0) + +#define NV2A_VK_DPRINTF(fmt, ...) NV2A_VK_XDPRINTF(DEBUG_VK, fmt, ##__VA_ARGS__) + +#define NV2A_VK_DGROUP_BEGIN(fmt, ...) \ + do { \ + NV2A_VK_XDPRINTF(DEBUG_VK, fmt, ##__VA_ARGS__); \ + nv2a_vk_dgroup_indent++; \ + } while (0) + +#define NV2A_VK_DGROUP_END(...) \ + do { \ + nv2a_vk_dgroup_indent--; \ + assert(nv2a_vk_dgroup_indent >= 0); \ + } while (0) + +#define VK_CHECK(x) \ + do { \ + VkResult vk_result = (x); \ + if (vk_result != VK_SUCCESS) { \ + fprintf(stderr, "vk_result = %d\n", vk_result); \ + } \ + assert(vk_result == VK_SUCCESS && "vk check failed"); \ + } while (0) + +void pgraph_vk_debug_frame_terminator(void); + +#endif diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c new file mode 100644 index 0000000000..595f119ca2 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -0,0 +1,896 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */
+
+#include "renderer.h"
+
+/* Fullscreen fragment shader used to scan the current surface out to the
+ * display image.  The surface is sampled at binding 0.  Binding 1 and the
+ * pvideo_* push constants are reserved for the PVIDEO overlay, whose shader
+ * body is still commented out (see update_uniforms below). */
+static const char *display_frag_glsl =
+    "#version 450\n"
+    "layout(binding = 0) uniform sampler2D tex;\n"
+    "layout(binding = 1) uniform sampler2D pvideo_tex;\n"
+    "layout(push_constant, std430) uniform PushConstants {\n"
+    " bool pvideo_enable;\n"
+    " vec2 pvideo_in_pos;\n"
+    " vec4 pvideo_pos;\n"
+    " vec3 pvideo_scale;\n"
+    " bool pvideo_color_key_enable;\n"
+    " vec2 display_size;\n"
+    " float line_offset;\n"
+    " vec4 pvideo_color_key;\n"
+    "};\n"
+    "layout(location = 0) out vec4 out_Color;\n"
+    "void main()\n"
+    "{\n"
+    " vec2 texCoord = gl_FragCoord.xy/display_size;\n"
+    " texCoord.y = 1 - texCoord.y;\n" // GL compat
+    " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n"
+    " texCoord.y = 1 + rel*(texCoord.y - 1);"
+    " out_Color.rgba = texture(tex, texCoord);\n"
+    // " if (pvideo_enable) {\n"
+    // " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n"
+    // " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n"
+    // " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n"
+    // " greaterThan(screenCoord, output_region.zw));\n"
+    // " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n"
+    // " vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n"
+    // " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n"
+    // " in_st.y *= -1.0;\n"
+    // " out_Color.rgba = texture(pvideo_tex, in_st);\n"
+    // " }\n"
+    // " }\n"
+    "}\n";
+
+/* Pool sized for exactly one descriptor set holding the two combined
+ * image/samplers the display shader binds (surface + PVIDEO). */
+static void create_descriptor_pool(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkDescriptorPoolSize pool_sizes = {
+        .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+        .descriptorCount = 2,
+    };
+
+    VkDescriptorPoolCreateInfo pool_info = {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+        .poolSizeCount = 1,
+        .pPoolSizes = &pool_sizes,
+        .maxSets = 1,
+        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+    };
+    VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL,
+                                    &r->display.descriptor_pool));
+}
+
+static void destroy_descriptor_pool(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    vkDestroyDescriptorPool(r->device, r->display.descriptor_pool, NULL);
+    r->display.descriptor_pool = VK_NULL_HANDLE;
+}
+
+/* Layout: bindings 0 and 1 are fragment-stage combined image/samplers,
+ * matching display_frag_glsl. */
+static void create_descriptor_set_layout(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkDescriptorSetLayoutBinding bindings[2];
+
+    for (int i = 0; i < ARRAY_SIZE(bindings); i++) {
+        bindings[i] = (VkDescriptorSetLayoutBinding){
+            .binding = i,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+        };
+    }
+    VkDescriptorSetLayoutCreateInfo layout_info = {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+        .bindingCount = ARRAY_SIZE(bindings),
+        .pBindings = bindings,
+    };
+    VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL,
+                                         &r->display.descriptor_set_layout));
+}
+
+static void destroy_descriptor_set_layout(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    vkDestroyDescriptorSetLayout(r->device, r->display.descriptor_set_layout,
+                                 NULL);
+    r->display.descriptor_set_layout = VK_NULL_HANDLE;
+}
+
+/* Allocate the single display descriptor set; freed implicitly when the
+ * pool is destroyed in pgraph_vk_finalize_display. */
+static void create_descriptor_sets(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkDescriptorSetLayout layout = r->display.descriptor_set_layout;
+
+    VkDescriptorSetAllocateInfo alloc_info = {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+        .descriptorPool = r->display.descriptor_pool,
+        .descriptorSetCount = 1,
+        .pSetLayouts = &layout,
+    };
+    VK_CHECK(vkAllocateDescriptorSets(r->device, &alloc_info,
+                                      &r->display.descriptor_set));
+}
+
+/* Single-subpass render pass targeting the RGBA8 display image.  LOAD_OP_LOAD
+ * preserves prior contents; the external dependency orders our color writes
+ * against earlier color-attachment output. */
+static void create_render_pass(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkAttachmentDescription attachment;
+
+    VkAttachmentReference color_reference;
+    attachment = (VkAttachmentDescription){
+        .format = VK_FORMAT_R8G8B8A8_UNORM,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+        .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+        .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+        .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+        .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+    };
+    color_reference = (VkAttachmentReference){
+        0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
+    };
+
+    VkSubpassDependency dependency = {
+        .srcSubpass = VK_SUBPASS_EXTERNAL,
+    };
+
+    dependency.srcStageMask |=
+        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+    dependency.dstStageMask |=
+        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+    dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+
+    VkSubpassDescription subpass = {
+        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+        .colorAttachmentCount = 1,
+        .pColorAttachments = &color_reference,
+    };
+
+    VkRenderPassCreateInfo renderpass_create_info = {
+        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+        .attachmentCount = 1,
+        .pAttachments = &attachment,
+        .subpassCount = 1,
+        .pSubpasses = &subpass,
+        .dependencyCount = 1,
+        .pDependencies = &dependency,
+    };
+    VK_CHECK(vkCreateRenderPass(r->device, &renderpass_create_info, NULL,
+                                &r->display.render_pass));
+}
+
+static void destroy_render_pass(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    vkDestroyRenderPass(r->device, r->display.render_pass, NULL);
+    r->display.render_pass = VK_NULL_HANDLE;
+}
+
+/* Build the fullscreen-triangle pipeline that runs display_frag_glsl.
+ * Viewport/scissor are dynamic so one pipeline serves any display size. */
+static void create_display_pipeline(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    r->display.display_frag =
+        pgraph_vk_create_shader_module_from_glsl(
+            r, VK_SHADER_STAGE_FRAGMENT_BIT, display_frag_glsl);
+
+    VkPipelineShaderStageCreateInfo shader_stages[] = {
+        (VkPipelineShaderStageCreateInfo){
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .stage = VK_SHADER_STAGE_VERTEX_BIT,
+            .module = r->quad_vert_module->module,
+            .pName = "main",
+        },
+        (VkPipelineShaderStageCreateInfo){
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+            .module = r->display.display_frag->module,
+            .pName = "main",
+        },
+    };
+
+    /* No vertex buffers: the quad vertex shader synthesizes positions from
+     * gl_VertexIndex. */
+    VkPipelineVertexInputStateCreateInfo vertex_input = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+    };
+
+    VkPipelineInputAssemblyStateCreateInfo input_assembly = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+        .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+        .primitiveRestartEnable = VK_FALSE,
+    };
+
+    VkPipelineViewportStateCreateInfo viewport_state = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+        .viewportCount = 1,
+        .scissorCount = 1,
+    };
+
+    VkPipelineRasterizationStateCreateInfo rasterizer = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+        .depthClampEnable = VK_FALSE,
+        .rasterizerDiscardEnable = VK_FALSE,
+        .polygonMode = VK_POLYGON_MODE_FILL,
+        .lineWidth = 1.0f,
+        .cullMode = VK_CULL_MODE_BACK_BIT,
+        .frontFace = VK_FRONT_FACE_CLOCKWISE,
+        .depthBiasEnable = VK_FALSE,
+    };
+
+    VkPipelineMultisampleStateCreateInfo multisampling = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+        .sampleShadingEnable = VK_FALSE,
+        .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+    };
+
+    VkPipelineDepthStencilStateCreateInfo depth_stencil = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+        .depthTestEnable = VK_FALSE,
+        .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+        .depthBoundsTestEnable = VK_FALSE,
+    };
+
+    VkPipelineColorBlendAttachmentState color_blend_attachment = {
+        .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+                          VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+        .blendEnable = VK_FALSE,
+    };
+
+    VkPipelineColorBlendStateCreateInfo color_blending = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+        .logicOpEnable = VK_FALSE,
+        .logicOp = VK_LOGIC_OP_COPY,
+        .attachmentCount = 1,
+        .pAttachments = &color_blend_attachment,
+    };
+
+    VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT,
+                                        VK_DYNAMIC_STATE_SCISSOR };
+    VkPipelineDynamicStateCreateInfo dynamic_state = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+        .dynamicStateCount = 2,
+        .pDynamicStates = dynamic_states,
+    };
+
+    VkPushConstantRange push_constant_range = {
+        .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+        .offset = 0,
+        .size = r->display.display_frag->push_constants.total_size,
+    };
+
+    VkPipelineLayoutCreateInfo pipeline_layout_info = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+        .setLayoutCount = 1,
+        .pSetLayouts = &r->display.descriptor_set_layout,
+        .pushConstantRangeCount = 1,
+        .pPushConstantRanges = &push_constant_range,
+    };
+    VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
+                                    &r->display.pipeline_layout));
+
+    /* NOTE(review): pDepthStencilState is keyed off the current zeta binding,
+     * but this render pass has no depth/stencil attachment, so the state is
+     * ignored either way -- presumably a copy-paste from the draw pipeline;
+     * confirm intent upstream. */
+    VkGraphicsPipelineCreateInfo pipeline_info = {
+        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+        .stageCount = ARRAY_SIZE(shader_stages),
+        .pStages = shader_stages,
+        .pVertexInputState = &vertex_input,
+        .pInputAssemblyState = &input_assembly,
+        .pViewportState = &viewport_state,
+        .pRasterizationState = &rasterizer,
+        .pMultisampleState = &multisampling,
+        .pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
+        .pColorBlendState = &color_blending,
+        .pDynamicState = &dynamic_state,
+        .layout = r->display.pipeline_layout,
+        .renderPass = r->display.render_pass,
+        .subpass = 0,
+        .basePipelineHandle = VK_NULL_HANDLE,
+    };
+    VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
+                                       &pipeline_info, NULL,
+                                       &r->display.pipeline));
+}
+
+static void destroy_display_pipeline(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    vkDestroyPipeline(r->device, r->display.pipeline, NULL);
+    r->display.pipeline = VK_NULL_HANDLE;
+}
+
+/* Wrap the current display image view in a framebuffer for the display
+ * render pass.  Recreated whenever the display image changes size. */
+static void create_frame_buffer(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkFramebufferCreateInfo create_info = {
+        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+        .renderPass = r->display.render_pass,
+        .attachmentCount = 1,
+        .pAttachments = &r->display.image_view,
+        .width = r->display.width,
+        .height = r->display.height,
+        .layers = 1,
+    };
+    VK_CHECK(vkCreateFramebuffer(r->device, &create_info, NULL,
+                                 &r->display.framebuffer));
+}
+
+static void destroy_frame_buffer(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    vkDestroyFramebuffer(r->device, r->display.framebuffer, NULL);
+    /* Fix: VkFramebuffer is a non-dispatchable handle; clear it with
+     * VK_NULL_HANDLE (as every sibling destroy_* here does), not NULL. */
+    r->display.framebuffer = VK_NULL_HANDLE;
+}
+
+/* Tear down the display image and everything hanging off it: framebuffer,
+ * GL interop texture/memory object (and Win32 handle), image view, image,
+ * and backing memory.  Resets draw_time so the next surface re-renders. */
+static void destroy_current_display_image(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    PGRAPHVkDisplayState *d = &r->display;
+
+    if (d->image == VK_NULL_HANDLE) {
+        return;
+    }
+
+    destroy_frame_buffer(pg);
+
+#if HAVE_EXTERNAL_MEMORY
+    glDeleteTextures(1, &d->gl_texture_id);
+    d->gl_texture_id = 0;
+
+    glDeleteMemoryObjectsEXT(1, &d->gl_memory_obj);
+    d->gl_memory_obj = 0;
+
+#ifdef WIN32
+    CloseHandle(d->handle);
+    d->handle = 0;
+#endif
+#endif
+
+    vkDestroyImageView(r->device, d->image_view, NULL);
+    d->image_view = VK_NULL_HANDLE;
+
+    vkDestroyImage(r->device, d->image, NULL);
+    d->image = VK_NULL_HANDLE;
+
+    vkFreeMemory(r->device, d->memory, NULL);
+    d->memory = VK_NULL_HANDLE;
+
+    d->draw_time = 0;
+}
+
+//
FIXME: We may need to use two images. One for actually rendering display,
+// and another for GL in the correct tiling mode
+
+/* (Re)create the shared display image sized to the given surface (after
+ * applying the surface scaling factor), choose a tiling mode both Vulkan
+ * and GL interop can agree on, export its memory to GL, and rebuild the
+ * framebuffer.  Destroys any previous display image first. */
+static void create_display_image_from_surface(PGRAPHState *pg,
+                                              SurfaceBinding *surface)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    PGRAPHVkDisplayState *d = &r->display;
+
+    if (r->display.image != VK_NULL_HANDLE) {
+        destroy_current_display_image(pg);
+    }
+
+    const GLint gl_internal_format = GL_RGBA8;
+    bool use_optimal_tiling = true;
+
+#if HAVE_EXTERNAL_MEMORY
+    GLint num_tiling_types;
+    glGetInternalformativ(GL_TEXTURE_2D, gl_internal_format,
+                          GL_NUM_TILING_TYPES_EXT, 1, &num_tiling_types);
+    // XXX: Apparently on AMD GL_OPTIMAL_TILING_EXT is reported to be
+    // supported, but doesn't work? On nVidia, GL_LINEAR_TILING_EXT may not
+    // be supported so we must use optimal. Default to optimal unless
+    // linear is explicitly specified...
+    GLint tiling_types[num_tiling_types];
+    glGetInternalformativ(GL_TEXTURE_2D, gl_internal_format,
+                          GL_TILING_TYPES_EXT, num_tiling_types, tiling_types);
+    for (int i = 0; i < num_tiling_types; i++) {
+        if (tiling_types[i] == GL_LINEAR_TILING_EXT) {
+            use_optimal_tiling = false;
+            break;
+        }
+    }
+#endif
+
+    // Create image
+    VkImageCreateInfo image_create_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .imageType = VK_IMAGE_TYPE_2D,
+        .extent.width = surface->width,
+        .extent.height = surface->height,
+        .extent.depth = 1,
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .format = VK_FORMAT_R8G8B8A8_UNORM,
+        .tiling = use_optimal_tiling ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+    };
+    pgraph_apply_scaling_factor(pg, &image_create_info.extent.width,
+                                &image_create_info.extent.height);
+
+    /* Fix: the handle type declared here must match the one used by the
+     * VkExportMemoryAllocateInfo / handle export below; the opaque-FD type
+     * is not valid on Windows, where the Win32 handle type is exported. */
+    VkExternalMemoryImageCreateInfo external_memory_image_create_info = {
+        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
+        .handleTypes =
+#ifdef WIN32
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
+#else
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR
+#endif
+            ,
+    };
+    image_create_info.pNext = &external_memory_image_create_info;
+
+    VK_CHECK(vkCreateImage(r->device, &image_create_info, NULL, &d->image));
+
+    // Allocate and bind image memory
+    VkMemoryRequirements memory_requirements;
+    vkGetImageMemoryRequirements(r->device, d->image, &memory_requirements);
+
+    VkMemoryAllocateInfo alloc_info = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .allocationSize = memory_requirements.size,
+        .memoryTypeIndex =
+            pgraph_vk_get_memory_type(pg, memory_requirements.memoryTypeBits,
+                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
+    };
+
+    VkExportMemoryAllocateInfo export_memory_alloc_info = {
+        .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
+        .handleTypes =
+#ifdef WIN32
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
+#else
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT
+#endif
+            ,
+    };
+    alloc_info.pNext = &export_memory_alloc_info;
+
+    VK_CHECK(vkAllocateMemory(r->device, &alloc_info, NULL, &d->memory));
+
+    /* Fix: check the bind result like every other Vulkan call here. */
+    VK_CHECK(vkBindImageMemory(r->device, d->image, d->memory, 0));
+
+    // Create Image View
+    VkImageViewCreateInfo image_view_create_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .image = d->image,
+        .viewType = VK_IMAGE_VIEW_TYPE_2D,
+        .format = image_create_info.format,
+        .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+        .subresourceRange.levelCount = 1,
+        .subresourceRange.layerCount = 1,
+    };
+    VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL,
+                               &d->image_view));
+
+#if HAVE_EXTERNAL_MEMORY
+
+#ifdef WIN32
+
+    VkMemoryGetWin32HandleInfoKHR handle_info = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
+        .memory = d->memory,
+        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
+    };
+    VK_CHECK(vkGetMemoryWin32HandleKHR(r->device, &handle_info, &d->handle));
+
+    glCreateMemoryObjectsEXT(1, &d->gl_memory_obj);
+    glImportMemoryWin32HandleEXT(d->gl_memory_obj, memory_requirements.size, GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, d->handle);
+    assert(glGetError() == GL_NO_ERROR);
+
+#else
+
+    VkMemoryGetFdInfoKHR fd_info = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
+        .memory = d->memory,
+        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
+    };
+    VK_CHECK(vkGetMemoryFdKHR(r->device, &fd_info, &d->fd));
+
+    glCreateMemoryObjectsEXT(1, &d->gl_memory_obj);
+    glImportMemoryFdEXT(d->gl_memory_obj, memory_requirements.size,
+                        GL_HANDLE_TYPE_OPAQUE_FD_EXT, d->fd);
+    assert(glIsMemoryObjectEXT(d->gl_memory_obj));
+    assert(glGetError() == GL_NO_ERROR);
+
+#endif // WIN32
+
+    glGenTextures(1, &d->gl_texture_id);
+    glBindTexture(GL_TEXTURE_2D, d->gl_texture_id);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT,
+                    use_optimal_tiling ? GL_OPTIMAL_TILING_EXT :
+                                         GL_LINEAR_TILING_EXT);
+    glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, gl_internal_format,
+                         image_create_info.extent.width,
+                         image_create_info.extent.height, d->gl_memory_obj, 0);
+    assert(glGetError() == GL_NO_ERROR);
+
+#endif // HAVE_EXTERNAL_MEMORY
+
+    d->width = image_create_info.extent.width;
+    d->height = image_create_info.extent.height;
+
+    create_frame_buffer(pg);
+}
+
+/* Point binding 0 at the scanned-out surface and binding 1 at the dummy
+ * texture (stand-in until PVIDEO overlay support lands). */
+static void update_descriptor_set(PGRAPHState *pg, SurfaceBinding *surface)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkDescriptorImageInfo image_infos[2];
+    VkWriteDescriptorSet descriptor_writes[2];
+
+    // Display surface
+    image_infos[0] = (VkDescriptorImageInfo){
+        .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+        .imageView = surface->image_view,
+        .sampler = r->display.sampler,
+    };
+    descriptor_writes[0] = (VkWriteDescriptorSet){
+        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+        .dstSet = r->display.descriptor_set,
+        .dstBinding = 0,
+        .dstArrayElement = 0,
+        .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+        .descriptorCount = 1,
+        .pImageInfo = &image_infos[0],
+    };
+
+    // FIXME: PVIDEO Overlay
+    image_infos[1] = (VkDescriptorImageInfo){
+        .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+        .imageView = r->dummy_texture.image_view,
+        .sampler = r->dummy_texture.sampler,
+    };
+    descriptor_writes[1] = (VkWriteDescriptorSet){
+        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+        .dstSet = r->display.descriptor_set,
+        .dstBinding = 1,
+        .dstArrayElement = 0,
+        .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+        .descriptorCount = 1,
+        .pImageInfo = &image_infos[1],
+    };
+
+    vkUpdateDescriptorSets(r->device, ARRAY_SIZE(descriptor_writes),
+                           descriptor_writes, 0, NULL);
+}
+
+/* Push the display-size and line-offset constants for the display shader,
+ * derived from the VGA controller's reported resolution and pitch.  The
+ * large #if 0 region below is the GL renderer's PVIDEO overlay code,
+ * retained verbatim as a reference for the Vulkan port. */
+static void update_uniforms(PGRAPHState *pg, SurfaceBinding *surface)
+{
+    NV2AState *d = container_of(pg, NV2AState, pgraph);
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    unsigned int width, height;
+    uint32_t pline_offset, pstart_addr, pline_compare;
+    d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height);
+    d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
+    /* NOTE(review): divides by the VGA-reported pitch; presumably non-zero
+     * whenever a scanout surface exists -- confirm against vga.c. */
+    int line_offset = surface->pitch / pline_offset;
+
+    /* Adjust viewport height for interlaced mode, used only in 1080i */
+    if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) {
+        height *= 2;
+    }
+
+    pgraph_apply_scaling_factor(pg, &width, &height);
+
+    ShaderUniformLayout *l = &r->display.display_frag->push_constants;
+    int display_size_loc = uniform_index(l, "display_size"); // FIXME: Cache
+    int line_offset_loc = uniform_index(l, "line_offset");
+    uniform2f(l, display_size_loc, width, height);
+    uniform1f(l, line_offset_loc, line_offset);
+
+#if 0 // FIXME: PVIDEO overlay
+    // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior.
+    // Many games seem to pass this value when initializing or tearing down
+    // PVIDEO. On its own, this generally does not result in the overlay being
+    // hidden, however there are certain games (e.g., Ultimate Beach Soccer)
+    // that use an unknown mechanism to hide the overlay without explicitly
+    // stopping it.
+    // Since the value seems to be set to 0xFFFFFFFF only in cases where the
+    // content is not valid, it is probably good enough to treat it as an
+    // implicit stop.
+    bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)
+        && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF;
+    glUniform1ui(d->pgraph.renderer_state->disp_rndr.pvideo_enable_loc, enabled);
+    if (!enabled) {
+        return;
+    }
+
+    hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE];
+    hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT];
+    hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET];
+
+    int in_width =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH);
+    int in_height =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT);
+
+    int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
+                        NV_PVIDEO_POINT_IN_S);
+    int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
+                        NV_PVIDEO_POINT_IN_T);
+
+    int in_pitch =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH);
+    int in_color =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR);
+
+    unsigned int out_width =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH);
+    unsigned int out_height =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT);
+
+    float scale_x = 1.0f;
+    float scale_y = 1.0f;
+    unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX];
+    unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY];
+    if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) {
+        scale_x = pvideo_calculate_scale(ds_dx, out_width);
+    }
+    if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) {
+        scale_y = pvideo_calculate_scale(dt_dy, out_height);
+    }
+
+    // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results
+    // in them being capped to the output size, content is not scaled. This is
+    // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF
+    // during initialization or teardown.
+    if (in_width > out_width) {
+        in_width = floorf((float)out_width * scale_x + 0.5f);
+    }
+    if (in_height > out_height) {
+        in_height = floorf((float)out_height * scale_y + 0.5f);
+    }
+
+    /* TODO: support other color formats */
+    assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8);
+
+    unsigned int out_x =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X);
+    unsigned int out_y =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y);
+
+    unsigned int color_key_enabled =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY);
+    glUniform1ui(d->pgraph.renderer_state->disp_rndr.pvideo_color_key_enable_loc,
+                 color_key_enabled);
+
+    // TODO: Verify that masking off the top byte is correct.
+    // SeaBlade sets a color key of 0x80000000 but the texture passed into the
+    // shader is cleared to 0 alpha.
+    unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF;
+    glUniform4f(d->pgraph.renderer_state->disp_rndr.pvideo_color_key_loc,
+                GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0,
+                GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0,
+                GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0,
+                GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0);
+
+    assert(offset + in_pitch * in_height <= limit);
+    hwaddr end = base + offset + in_pitch * in_height;
+    assert(end <= memory_region_size(d->vram));
+
+    pgraph_apply_scaling_factor(pg, &out_x, &out_y);
+    pgraph_apply_scaling_factor(pg, &out_width, &out_height);
+
+    // Translate for the GL viewport origin.
+    out_y = MAX(pg->renderer_state->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0);
+
+    glActiveTexture(GL_TEXTURE0 + 1);
+    glBindTexture(GL_TEXTURE_2D, d->pgraph.renderer_state->disp_rndr.pvideo_tex);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8(
+        d->vram_ptr + base + offset, in_width, in_height, in_pitch);
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA,
+                 GL_UNSIGNED_BYTE, tex_rgba);
+    g_free(tex_rgba);
+    glUniform1i(d->pgraph.renderer_state->disp_rndr.pvideo_tex_loc, 1);
+    glUniform2f(d->pgraph.renderer_state->disp_rndr.pvideo_in_pos_loc, in_s, in_t);
+    glUniform4f(d->pgraph.renderer_state->disp_rndr.pvideo_pos_loc,
+                out_x, out_y, out_width, out_height);
+    glUniform3f(d->pgraph.renderer_state->disp_rndr.pvideo_scale_loc,
+                scale_x, scale_y, 1.0f / pg->surface_scale_factor);
+#endif
+}
+
+/* Draw the surface into the display image if the surface has been drawn to
+ * since our last scanout.  Flushes the in-flight command buffer first when
+ * it contains the pending surface work, then records a single-time command
+ * buffer: surface -> shader-read, display -> color-attachment, fullscreen
+ * triangle, then both transitioned back/onward for GL sampling. */
+static void render_display(PGRAPHState *pg, SurfaceBinding *surface)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    PGRAPHVkDisplayState *disp = &r->display;
+
+    if (disp->draw_time >= surface->draw_time) {
+        return;
+    }
+
+    if (r->in_command_buffer &&
+        surface->draw_time >= r->command_buffer_start_time) {
+        pgraph_vk_finish(pg, VK_FINISH_REASON_PRESENTING);
+    }
+
+    update_uniforms(pg, surface);
+    update_descriptor_set(pg, surface);
+
+    VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
+
+    pgraph_vk_transition_image_layout(pg, cmd, surface->image,
+                                      surface->host_fmt.vk_format,
+                                      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+    pgraph_vk_transition_image_layout(
+        pg, cmd, disp->image, VK_FORMAT_R8G8B8A8_UNORM,
+        VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
+
+    VkRenderPassBeginInfo render_pass_begin_info = {
+        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+        .renderPass = disp->render_pass,
+        .framebuffer = disp->framebuffer,
+        .renderArea.extent.width = disp->width,
+        .renderArea.extent.height = disp->height,
+    };
+    vkCmdBeginRenderPass(cmd, &render_pass_begin_info,
+                         VK_SUBPASS_CONTENTS_INLINE);
+    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
+                      disp->pipeline);
+
+    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
+                            disp->pipeline_layout, 0, 1, &disp->descriptor_set,
+                            0, NULL);
+
+    VkViewport viewport = {
+        .width = disp->width,
+        .height = disp->height,
+        .minDepth = 0.0,
+        .maxDepth = 1.0,
+    };
+    vkCmdSetViewport(cmd, 0, 1, &viewport);
+
+    VkRect2D scissor = {
+        .extent.width = disp->width,
+        .extent.height = disp->height,
+    };
+    vkCmdSetScissor(cmd, 0, 1, &scissor);
+
+    vkCmdPushConstants(cmd, disp->pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT,
+                       0, disp->display_frag->push_constants.total_size,
+                       disp->display_frag->push_constants.allocation);
+
+    /* Single fullscreen triangle generated by the quad vertex shader. */
+    vkCmdDraw(cmd, 3, 1, 0, 0);
+
+    vkCmdEndRenderPass(cmd);
+
+#if 0
+    VkImageCopy region = {
+        .srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+        .srcSubresource.layerCount = 1,
+        .dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+        .dstSubresource.layerCount = 1,
+        .extent.width = surface->width,
+        .extent.height = surface->height,
+        .extent.depth = 1,
+    };
+    pgraph_apply_scaling_factor(pg, &region.extent.width,
+                                &region.extent.height);
+
+    vkCmdCopyImage(cmd, surface->image,
+                   VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, disp->image,
+                   VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
+#endif
+
+    pgraph_vk_transition_image_layout(pg, cmd, surface->image,
+                                      surface->host_fmt.vk_format,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
+
+    /* Fix: the display image is VK_FORMAT_R8G8B8A8_UNORM (see
+     * create_display_image_from_surface); the previous R8G8B8_UNORM here was
+     * inconsistent with every other reference to this image. */
+    pgraph_vk_transition_image_layout(pg, cmd, disp->image,
+                                      VK_FORMAT_R8G8B8A8_UNORM,
+                                      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+
+    pgraph_vk_end_single_time_commands(pg, cmd);
+    nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_5);
+
+    disp->draw_time = surface->draw_time;
+}
+
+/* Nearest-filter sampler used to read the scanout surface 1:1. */
+static void create_surface_sampler(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkSamplerCreateInfo sampler_create_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+        .magFilter = VK_FILTER_NEAREST,
+        .minFilter = VK_FILTER_NEAREST,
+        .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+        .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+        .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+        .anisotropyEnable = VK_FALSE,
+        .borderColor = VK_BORDER_COLOR_INT_OPAQUE_WHITE,
+        .unnormalizedCoordinates = VK_FALSE,
+        .compareEnable = VK_FALSE,
+        .compareOp = VK_COMPARE_OP_ALWAYS,
+        .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
+    };
+
+    VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL,
+                             &r->display.sampler));
+}
+
+static void destroy_surface_sampler(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    vkDestroySampler(r->device, r->display.sampler, NULL);
+    r->display.sampler = VK_NULL_HANDLE;
+}
+
+void pgraph_vk_init_display(PGRAPHState *pg)
+{
+    create_descriptor_pool(pg);
+    create_descriptor_set_layout(pg);
+    create_descriptor_sets(pg);
+    create_render_pass(pg);
+    create_display_pipeline(pg);
+    create_surface_sampler(pg);
+}
+
+/* Teardown mirrors pgraph_vk_init_display in reverse order. */
+void pgraph_vk_finalize_display(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    if (r->display.image != VK_NULL_HANDLE) {
+        destroy_current_display_image(pg);
+    }
+
+    destroy_surface_sampler(pg);
+    destroy_display_pipeline(pg);
+    destroy_render_pass(pg);
+    destroy_descriptor_set_layout(pg);
+    destroy_descriptor_pool(pg);
+}
+
+/* Entry point from the display path: locate the color surface currently
+ * scanned out by PCRTC, (re)size the display image to match, and render. */
+void pgraph_vk_render_display(PGRAPHState *pg)
+{
+    NV2AState *d = container_of(pg, NV2AState, pgraph);
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    uint32_t pline_offset, pstart_addr, pline_compare;
+    d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
+    SurfaceBinding *surface =
+        pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset);
+    if (surface == NULL || !surface->color) {
+        return;
+    }
+
+    unsigned int width = surface->width, height = surface->height;
+    pgraph_apply_scaling_factor(pg, &width, &height);
+
+    PGRAPHVkDisplayState *disp = &r->display;
+    if (!disp->image || disp->width != width || disp->height != height) {
+        create_display_image_from_surface(pg, surface);
+    }
+
+    render_display(pg, surface);
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c
new file mode 100644
index 0000000000..c4f2cd85e0
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/draw.c
@@ -0,0 +1,1916 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/fast-hash.h"
+#include "renderer.h"
+
+/* NV097_SET_BEGIN_END begin handler: decode write masks and test enables
+ * from PGRAPH registers, update surfaces, and early-out on no-op draws
+ * (nothing written to color, depth, or stencil). */
+void pgraph_vk_draw_begin(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+
+    NV2A_VK_DPRINTF("NV097_SET_BEGIN_END: 0x%x", d->pgraph.primitive_mode);
+
+    uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
+    bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
+    bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
+    bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
+    bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
+    bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
+    bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
+    bool stencil_test =
+        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
+    bool is_nop_draw = !(color_write || depth_test || stencil_test);
+
+    pgraph_vk_surface_update(d, true, true, depth_test || stencil_test);
+
+    if (is_nop_draw) {
+        NV2A_VK_DPRINTF("nop!");
+        /* NOTE(review): no matching NV2A_VK_DGROUP_BEGIN appears above, so
+         * this END drives nv2a_vk_dgroup_indent negative and trips its
+         * assert in debug builds -- verify the intended pairing upstream. */
+        NV2A_VK_DGROUP_END();
+        return;
+    }
+}
+
+/* Map the NV2A primitive mode (plus polygon mode) of the bound shader state
+ * to a Vulkan topology.  Quads/quad-strips are encoded as *_WITH_ADJACENCY
+ * list/strip topologies -- presumably reconstructed by a geometry shader
+ * elsewhere in this renderer; confirm against the geom shader generator. */
+static VkPrimitiveTopology get_primitive_topology(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    int polygon_mode = r->shader_binding->state.polygon_front_mode;
+    int primitive_mode = r->shader_binding->state.primitive_mode;
+
+    if (polygon_mode == POLY_MODE_POINT) {
+        return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
+    }
+
+    // FIXME: Replace with LUT
+    switch (primitive_mode) {
+    case PRIM_TYPE_POINTS:
+        return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
+    case PRIM_TYPE_LINES:
+        return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
+    case PRIM_TYPE_LINE_LOOP:
+        // FIXME: line strips, except that the first and last vertices are also used as a line
+        return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
+    case PRIM_TYPE_LINE_STRIP:
+        return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
+    case PRIM_TYPE_TRIANGLES:
+        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
+    case PRIM_TYPE_TRIANGLE_STRIP:
+        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
+    case PRIM_TYPE_TRIANGLE_FAN:
+        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
+    case PRIM_TYPE_QUADS:
+        return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
+    case PRIM_TYPE_QUAD_STRIP:
+        return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
+    case PRIM_TYPE_POLYGON:
+        if (polygon_mode == POLY_MODE_LINE) {
+            return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; // FIXME
+        } else if (polygon_mode == POLY_MODE_FILL) {
+            return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
+        }
+        assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode");
+        return 0;
+    default:
+        assert(!"Invalid primitive_mode");
+        return 0;
+    }
+}
+
+/* LRU callback: reset a freshly-allocated pipeline cache node. */
+static void pipeline_cache_entry_init(Lru *lru, LruNode *node, void *state)
+{
+    PipelineBinding *snode = container_of(node, PipelineBinding, node);
+    snode->layout = VK_NULL_HANDLE;
+    snode->pipeline = VK_NULL_HANDLE;
+    snode->draw_time = 0;
+}
+
+/* LRU callback: destroy the Vulkan objects of an evicted pipeline.  Asserts
+ * the pipeline is not referenced by the in-flight command buffer. */
+static void pipeline_cache_entry_post_evict(Lru *lru, LruNode *node)
+{
+    PGRAPHVkState *r = container_of(lru, PGRAPHVkState, pipeline_cache);
+    PipelineBinding *snode = container_of(node, PipelineBinding, node);
+
+    assert((!r->in_command_buffer ||
+            snode->draw_time < r->command_buffer_start_time) &&
+           "Pipeline evicted while in use!");
+
+    vkDestroyPipeline(r->device, snode->pipeline, NULL);
+    snode->pipeline = VK_NULL_HANDLE;
+
+    vkDestroyPipelineLayout(r->device, snode->layout, NULL);
+    snode->layout = VK_NULL_HANDLE;
+
+    /* NOTE(review): looks like leftover debug tracing -- prints on every
+     * eviction; consider routing through NV2A_VK_DPRINTF instead. */
+    fprintf(stderr, "released pipeline\n");
+}
+
+/* LRU callback: key comparison; returns nonzero (true) when keys differ,
+ * matching memcmp semantics expected by the LRU lookup. */
+static bool pipeline_cache_entry_compare(Lru *lru, LruNode *node, void *key)
+{
+    PipelineBinding *snode = container_of(node, PipelineBinding, node);
+    return memcmp(&snode->key, key, sizeof(PipelineKey));
+}
+
+/* Create the VkPipelineCache and seed the LRU with a fixed pool of 2048
+ * pipeline binding nodes. */
+static void init_pipeline_cache(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkPipelineCacheCreateInfo cache_info = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
+        .flags = 0,
+        .initialDataSize = 0,
+        .pInitialData = NULL,
+        .pNext = NULL,
+    };
+    VK_CHECK(vkCreatePipelineCache(r->device, &cache_info, NULL,
+                                   &r->vk_pipeline_cache));
+
+    const size_t pipeline_cache_size = 2048;
+    lru_init(&r->pipeline_cache);
+    r->pipeline_cache_entries =
+        g_malloc_n(pipeline_cache_size, sizeof(PipelineBinding));
+    assert(r->pipeline_cache_entries != NULL);
+    for (int i = 0; i < pipeline_cache_size; i++) {
+        lru_add_free(&r->pipeline_cache, &r->pipeline_cache_entries[i].node);
+    }
+
+    r->pipeline_cache.init_node = pipeline_cache_entry_init;
+    r->pipeline_cache.compare_nodes = pipeline_cache_entry_compare;
+    r->pipeline_cache.post_node_evict = pipeline_cache_entry_post_evict;
+}
+
+/* Flush (evicting and destroying all cached pipelines), then release the
+ * node pool and the VkPipelineCache itself. */
+static void finalize_pipeline_cache(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    lru_flush(&r->pipeline_cache);
+    g_free(r->pipeline_cache_entries);
+    r->pipeline_cache_entries = NULL;
+
+    vkDestroyPipelineCache(r->device, r->vk_pipeline_cache, NULL);
+}
+
+/* Vertex shader emitting a fullscreen triangle from gl_VertexIndex; shared
+ * by the clear and display paths. */
+static char const *const quad_glsl =
+    "#version 450\n"
+    "void main()\n"
+    "{\n"
+    " float x = -1.0 + float((gl_VertexIndex & 1) << 2);\n"
+    " float y = -1.0 + float((gl_VertexIndex & 2) << 1);\n"
+    " gl_Position = vec4(x, y, 0, 1);\n"
+    "}\n";
+
+/* Fragment shader writing solid white; color masking/clear values are
+ * applied via pipeline state by the clear path. */
+static char const *const solid_frag_glsl =
+    "#version 450\n"
+    "layout(location = 0) out vec4 fragColor;\n"
+    "void main()\n"
+    "{\n"
+    " fragColor = vec4(1.0);"
+    "}\n";
+
+static void init_clear_shaders(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    r->quad_vert_module = pgraph_vk_create_shader_module_from_glsl(
+        r, VK_SHADER_STAGE_VERTEX_BIT, quad_glsl);
+    r->solid_frag_module = pgraph_vk_create_shader_module_from_glsl(
+        r, VK_SHADER_STAGE_FRAGMENT_BIT, solid_frag_glsl);
+}
+
+/* (Definition continues beyond this chunk.) */
+void pgraph_vk_init_pipelines(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    init_pipeline_cache(pg);
+    init_clear_shaders(pg);
+
+    VkSemaphoreCreateInfo semaphore_info = {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO
+    };
+    VK_CHECK(vkCreateSemaphore(r->device, &semaphore_info, NULL,
+                               &r->command_buffer_semaphore));
+
+    VkFenceCreateInfo fence_info = {
+        .sType =
VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + }; + VK_CHECK( + vkCreateFence(r->device, &fence_info, NULL, &r->command_buffer_fence)); +} + +void pgraph_vk_finalize_pipelines(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + finalize_pipeline_cache(pg); + + vkDestroyFence(r->device, r->command_buffer_fence, NULL); + vkDestroySemaphore(r->device, r->command_buffer_semaphore, NULL); +} + +static void init_render_pass_state(PGRAPHState *pg, RenderPassState *state) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + state->color_format = r->color_binding ? + r->color_binding->host_fmt.vk_format : + VK_FORMAT_UNDEFINED; + state->zeta_format = r->zeta_binding ? r->zeta_binding->host_fmt.vk_format : + VK_FORMAT_UNDEFINED; +} + +static VkRenderPass create_render_pass(PGRAPHState *pg, RenderPassState *state) +{ + NV2A_VK_DPRINTF("Creating render pass"); + + PGRAPHVkState *r = pg->vk_renderer_state; + + VkAttachmentDescription attachments[2]; + int num_attachments = 0; + + bool color = state->color_format != VK_FORMAT_UNDEFINED; + bool zeta = state->zeta_format != VK_FORMAT_UNDEFINED; + + VkAttachmentReference color_reference; + if (color) { + attachments[num_attachments] = (VkAttachmentDescription){ + .format = state->color_format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + }; + color_reference = (VkAttachmentReference){ + num_attachments, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + }; + num_attachments++; + } + + VkAttachmentReference depth_reference; + if (zeta) { + attachments[num_attachments] = (VkAttachmentDescription){ + .format = state->zeta_format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = 
VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + }; + depth_reference = (VkAttachmentReference){ + num_attachments, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + }; + num_attachments++; + } + + VkSubpassDependency dependency = { + .srcSubpass = VK_SUBPASS_EXTERNAL, + }; + + if (color) { + dependency.srcStageMask |= + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstStageMask |= + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + + if (zeta) { + dependency.srcStageMask |= + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + dependency.dstStageMask |= + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + dependency.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + + VkSubpassDescription subpass = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .colorAttachmentCount = color ? 1 : 0, + .pColorAttachments = color ? &color_reference : NULL, + .pDepthStencilAttachment = zeta ? &depth_reference : NULL, + }; + + VkRenderPassCreateInfo renderpass_create_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = num_attachments, + .pAttachments = attachments, + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 1, + .pDependencies = &dependency, + }; + VkRenderPass render_pass; + VK_CHECK(vkCreateRenderPass(r->device, &renderpass_create_info, NULL, + &render_pass)); + return render_pass; +} + +static VkRenderPass add_new_render_pass(PGRAPHState *pg, RenderPassState *state) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + if (r->render_passes_index == r->render_passes_capacity) { + int n_blocks = r->render_passes_capacity; + r->render_passes_capacity = n_blocks ? 
(n_blocks * 2) : 256; + r->render_passes = + g_realloc_n(r->render_passes, r->render_passes_capacity, + sizeof(*r->render_passes)); + } + + RenderPass *rp = &r->render_passes[r->render_passes_index++]; + memcpy(&rp->state, state, sizeof(*state)); + rp->render_pass = create_render_pass(pg, state); + + return rp->render_pass; +} + +static VkRenderPass get_render_pass(PGRAPHState *pg, RenderPassState *state) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + for (int i = 0; i < r->render_passes_index; i++) { + if (!memcmp(&r->render_passes[i].state, state, sizeof(*state))) { + return r->render_passes[i].render_pass; + } + } + + return add_new_render_pass(pg, state); +} + +static void create_frame_buffer(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + NV2A_VK_DPRINTF("Creating framebuffer"); + + assert(r->color_binding || r->zeta_binding); + + if (r->framebuffer_index >= ARRAY_SIZE(r->framebuffers)) { + pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); + } + + VkImageView attachments[2]; + int attachment_count = 0; + + if (r->color_binding) { + attachments[attachment_count++] = r->color_binding->image_view; + } + if (r->zeta_binding) { + attachments[attachment_count++] = r->zeta_binding->image_view; + } + + SurfaceBinding *binding = r->color_binding ? 
: r->zeta_binding; + + VkFramebufferCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .renderPass = r->render_pass, + .attachmentCount = attachment_count, + .pAttachments = attachments, + .width = binding->width, + .height = binding->height, + .layers = 1, + }; + pgraph_apply_scaling_factor(pg, &create_info.width, &create_info.height); + VK_CHECK(vkCreateFramebuffer(r->device, &create_info, NULL, + &r->framebuffers[r->framebuffer_index++])); +} + +static void destroy_framebuffers(PGRAPHState *pg) +{ + NV2A_VK_DPRINTF("Destroying framebuffer"); + PGRAPHVkState *r = pg->vk_renderer_state; + + for (int i = 0; i < r->framebuffer_index; i++) { + vkDestroyFramebuffer(r->device, r->framebuffers[i], NULL); + r->framebuffers[i] = VK_NULL_HANDLE; + } + r->framebuffer_index = 0; +} + +static void create_clear_pipeline(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + NV2A_VK_DGROUP_BEGIN("Creating clear pipeline"); + + PipelineKey key; + memset(&key, 0, sizeof(key)); + key.clear = true; + init_render_pass_state(pg, &key.render_pass_state); + + key.regs[0] = r->clear_parameter; + + uint64_t hash = fast_hash((void *)&key, sizeof(key)); + LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key); + PipelineBinding *snode = container_of(node, PipelineBinding, node); + + if (snode->pipeline != VK_NULL_HANDLE) { + NV2A_VK_DPRINTF("Cache hit"); + r->pipeline_binding_changed = r->pipeline_binding != snode; + r->pipeline_binding = snode; + NV2A_VK_DGROUP_END(); + return; + } + + NV2A_VK_DPRINTF("Cache miss"); + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_GEN); + memcpy(&snode->key, &key, sizeof(key)); + + bool clear_any_color_channels = + r->clear_parameter & NV097_CLEAR_SURFACE_COLOR; + bool clear_all_color_channels = + (r->clear_parameter & NV097_CLEAR_SURFACE_COLOR) == + (NV097_CLEAR_SURFACE_R | NV097_CLEAR_SURFACE_G | NV097_CLEAR_SURFACE_B | + NV097_CLEAR_SURFACE_A); + bool partial_color_clear = + clear_any_color_channels && 
!clear_all_color_channels; + + VkPipelineShaderStageCreateInfo shader_stages[] = { + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = r->quad_vert_module->module, + .pName = "main", + }, + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = r->solid_frag_module->module, + .pName = "main", + }, + }; + + VkPipelineVertexInputStateCreateInfo vertex_input = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + }; + + VkPipelineInputAssemblyStateCreateInfo input_assembly = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + .primitiveRestartEnable = VK_FALSE, + }; + + VkPipelineViewportStateCreateInfo viewport_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }; + + VkPipelineRasterizationStateCreateInfo rasterizer = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .lineWidth = 1.0f, + .cullMode = VK_CULL_MODE_BACK_BIT, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + }; + + VkPipelineMultisampleStateCreateInfo multisampling = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .sampleShadingEnable = VK_FALSE, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + }; + + VkPipelineDepthStencilStateCreateInfo depth_stencil = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = VK_TRUE, + .depthWriteEnable = + (r->clear_parameter & NV097_CLEAR_SURFACE_Z) ? 
VK_TRUE : VK_FALSE, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = VK_FALSE, + }; + + if (r->clear_parameter & NV097_CLEAR_SURFACE_STENCIL) { + depth_stencil.stencilTestEnable = VK_TRUE; + depth_stencil.front.failOp = VK_STENCIL_OP_REPLACE; + depth_stencil.front.passOp = VK_STENCIL_OP_REPLACE; + depth_stencil.front.depthFailOp = VK_STENCIL_OP_REPLACE; + depth_stencil.front.compareOp = VK_COMPARE_OP_ALWAYS; + depth_stencil.front.compareMask = 0xff; + depth_stencil.front.writeMask = 0xff; + depth_stencil.front.reference = 0xff; + depth_stencil.back = depth_stencil.front; + } + + VkColorComponentFlags write_mask = 0; + if (r->clear_parameter & NV097_CLEAR_SURFACE_R) + write_mask |= VK_COLOR_COMPONENT_R_BIT; + if (r->clear_parameter & NV097_CLEAR_SURFACE_G) + write_mask |= VK_COLOR_COMPONENT_G_BIT; + if (r->clear_parameter & NV097_CLEAR_SURFACE_B) + write_mask |= VK_COLOR_COMPONENT_B_BIT; + if (r->clear_parameter & NV097_CLEAR_SURFACE_A) + write_mask |= VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendAttachmentState color_blend_attachment = { + .colorWriteMask = write_mask, + .blendEnable = VK_TRUE, + .colorBlendOp = VK_BLEND_OP_ADD, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .srcColorBlendFactor = VK_BLEND_FACTOR_CONSTANT_COLOR, + .alphaBlendOp = VK_BLEND_OP_ADD, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_CONSTANT_ALPHA, + }; + + VkPipelineColorBlendStateCreateInfo color_blending = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = r->color_binding ? 1 : 0, + .pAttachments = r->color_binding ? 
&color_blend_attachment : NULL, + }; + + VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_BLEND_CONSTANTS }; + VkPipelineDynamicStateCreateInfo dynamic_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = partial_color_clear ? 3 : 2, + .pDynamicStates = dynamic_states, + }; + + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + }; + + VkPipelineLayout layout; + VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL, + &layout)); + + VkGraphicsPipelineCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(shader_stages), + .pStages = shader_stages, + .pVertexInputState = &vertex_input, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_state, + .pRasterizationState = &rasterizer, + .pMultisampleState = &multisampling, + .pDepthStencilState = r->zeta_binding ? 
&depth_stencil : NULL, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_state, + .layout = layout, + .renderPass = get_render_pass(pg, &key.render_pass_state), + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + }; + + VkPipeline pipeline; + VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1, + &pipeline_info, NULL, &pipeline)); + + snode->pipeline = pipeline; + snode->layout = layout; + snode->render_pass = pipeline_info.renderPass; + snode->draw_time = pg->draw_time; + + r->pipeline_binding = snode; + r->pipeline_binding_changed = true; + + NV2A_VK_DGROUP_END(); +} + +static bool check_render_pass_dirty(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + assert(r->pipeline_binding); + + RenderPassState state; + init_render_pass_state(pg, &state); + + return memcmp(&state, &r->pipeline_binding->key.render_pass_state, + sizeof(state)) != 0; +} + +// Quickly check for any state changes that would require more analysis +static bool check_pipeline_dirty(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + assert(r->pipeline_binding); + + if (r->shader_bindings_changed || r->texture_bindings_changed || + check_render_pass_dirty(pg)) { + return true; + } + + const unsigned int regs[] = { + NV_PGRAPH_BLEND, NV_PGRAPH_BLENDCOLOR, + NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_2, NV_PGRAPH_CONTROL_3, + NV_PGRAPH_SETUPRASTER, NV_PGRAPH_ZCOMPRESSOCCLUDE, + NV_PGRAPH_ZOFFSETBIAS, NV_PGRAPH_ZOFFSETFACTOR, + }; + + for (int i = 0; i < ARRAY_SIZE(regs); i++) { + if (pgraph_is_reg_dirty(pg, regs[i])) { + return true; + } + } + + // FIXME: Use dirty bits instead + if (memcmp(r->vertex_attribute_descriptions, + r->pipeline_binding->key.attribute_descriptions, + r->num_active_vertex_attribute_descriptions * + sizeof(r->vertex_attribute_descriptions[0])) || + memcmp(r->vertex_binding_descriptions, + r->pipeline_binding->key.binding_descriptions, + r->num_active_vertex_binding_descriptions * + 
sizeof(r->vertex_binding_descriptions[0]))) { + return true; + } + + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_NOTDIRTY); + + return false; +} + +static void init_pipeline_key(PGRAPHState *pg, PipelineKey *key) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + memset(key, 0, sizeof(*key)); + init_render_pass_state(pg, &key->render_pass_state); + memcpy(&key->shader_state, &r->shader_binding->state, sizeof(ShaderState)); + memcpy(key->binding_descriptions, r->vertex_binding_descriptions, + sizeof(key->binding_descriptions[0]) * + r->num_active_vertex_binding_descriptions); + memcpy(key->attribute_descriptions, r->vertex_attribute_descriptions, + sizeof(key->attribute_descriptions[0]) * + r->num_active_vertex_attribute_descriptions); + + // FIXME: Register masking + // FIXME: Use more dynamic state updates + const int regs[] = { + NV_PGRAPH_BLEND, NV_PGRAPH_BLENDCOLOR, + NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_2, NV_PGRAPH_CONTROL_3, + NV_PGRAPH_SETUPRASTER, NV_PGRAPH_ZCOMPRESSOCCLUDE, + NV_PGRAPH_ZOFFSETBIAS, NV_PGRAPH_ZOFFSETFACTOR, + }; + assert(ARRAY_SIZE(regs) == ARRAY_SIZE(key->regs)); + for (int i = 0; i < ARRAY_SIZE(regs); i++) { + key->regs[i] = pgraph_reg_r(pg, regs[i]); + } +} + +static void create_pipeline(PGRAPHState *pg) +{ + NV2A_VK_DGROUP_BEGIN("Creating pipeline"); + + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + + pgraph_vk_bind_textures(d); + pgraph_vk_bind_shaders(pg); + + // FIXME: If nothing was dirty, don't even try creating the key or hashing. + // Just use the same pipeline. 
+ if (r->pipeline_binding && !check_pipeline_dirty(pg)) { + return; + } + + PipelineKey key; + init_pipeline_key(pg, &key); + uint64_t hash = fast_hash((void *)&key, sizeof(key)); + + static uint64_t last_hash; + if (hash == last_hash) { + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_MERGE); + } + last_hash = hash; + + LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key); + PipelineBinding *snode = container_of(node, PipelineBinding, node); + if (snode->pipeline != VK_NULL_HANDLE) { + NV2A_VK_DPRINTF("Cache hit"); + r->pipeline_binding_changed = r->pipeline_binding != snode; + r->pipeline_binding = snode; + NV2A_VK_DGROUP_END(); + return; + } + + NV2A_VK_DPRINTF("Cache miss"); + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_GEN); + + memcpy(&snode->key, &key, sizeof(key)); + + uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0); + bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; + bool depth_write = !!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE); + bool stencil_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; + + int num_active_shader_stages = 0; + VkPipelineShaderStageCreateInfo shader_stages[3]; + + if (r->shader_binding->geometry) { + shader_stages[num_active_shader_stages++] = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_GEOMETRY_BIT, + .module = r->shader_binding->geometry->module, + .pName = "main", + }; + } + shader_stages[num_active_shader_stages++] = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = r->shader_binding->vertex->module, + .pName = "main", + }; + shader_stages[num_active_shader_stages++] = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = r->shader_binding->fragment->module, + .pName = "main", + }; 
+ + VkPipelineVertexInputStateCreateInfo vertex_input = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = + r->num_active_vertex_binding_descriptions, + .pVertexBindingDescriptions = r->vertex_binding_descriptions, + .vertexAttributeDescriptionCount = + r->num_active_vertex_attribute_descriptions, + .pVertexAttributeDescriptions = r->vertex_attribute_descriptions, + }; + + VkPipelineInputAssemblyStateCreateInfo input_assembly = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = get_primitive_topology(pg), + .primitiveRestartEnable = VK_FALSE, + }; + + VkPipelineViewportStateCreateInfo viewport_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }; + + + void *rasterizer_next_struct = NULL; + + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state; + + if (r->provoking_vertex_extension_enabled) { + VkProvokingVertexModeEXT provoking_mode = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), + NV_PGRAPH_CONTROL_3_SHADEMODE) == + NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT ? + VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT : + VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; + + provoking_state = + (VkPipelineRasterizationProvokingVertexStateCreateInfoEXT){ + .sType = + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .provokingVertexMode = provoking_mode, + }; + rasterizer_next_struct = &provoking_state; + } else { + // FIXME: Handle in shader? + } + + VkPipelineRasterizationStateCreateInfo rasterizer = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = pgraph_polygon_mode_vk_map[r->shader_binding->state + .polygon_front_mode], + .lineWidth = 1.0f, + .frontFace = (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_FRONTFACE) ? 
+ VK_FRONT_FACE_COUNTER_CLOCKWISE : + VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + .pNext = rasterizer_next_struct, + }; + + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_CULLENABLE) { + uint32_t cull_face = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), + NV_PGRAPH_SETUPRASTER_CULLCTRL); + assert(cull_face < ARRAY_SIZE(pgraph_cull_face_vk_map)); + rasterizer.cullMode = pgraph_cull_face_vk_map[cull_face]; + } else { + rasterizer.cullMode = VK_CULL_MODE_NONE; + } + + VkPipelineMultisampleStateCreateInfo multisampling = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .sampleShadingEnable = VK_FALSE, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + }; + + VkPipelineDepthStencilStateCreateInfo depth_stencil = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthWriteEnable = depth_write ? VK_TRUE : VK_FALSE, + }; + + if (depth_test) { + depth_stencil.depthTestEnable = VK_TRUE; + uint32_t depth_func = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), NV_PGRAPH_CONTROL_0_ZFUNC); + assert(depth_func < ARRAY_SIZE(pgraph_depth_func_vk_map)); + depth_stencil.depthCompareOp = pgraph_depth_func_vk_map[depth_func]; + } + + if (stencil_test) { + depth_stencil.stencilTestEnable = VK_TRUE; + uint32_t stencil_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_FUNC); + uint32_t stencil_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_REF); + uint32_t mask_read = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ); + uint32_t mask_write = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE); + uint32_t op_fail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2), + NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL); + uint32_t op_zfail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2), + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL); + uint32_t op_zpass = 
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2), + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS); + + assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_vk_map)); + assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_vk_map)); + assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_vk_map)); + assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_vk_map)); + + depth_stencil.front.failOp = pgraph_stencil_op_vk_map[op_fail]; + depth_stencil.front.passOp = pgraph_stencil_op_vk_map[op_zpass]; + depth_stencil.front.depthFailOp = pgraph_stencil_op_vk_map[op_zfail]; + depth_stencil.front.compareOp = + pgraph_stencil_func_vk_map[stencil_func]; + depth_stencil.front.compareMask = mask_read; + depth_stencil.front.writeMask = mask_write; + depth_stencil.front.reference = stencil_ref; + depth_stencil.back = depth_stencil.front; + } + + VkColorComponentFlags write_mask = 0; + if (control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE) + write_mask |= VK_COLOR_COMPONENT_R_BIT; + if (control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE) + write_mask |= VK_COLOR_COMPONENT_G_BIT; + if (control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE) + write_mask |= VK_COLOR_COMPONENT_B_BIT; + if (control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE) + write_mask |= VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendAttachmentState color_blend_attachment = { + .colorWriteMask = write_mask, + }; + + float blend_constant[4] = { 0, 0, 0, 0 }; + + if (pgraph_reg_r(pg, NV_PGRAPH_BLEND) & NV_PGRAPH_BLEND_EN) { + color_blend_attachment.blendEnable = VK_TRUE; + + uint32_t sfactor = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_SFACTOR); + uint32_t dfactor = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_DFACTOR); + assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_vk_map)); + assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_vk_map)); + color_blend_attachment.srcColorBlendFactor = + pgraph_blend_factor_vk_map[sfactor]; + color_blend_attachment.dstColorBlendFactor = + pgraph_blend_factor_vk_map[dfactor]; + 
color_blend_attachment.srcAlphaBlendFactor = + pgraph_blend_factor_vk_map[sfactor]; + color_blend_attachment.dstAlphaBlendFactor = + pgraph_blend_factor_vk_map[dfactor]; + + uint32_t equation = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_EQN); + assert(equation < ARRAY_SIZE(pgraph_blend_equation_vk_map)); + + color_blend_attachment.colorBlendOp = + pgraph_blend_equation_vk_map[equation]; + color_blend_attachment.alphaBlendOp = + pgraph_blend_equation_vk_map[equation]; + + uint32_t blend_color = pgraph_reg_r(pg, NV_PGRAPH_BLENDCOLOR); + pgraph_argb_pack32_to_rgba_float(blend_color, blend_constant); + } + + VkPipelineColorBlendStateCreateInfo color_blending = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = r->color_binding ? 1 : 0, + .pAttachments = r->color_binding ? &color_blend_attachment : NULL, + .blendConstants[0] = blend_constant[0], + .blendConstants[1] = blend_constant[1], + .blendConstants[2] = blend_constant[2], + .blendConstants[3] = blend_constant[3], + }; + + VkDynamicState dynamic_states[2] = { VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR }; + + VkPipelineDynamicStateCreateInfo dynamic_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = ARRAY_SIZE(dynamic_states), + .pDynamicStates = dynamic_states, + }; + + // /* Clipping */ + // glEnable(GL_CLIP_DISTANCE0); + // glEnable(GL_CLIP_DISTANCE1); + + // /* Polygon offset */ + // /* FIXME: GL implementation-specific, maybe do this in VS? 
*/ + // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + // NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) + // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + // NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) + // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + // NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { + uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR); + float zfactor = *(float *)&zfactor_u32; + uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); + float zbias = *(float *)&zbias_u32; + rasterizer.depthBiasEnable = VK_TRUE; + rasterizer.depthBiasSlopeFactor = zfactor; + rasterizer.depthBiasConstantFactor = zbias; + } + + if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE), + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) { + rasterizer.depthClampEnable = VK_TRUE; + } + + // FIXME: Dither + // if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + // NV_PGRAPH_CONTROL_0_DITHERENABLE)) + // FIXME: point size + // FIXME: Edge Antialiasing + // bool anti_aliasing = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ANTIALIASING), + // NV_PGRAPH_ANTIALIASING_ENABLE); + // if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + // NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) { + // FIXME: VK_EXT_line_rasterization + // } + + // if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + // NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) { + // FIXME: No direct analog. Just do it with MSAA. 
+ // } + + VkPushConstantRange push_constant_range = { + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .offset = 0, + // FIXME: Minimize push constants + .size = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float), + }; + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &r->descriptor_set_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_constant_range, + }; + VkPipelineLayout layout; + VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL, + &layout)); + + VkGraphicsPipelineCreateInfo pipeline_create_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = num_active_shader_stages, + .pStages = shader_stages, + .pVertexInputState = &vertex_input, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_state, + .pRasterizationState = &rasterizer, + .pMultisampleState = &multisampling, + .pDepthStencilState = r->zeta_binding ? 
&depth_stencil : NULL, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_state, + .layout = layout, + .renderPass = get_render_pass(pg, &key.render_pass_state), + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + }; + VkPipeline pipeline; + VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1, + &pipeline_create_info, NULL, &pipeline)); + + snode->pipeline = pipeline; + snode->layout = layout; + snode->render_pass = pipeline_create_info.renderPass; + snode->draw_time = pg->draw_time; + + r->pipeline_binding = snode; + r->pipeline_binding_changed = true; + + NV2A_VK_DGROUP_END(); +} + +static void push_vertex_attrib_values(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + // FIXME: Do partial updates + + float attrib_values[NV2A_VERTEXSHADER_ATTRIBUTES * 4]; + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + attrib_values[i * 4 + 0] = pg->vertex_attributes[i].inline_value[0]; + attrib_values[i * 4 + 1] = pg->vertex_attributes[i].inline_value[1]; + attrib_values[i * 4 + 2] = pg->vertex_attributes[i].inline_value[2]; + attrib_values[i * 4 + 3] = pg->vertex_attributes[i].inline_value[3]; + } + + vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout, + VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(attrib_values), + &attrib_values); +} + +static void bind_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + assert(r->descriptor_set_index >= 1); + + vkCmdBindDescriptorSets(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + r->pipeline_binding->layout, 0, 1, + &r->descriptor_sets[r->descriptor_set_index - 1], 0, + NULL); +} + +static void begin_query(PGRAPHVkState *r) +{ + assert(r->in_command_buffer); + assert(!r->in_render_pass); + assert(!r->query_in_flight); + + // FIXME: We should handle this. Make the query buffer bigger, but at least + // flush current queries. 
+ assert(r->num_queries_in_flight < r->max_queries_in_flight); + + nv2a_profile_inc_counter(NV2A_PROF_QUERY); + vkCmdResetQueryPool(r->command_buffer, r->query_pool, + r->num_queries_in_flight, 1); + vkCmdBeginQuery(r->command_buffer, r->query_pool, r->num_queries_in_flight, + VK_QUERY_CONTROL_PRECISE_BIT); + + r->query_in_flight = true; + r->new_query_needed = false; + r->num_queries_in_flight++; +} + +static void end_query(PGRAPHVkState *r) +{ + assert(r->in_command_buffer); + assert(!r->in_render_pass); + assert(r->query_in_flight); + + vkCmdEndQuery(r->command_buffer, r->query_pool, + r->num_queries_in_flight - 1); + r->query_in_flight = false; +} + +static void sync_staging_buffer(PGRAPHState *pg, VkCommandBuffer cmd, + int index_src, int index_dst) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + StorageBuffer *b_src = &r->storage_buffers[index_src]; + StorageBuffer *b_dst = &r->storage_buffers[index_dst]; + + if (!b_src->buffer_offset) { + return; + } + + VkBufferCopy copy_region = { .size = b_src->buffer_offset }; + vkCmdCopyBuffer(cmd, b_src->buffer, b_dst->buffer, 1, ©_region); + + b_src->buffer_offset = 0; +} + +static void begin_render_pass(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->in_command_buffer); + assert(!r->in_render_pass); + + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_RENDERPASSES); + + unsigned int vp_width = pg->surface_binding_dim.width, + vp_height = pg->surface_binding_dim.height; + pgraph_apply_scaling_factor(pg, &vp_width, &vp_height); + + assert(r->framebuffer_index > 0); + + VkRenderPassBeginInfo render_pass_begin_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = r->render_pass, + .framebuffer = r->framebuffers[r->framebuffer_index - 1], + .renderArea.extent.width = vp_width, + .renderArea.extent.height = vp_height, + .clearValueCount = 0, + .pClearValues = NULL, + }; + vkCmdBeginRenderPass(r->command_buffer, &render_pass_begin_info, + VK_SUBPASS_CONTENTS_INLINE); + 
    r->in_render_pass = true;

}

/* End the current render pass, if one is active. Safe to call redundantly. */
static void end_render_pass(PGRAPHVkState *r)
{
    if (r->in_render_pass) {
        vkCmdEndRenderPass(r->command_buffer);
        r->in_render_pass = false;
    }
}

/* Maps each FinishReason to the profiler counter bumped on submit. */
const enum NV2A_PROF_COUNTERS_ENUM finish_reason_to_counter_enum[] = {
    [VK_FINISH_REASON_VERTEX_BUFFER_DIRTY] = NV2A_PROF_FINISH_VERTEX_BUFFER_DIRTY,
    [VK_FINISH_REASON_SURFACE_CREATE] = NV2A_PROF_FINISH_SURFACE_CREATE,
    [VK_FINISH_REASON_SURFACE_DOWN] = NV2A_PROF_FINISH_SURFACE_DOWN,
    [VK_FINISH_REASON_NEED_BUFFER_SPACE] = NV2A_PROF_FINISH_NEED_BUFFER_SPACE,
    [VK_FINISH_REASON_FRAMEBUFFER_DIRTY] = NV2A_PROF_FINISH_FRAMEBUFFER_DIRTY,
    [VK_FINISH_REASON_PRESENTING] = NV2A_PROF_FINISH_PRESENTING,
    [VK_FINISH_REASON_FLIP_STALL] = NV2A_PROF_FINISH_FLIP_STALL,
    [VK_FINISH_REASON_FLUSH] = NV2A_PROF_FINISH_FLUSH,
};

/*
 * Flush all recorded work to the GPU and wait for it to complete.
 *
 * Ends any open render pass and query, closes the main command buffer,
 * records staging->device copies into the aux command buffer, then submits
 * both batches (aux first, chained to the main buffer via a semaphore) and
 * blocks on the fence. On return the renderer is idle: no command buffer is
 * open, descriptor set usage is reset, and framebuffers are destroyed.
 */
void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(!r->in_draw);

    if (r->in_command_buffer) {

        nv2a_profile_inc_counter(finish_reason_to_counter_enum[finish_reason]);

        if (r->in_render_pass) {
            end_render_pass(r);
        }
        if (r->query_in_flight) {
            end_query(r);
        }
        VK_CHECK(vkEndCommandBuffer(r->command_buffer));

        /* Record staging uploads into the aux buffer so they land on the
         * device before the main buffer (which waits on the semaphore). */
        VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); // FIXME: Cleanup
        sync_staging_buffer(pg, cmd, BUFFER_INDEX_STAGING, BUFFER_INDEX);
        sync_staging_buffer(pg, cmd, BUFFER_VERTEX_INLINE_STAGING,
                            BUFFER_VERTEX_INLINE);
        sync_staging_buffer(pg, cmd, BUFFER_UNIFORM_STAGING, BUFFER_UNIFORM);
        bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size);
        VK_CHECK(vkEndCommandBuffer(r->aux_command_buffer));
        r->in_aux_command_buffer = false;

        VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
        VkSubmitInfo submit_infos[] = {
            {
                /* Aux batch: signals the semaphore when done. */
                .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
                .commandBufferCount = 1,
                .pCommandBuffers = &r->aux_command_buffer,
                .signalSemaphoreCount = 1,
                .pSignalSemaphores = &r->command_buffer_semaphore,
            },
            {
                /* Main batch: waits for the aux batch's semaphore. */
                .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
                .commandBufferCount = 1,
                .pCommandBuffers = &r->command_buffer,
                .waitSemaphoreCount = 1,
                .pWaitSemaphores = &r->command_buffer_semaphore,
                .pWaitDstStageMask = &wait_stage,
            }
        };
        nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT);
        vkResetFences(r->device, 1, &r->command_buffer_fence);
        VK_CHECK(vkQueueSubmit(r->queue, ARRAY_SIZE(submit_infos), submit_infos,
                               r->command_buffer_fence));
        r->submit_count += 1;

        // Periodically check memory budget
        const int max_num_submits_before_budget_update = 5;
        if (finish_reason == VK_FINISH_REASON_FLIP_STALL ||
            (r->submit_count - r->allocator_last_submit_index) >
                max_num_submits_before_budget_update) {

            // VMA queries budget via vmaSetCurrentFrameIndex
            vmaSetCurrentFrameIndex(r->allocator, r->submit_count);
            r->allocator_last_submit_index = r->submit_count;

            pgraph_vk_check_memory_budget(pg);
        }

        /* Full CPU/GPU sync: wait for both batches to retire. */
        VK_CHECK(vkWaitForFences(r->device, 1, &r->command_buffer_fence,
                                 VK_TRUE, UINT64_MAX));

        r->descriptor_set_index = 0;
        r->in_command_buffer = false;
        destroy_framebuffers(pg);
    }

    NV2AState *d = container_of(pg, NV2AState, pgraph);
    pgraph_vk_process_pending_reports_internal(d);
}

/* Open the main command buffer for one-time-submit recording. */
void pgraph_vk_begin_command_buffer(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    assert(!r->in_command_buffer);

    VkCommandBufferBeginInfo command_buffer_begin_info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };
    VK_CHECK(vkBeginCommandBuffer(r->command_buffer,
                                  &command_buffer_begin_info));
    r->command_buffer_start_time = pg->draw_time;
    r->in_command_buffer = true;
}

// FIXME: Refactor below

/* Open the main command buffer if it is not already recording. */
void pgraph_vk_ensure_command_buffer(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    if (!r->in_command_buffer) {
        pgraph_vk_begin_command_buffer(pg);
    }
}

void pgraph_vk_ensure_not_in_render_pass(PGRAPHState *pg)
{
    PGRAPHVkState *r =
    pg->vk_renderer_state;

    /* A query cannot remain open outside a render pass, so end it too. */
    end_render_pass(r);
    if (r->query_in_flight) {
        end_query(r);
    }
}

/* Prepare to record non-draw commands (copies, barriers): ensures a command
 * buffer is recording and no render pass is active. Returns the buffer. */
VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    pgraph_vk_ensure_command_buffer(pg);
    pgraph_vk_ensure_not_in_render_pass(pg);
    return r->command_buffer;
}

/* Closes a pgraph_vk_begin_nondraw_commands scope; currently only sanity
 * checks that the same buffer is still current. */
void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    assert(cmd == r->command_buffer);
}

// FIXME: Add more metrics for determining command buffer 'fullness' and
//        conservatively flush. Unfortunately there doesn't appear to be a good
//        way to determine what the actual maximum capacity of a command buffer
//        is, but we are obviously not supposed to endlessly append to one command
//        buffer. For other reasons though (like descriptor set amount, surface
//        changes, etc) we do flush often.

/* Pipeline/framebuffer setup that must happen before begin_draw: selects or
 * builds the pipeline (clear vs. normal), recreates the framebuffer and
 * render pass if stale, and updates descriptor sets for normal draws. */
static void begin_pre_draw(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(r->color_binding || r->zeta_binding);
    assert(!r->color_binding || r->color_binding->initialized);
    assert(!r->zeta_binding || r->zeta_binding->initialized);

    if (pg->clearing) {
        create_clear_pipeline(pg);
    } else {
        create_pipeline(pg);
    }

    bool render_pass_dirty = r->pipeline_binding->render_pass != r->render_pass;

    if (r->framebuffer_dirty || render_pass_dirty) {
        pgraph_vk_ensure_not_in_render_pass(pg);
    }
    if (render_pass_dirty) {
        r->render_pass = r->pipeline_binding->render_pass;
    }
    if (r->framebuffer_dirty) {
        create_frame_buffer(pg);
        r->framebuffer_dirty = false;
    }
    if (!pg->clearing) {
        pgraph_vk_update_descriptor_sets(pg);
    }
    if (r->framebuffer_index == 0) {
        create_frame_buffer(pg);
    }

    pgraph_vk_ensure_command_buffer(pg);
}

/* Enter draw state: manages the occlusion query lifecycle, (re)enters the
 * render pass, binds the pipeline when it changed, and sets the dynamic
 * viewport/scissor state derived from the bound surface. */
static void begin_draw(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(r->in_command_buffer);

    // Visibility testing
    if (pg->zpass_pixel_count_enable) {
        /* Queries can only start/stop outside a render pass. */
        if (r->new_query_needed && r->query_in_flight) {
            end_render_pass(r);
            end_query(r);
        }
        if (!r->query_in_flight) {
            end_render_pass(r);
            begin_query(r);
        }
    } else if (r->query_in_flight) {
        end_render_pass(r);
        end_query(r);
    }

    bool must_bind_pipeline = r->pipeline_binding_changed;

    if (!r->in_render_pass) {
        begin_render_pass(pg);
        must_bind_pipeline = true;
    }

    if (must_bind_pipeline) {
        nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_BIND);
        vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
                          r->pipeline_binding->pipeline);
        r->pipeline_binding->draw_time = pg->draw_time;

        unsigned int vp_width = pg->surface_binding_dim.width,
                     vp_height = pg->surface_binding_dim.height;
        pgraph_apply_scaling_factor(pg, &vp_width, &vp_height);

        VkViewport viewport = {
            .width = vp_width,
            .height = vp_height,
            .minDepth = 0.0,
            .maxDepth = 1.0,
        };
        vkCmdSetViewport(r->command_buffer, 0, 1, &viewport);

        /* Surface clip */
        /* FIXME: Consider moving to PSH w/ window clip */
        unsigned int xmin = pg->surface_shape.clip_x -
                            pg->surface_binding_dim.clip_x,
                     ymin = pg->surface_shape.clip_y -
                            pg->surface_binding_dim.clip_y;

        unsigned int xmax = xmin + pg->surface_shape.clip_width - 1,
                     ymax = ymin + pg->surface_shape.clip_height - 1;

        unsigned int scissor_width = xmax - xmin + 1,
                     scissor_height = ymax - ymin + 1;

        pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
        pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);

        pgraph_apply_scaling_factor(pg, &xmin, &ymin);
        pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);

        VkRect2D scissor = {
            .offset.x = xmin,
            .offset.y = ymin,
            .extent.width = scissor_width,
            .extent.height = scissor_height,
        };
        vkCmdSetScissor(r->command_buffer, 0, 1, &scissor);
    }

    /* Clears use push-constant-free clear pipelines and need no sets. */
    if (!pg->clearing) {
        bind_descriptor_sets(pg);
        push_vertex_attrib_values(pg);
    }

    r->in_draw = true;
}

static void end_draw(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(r->in_command_buffer);
    assert(r->in_render_pass);

    r->in_draw = false;

    // FIXME: We could clear less
    pgraph_clear_dirty_reg_map(pg);
}

/*
 * Complete a NV097_SET_BEGIN_END(end) draw: skips nop draws (all color,
 * depth and stencil writes masked off), flushes accumulated geometry, and
 * stamps the bound surfaces with the new draw time.
 */
void pgraph_vk_draw_end(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
    bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
    bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
    bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
    bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
    bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
    bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
    bool stencil_test =
        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
    bool is_nop_draw = !(color_write || depth_test || stencil_test);

    if (is_nop_draw) {
        // FIXME: Check PGRAPH register 0x880.
        // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit
        // check that will raise an exception in the case that a draw should
        // modify the color and/or zeta buffer but the target(s) are masked
        // off. This check only seems to trigger during the fragment
        // processing, it is legal to attempt a draw that is entirely
        // clipped regardless of 0x880. See xemu#635 for context.
        NV2A_VK_DPRINTF("nop draw!\n");
        return;
    }

    pgraph_vk_flush_draw(d);

    pg->draw_time++;
    if (r->color_binding && pgraph_color_write_enabled(pg)) {
        r->color_binding->draw_time = pg->draw_time;
    }
    if (r->zeta_binding && pgraph_zeta_write_enabled(pg)) {
        r->zeta_binding->draw_time = pg->draw_time;
    }

    pgraph_vk_set_surface_dirty(pg, color_write, depth_test || stencil_test);
}

/* qsort comparator: orders MemorySyncRequirement entries by start address. */
static int compare_memory_sync_requirement_by_addr(const void *p1,
                                                   const void *p2)
{
    const MemorySyncRequirement *l = p1, *r = p2;
    if (l->addr < r->addr)
        return -1;
    if (l->addr > r->addr)
        return 1;
    return 0;
}

/*
 * Re-upload any guest vertex RAM ranges that were touched since last sync.
 * Ranges are page-aligned, sorted, and merged, then each merged range is
 * checked against the NV2A dirty-memory bitmap and uploaded if dirty.
 */
static void sync_vertex_ram_buffer(PGRAPHState *pg)
{
    NV2AState *d = container_of(pg, NV2AState, pgraph);
    PGRAPHVkState *r = pg->vk_renderer_state;

    if (r->num_vertex_ram_buffer_syncs == 0) {
        return;
    }

    // Align sync requirements to page boundaries
    NV2A_VK_DGROUP_BEGIN("Sync vertex RAM buffer");

    for (int i = 0; i < r->num_vertex_ram_buffer_syncs; i++) {
        NV2A_VK_DPRINTF("Need to sync vertex memory @%" HWADDR_PRIx
                        ", %" HWADDR_PRIx " bytes",
                        r->vertex_ram_buffer_syncs[i].addr,
                        r->vertex_ram_buffer_syncs[i].size);

        hwaddr start_addr =
            r->vertex_ram_buffer_syncs[i].addr & TARGET_PAGE_MASK;
        hwaddr end_addr = r->vertex_ram_buffer_syncs[i].addr +
                          r->vertex_ram_buffer_syncs[i].size;
        end_addr = ROUND_UP(end_addr, TARGET_PAGE_SIZE);

        NV2A_VK_DPRINTF("- %d: %08" HWADDR_PRIx " %zd bytes"
                        " -> %08" HWADDR_PRIx " %zd bytes", i,
                        r->vertex_ram_buffer_syncs[i].addr,
                        r->vertex_ram_buffer_syncs[i].size, start_addr,
                        end_addr - start_addr);

        r->vertex_ram_buffer_syncs[i].addr = start_addr;
        r->vertex_ram_buffer_syncs[i].size = end_addr - start_addr;
    }

    // Sort the requirements in increasing order of addresses
    qsort(r->vertex_ram_buffer_syncs, r->num_vertex_ram_buffer_syncs,
          sizeof(MemorySyncRequirement),
          compare_memory_sync_requirement_by_addr);

    // Merge overlapping/adjacent requests to minimize number of tests
    /* NOTE(review): merged[] can hold at most 16 ranges; this assumes
     * num_vertex_ram_buffer_syncs never exceeds 16 — confirm the producer
     * bounds it, otherwise merged[num_syncs++] can overflow. */
    MemorySyncRequirement merged[16];
    int num_syncs = 1;

    merged[0] = r->vertex_ram_buffer_syncs[0];

    for (int i = 1; i < r->num_vertex_ram_buffer_syncs; i++) {
        MemorySyncRequirement *p = &merged[num_syncs - 1];
        MemorySyncRequirement *t = &r->vertex_ram_buffer_syncs[i];

        if (t->addr <= (p->addr + p->size)) {
            // Merge with previous
            hwaddr p_end_addr = p->addr + p->size;
            hwaddr t_end_addr = t->addr + t->size;
            hwaddr new_end_addr = MAX(p_end_addr, t_end_addr);
            p->size = new_end_addr - p->addr;
        } else {
            merged[num_syncs++] = *t;
        }
    }

    if (num_syncs < r->num_vertex_ram_buffer_syncs) {
        NV2A_VK_DPRINTF("Reduced to %d sync checks", num_syncs);
    }

    for (int i = 0; i < num_syncs; i++) {
        hwaddr addr = merged[i].addr;
        VkDeviceSize size = merged[i].size;

        NV2A_VK_DPRINTF("- %d: %08"HWADDR_PRIx" %zd bytes", i, addr, size);

        if (memory_region_test_and_clear_dirty(d->vram, addr, size,
                                               DIRTY_MEMORY_NV2A)) {
            NV2A_VK_DPRINTF("Memory dirty. Synchronizing...");
            pgraph_vk_update_vertex_ram_buffer(pg, addr, d->vram_ptr + addr,
                                               size);
        }
    }

    r->num_vertex_ram_buffer_syncs = 0;

    NV2A_VK_DGROUP_END();
}

/*
 * Handle NV097_CLEAR_SURFACE: clears the selected color channels and/or
 * depth/stencil aspects of the bound surfaces within the clear rectangle.
 */
void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    nv2a_profile_inc_counter(NV2A_PROF_CLEAR);

    bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR);
    bool write_zeta =
        (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL));

    // FIXME: If doing a full surface clear, mark the surface for full clear
    //        and we can just do the clear as part of the surface load.
    pgraph_vk_surface_update(d, true, write_color, write_zeta);

    if (!(r->color_binding || r->zeta_binding)) {
        /* Nothing bound to clear */
        return;
    }

    pg->clearing = true;
    r->clear_parameter = parameter;

    /* Clear rectangle in surface coordinates, from PGRAPH registers. */
    unsigned int xmin =
        GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMIN);
    unsigned int xmax =
        GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMAX);
    unsigned int ymin =
        GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMIN);
    unsigned int ymax =
        GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMAX);

    NV2A_VK_DGROUP_BEGIN("CLEAR min=(%d,%d) max=(%d,%d)%s%s", xmin, ymin, xmax,
                         ymax, write_color ? " color" : "",
                         write_zeta ? " zeta" : "");

    begin_pre_draw(pg);
    begin_draw(pg);

    unsigned int scissor_width = xmax - xmin + 1,
                 scissor_height = ymax - ymin + 1;

    pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
    pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);

    pgraph_apply_scaling_factor(pg, &xmin, &ymin);
    pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);

    VkClearRect clear_rect = {
        .rect = {
            .offset = { .x = xmin, .y = ymin },
            .extent = { .width = scissor_width, .height = scissor_height },
        },
        .baseArrayLayer = 0,
        .layerCount = 1,
    };

    int num_attachments = 0;
    VkClearAttachment attachments[2];

    if (write_color && r->color_binding) {
        const bool clear_all_color_channels =
            (parameter & NV097_CLEAR_SURFACE_COLOR) ==
            (NV097_CLEAR_SURFACE_R | NV097_CLEAR_SURFACE_G |
             NV097_CLEAR_SURFACE_B | NV097_CLEAR_SURFACE_A);

        if (clear_all_color_channels) {
            /* Full-channel clear: use vkCmdClearAttachments below. */
            attachments[num_attachments] = (VkClearAttachment){
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .colorAttachment = 0,
            };
            pgraph_get_clear_color(
                pg, attachments[num_attachments].clearValue.color.float32);
            num_attachments++;
        } else {
            /* Partial-channel clear: draw a full-screen triangle with the
             * clear pipeline, passing the clear color as blend constants. */
            float blend_constants[4];
            pgraph_get_clear_color(pg, blend_constants);
            vkCmdSetScissor(r->command_buffer, 0, 1, &clear_rect.rect);
            vkCmdSetBlendConstants(r->command_buffer, blend_constants);
            vkCmdDraw(r->command_buffer, 3, 1, 0, 0);
        }
    }

    if (write_zeta && r->zeta_binding) {
        int stencil_value = 0;
        float depth_value = 1.0;
        pgraph_get_clear_depth_stencil_value(pg, &depth_value, &stencil_value);

        VkImageAspectFlags aspect = 0;
        if (parameter & NV097_CLEAR_SURFACE_Z)
            aspect |= VK_IMAGE_ASPECT_DEPTH_BIT;
        if (parameter & NV097_CLEAR_SURFACE_STENCIL)
            aspect |= VK_IMAGE_ASPECT_STENCIL_BIT;

        attachments[num_attachments++] = (VkClearAttachment){
            .aspectMask = aspect,
            .clearValue.depthStencil.depth = depth_value,
            .clearValue.depthStencil.stencil = stencil_value,
        };
    }

    if (num_attachments) {
        vkCmdClearAttachments(r->command_buffer, num_attachments, attachments,
                              1, &clear_rect);
    }
    end_draw(pg);

    pg->clearing = false;

    pgraph_vk_set_surface_dirty(pg, write_color, write_zeta);

    NV2A_VK_DGROUP_END();
}

#if 0
static void pgraph_vk_debug_attrs(NV2AState *d)
{
    for (int vertex_idx = 0; vertex_idx < pg->draw_arrays_count[i]; vertex_idx++) {
        NV2A_VK_DGROUP_BEGIN("Vertex %d+%d", pg->draw_arrays_start[i], vertex_idx);
        for (int attr_idx = 0; attr_idx < NV2A_VERTEXSHADER_ATTRIBUTES; attr_idx++) {
            VertexAttribute *attr = &pg->vertex_attributes[attr_idx];
            if (attr->count) {
                char *p = (char *)d->vram_ptr + r->attribute_offsets[attr_idx] + (pg->draw_arrays_start[i] + vertex_idx) * attr->stride;
                NV2A_VK_DGROUP_BEGIN("Attribute %d data at %tx", attr_idx, (ptrdiff_t)(p - (char*)d->vram_ptr));
                for (int count_idx = 0; count_idx < attr->count; count_idx++) {
                    switch (attr->format) {
                    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
                        NV2A_VK_DPRINTF("[%d] %f", count_idx, *(float*)p);
                        p += sizeof(float);
                        break;
                    default:
                        assert(0);
                        break;
                    }
                }
                NV2A_VK_DGROUP_END();
            }
        }
        NV2A_VK_DGROUP_END();
    }
}
#endif

/* Bind one storage buffer (vertex RAM or inline vertex buffer) at the given
 * base offset for every active vertex attribute binding. */
static void bind_vertex_buffer(PGRAPHState *pg, int buffer_idx,
                               VkDeviceSize offset)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(buffer_idx == BUFFER_VERTEX_RAM ||
           buffer_idx == BUFFER_VERTEX_INLINE);

    VkBuffer buffers[NV2A_VERTEXSHADER_ATTRIBUTES];
    VkDeviceSize offsets[NV2A_VERTEXSHADER_ATTRIBUTES];

    for (int i = 0; i < r->num_active_vertex_binding_descriptions; i++) {
        int attr_idx = r->vertex_attribute_descriptions[i].location;
        buffers[i] = r->storage_buffers[buffer_idx].buffer;
        offsets[i] = offset + r->vertex_attribute_offsets[attr_idx];
    }

    vkCmdBindVertexBuffers(r->command_buffer, 0,
                           r->num_active_vertex_binding_descriptions, buffers,
                           offsets);
}

/* Mark bound surfaces dirty after a draw/clear that wrote color/zeta, gated
 * by the current write-enable state. */
void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta)
{
    NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", color, zeta,
                 pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg));

    PGRAPHVkState *r = pg->vk_renderer_state;

    /* FIXME: Does this apply to CLEARs too? */
    color = color && pgraph_color_write_enabled(pg);
    zeta = zeta && pgraph_zeta_write_enabled(pg);
    pg->surface_color.draw_dirty |= color;
    pg->surface_zeta.draw_dirty |= zeta;

    if (r->color_binding) {
        r->color_binding->draw_dirty |= color;
        r->color_binding->frame_time = pg->frame_time;
        r->color_binding->cleared = false;
    }

    if (r->zeta_binding) {
        r->zeta_binding->draw_dirty |= zeta;
        r->zeta_binding->frame_time = pg->frame_time;
        r->zeta_binding->cleared = false;
    }
}

/* Flush everything if the given storage buffer cannot take `size` more
 * bytes. Returns true when a flush happened. */
static bool ensure_buffer_space(PGRAPHState *pg, int index, VkDeviceSize size)
{
    if (!pgraph_vk_buffer_has_space_for(pg, index, size, 1)) {
        pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
        return true;
    }

    return false;
}

/*
 * Record the pending geometry for the current begin/end bracket. Exactly one
 * of the four submission paths is taken: draw arrays, inline elements
 * (indexed), inline buffer (per-attribute arrays), or inline array
 * (interleaved).
 */
void pgraph_vk_flush_draw(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    if (!(r->color_binding || r->zeta_binding)) {
        NV2A_VK_DPRINTF("No binding present!!!\n");
        return;
    }

    r->num_vertex_ram_buffer_syncs = 0;

    if (pg->draw_arrays_length) {
        /* Draw arrays: vertices fetched directly from guest vertex RAM. */
        NV2A_VK_DGROUP_BEGIN("Draw Arrays");
        nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS);

        assert(pg->inline_elements_length == 0);
        assert(pg->inline_buffer_length == 0);
        assert(pg->inline_array_length == 0);

        pgraph_vk_bind_vertex_attributes(d, pg->draw_arrays_min_start,
                                         pg->draw_arrays_max_count - 1, false,
                                         0, pg->draw_arrays_max_count - 1);
        sync_vertex_ram_buffer(pg);

        begin_pre_draw(pg);
        begin_draw(pg);
        bind_vertex_buffer(pg, BUFFER_VERTEX_RAM, 0);
        for (int i = 0; i < pg->draw_arrays_length; i++) {
            uint32_t start = pg->draw_arrays_start[i],
                     count = pg->draw_arrays_count[i];
            NV2A_VK_DPRINTF("- [%d] Start:%d Count:%d", i, start, count);
            vkCmdDraw(r->command_buffer, count, 1, start, 0);
        }
        end_draw(pg);

        NV2A_VK_DGROUP_END();
    } else if (pg->inline_elements_length) {
        /* Indexed draw: indices are uploaded, vertex data stays in RAM. */
        NV2A_VK_DGROUP_BEGIN("Inline Elements");
        assert(pg->inline_buffer_length == 0);
        assert(pg->inline_array_length == 0);

        nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS);

        size_t index_data_size =
            pg->inline_elements_length * sizeof(pg->inline_elements[0]);

        ensure_buffer_space(pg, BUFFER_INDEX_STAGING, index_data_size);

        /* Scan indices for the referenced vertex range. */
        uint32_t min_element = (uint32_t)-1;
        uint32_t max_element = 0;
        for (int i = 0; i < pg->inline_elements_length; i++) {
            max_element = MAX(pg->inline_elements[i], max_element);
            min_element = MIN(pg->inline_elements[i], min_element);
        }
        pgraph_vk_bind_vertex_attributes(
            d, min_element, max_element, false, 0,
            pg->inline_elements[pg->inline_elements_length - 1]);
        sync_vertex_ram_buffer(pg);

        begin_pre_draw(pg);
        VkDeviceSize buffer_offset = pgraph_vk_update_index_buffer(
            pg, pg->inline_elements, index_data_size);
        begin_draw(pg);
        bind_vertex_buffer(pg, BUFFER_VERTEX_RAM, 0);
        vkCmdBindIndexBuffer(r->command_buffer,
                             r->storage_buffers[BUFFER_INDEX].buffer,
                             buffer_offset, VK_INDEX_TYPE_UINT32);
        vkCmdDrawIndexed(r->command_buffer, pg->inline_elements_length, 1, 0, 0,
                         0);
        end_draw(pg);

        NV2A_VK_DGROUP_END();
    } else if (pg->inline_buffer_length) {
        /* Inline buffer: one packed vec4 array per active attribute,
         * uploaded to the inline vertex staging buffer. */
        NV2A_VK_DGROUP_BEGIN("Inline Buffer");
        nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS);
        assert(pg->inline_array_length == 0);

        size_t vertex_data_size = pg->inline_buffer_length * sizeof(float) * 4;
        void *data[NV2A_VERTEXSHADER_ATTRIBUTES];
        size_t sizes[NV2A_VERTEXSHADER_ATTRIBUTES];
        size_t offset = 0;

        pgraph_vk_bind_vertex_attributes_inline(d);
        for (int i = 0; i < r->num_active_vertex_attribute_descriptions; i++) {
            int attr_index = r->vertex_attribute_descriptions[i].location;

            VertexAttribute *attr = &pg->vertex_attributes[attr_index];
            r->vertex_attribute_offsets[attr_index] = offset;

            data[i] = attr->inline_buffer;
            sizes[i] = vertex_data_size;

            attr->inline_buffer_populated = false;
            offset += vertex_data_size;
        }
        ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, offset);

        begin_pre_draw(pg);
        VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer(
            pg, data, sizes, r->num_active_vertex_attribute_descriptions);
        begin_draw(pg);
        bind_vertex_buffer(pg, BUFFER_VERTEX_INLINE, buffer_offset);
        vkCmdDraw(r->command_buffer, pg->inline_buffer_length, 1, 0, 0);
        end_draw(pg);

        NV2A_VK_DGROUP_END();
    } else if (pg->inline_array_length) {
        /* Inline array: interleaved vertex data streamed by the guest. */
        NV2A_VK_DGROUP_BEGIN("Inline Array");
        nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS);

        VkDeviceSize inline_array_data_size = pg->inline_array_length * 4;
        ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING,
                            inline_array_data_size);

        /* Compute per-attribute offsets within the interleaved stride. */
        unsigned int offset = 0;
        for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
            VertexAttribute *attr = &pg->vertex_attributes[i];
            if (attr->count == 0) {
                continue;
            }

            /* FIXME: Double check */
            offset = ROUND_UP(offset, attr->size);
            attr->inline_array_offset = offset;
            NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", i,
                         attr->size, attr->count);
            offset += attr->size * attr->count;
            offset = ROUND_UP(offset, attr->size);
        }

        unsigned int vertex_size = offset;
        unsigned int index_count = pg->inline_array_length * 4 / vertex_size;

        NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count);
        pgraph_vk_bind_vertex_attributes(d, 0, index_count - 1, true,
                                         vertex_size, index_count - 1);

        begin_pre_draw(pg);
        void *inline_array_data = pg->inline_array;
        VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer(
            pg, &inline_array_data, &inline_array_data_size, 1);
        begin_draw(pg);
        bind_vertex_buffer(pg, BUFFER_VERTEX_INLINE, buffer_offset);
        vkCmdDraw(r->command_buffer, index_count, 1, 0, 0);
        end_draw(pg);
        NV2A_VK_DGROUP_END();
    } else {
        NV2A_VK_DPRINTF("EMPTY NV097_SET_BEGIN_END");
        NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END");
    }
}
diff --git a/hw/xbox/nv2a/pgraph/vk/glsl.c b/hw/xbox/nv2a/pgraph/vk/glsl.c
new file mode 100644
index 0000000000..fb3aed34f5
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/glsl.c
@@ -0,0 +1,380 @@
/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see .
 */

#include "renderer.h"

/* NOTE(review): the targets of the three includes below were lost in
 * transit (angle-bracket text stripped, same mangling as the license URL
 * above) — restore the glslang C-interface headers from upstream. */
#include
#include
#include

/* Resource limits handed to glslang at compile time; values mirror the
 * glslang default resource table. */
static const glslang_resource_t
    resource_limits = { .max_lights = 32,
                        .max_clip_planes = 6,
                        .max_texture_units = 32,
                        .max_texture_coords = 32,
                        .max_vertex_attribs = 64,
                        .max_vertex_uniform_components = 4096,
                        .max_varying_floats = 64,
                        .max_vertex_texture_image_units = 32,
                        .max_combined_texture_image_units = 80,
                        .max_texture_image_units = 32,
                        .max_fragment_uniform_components = 4096,
                        .max_draw_buffers = 32,
                        .max_vertex_uniform_vectors = 128,
                        .max_varying_vectors = 8,
                        .max_fragment_uniform_vectors = 16,
                        .max_vertex_output_vectors = 16,
                        .max_fragment_input_vectors = 15,
                        .min_program_texel_offset = -8,
                        .max_program_texel_offset = 7,
                        .max_clip_distances = 8,
                        .max_compute_work_group_count_x = 65535,
                        .max_compute_work_group_count_y = 65535,
                        .max_compute_work_group_count_z = 65535,
                        .max_compute_work_group_size_x = 1024,
                        .max_compute_work_group_size_y = 1024,
                        .max_compute_work_group_size_z = 64,
                        .max_compute_uniform_components = 1024,
                        .max_compute_texture_image_units = 16,
                        .max_compute_image_uniforms = 8,
                        .max_compute_atomic_counters = 8,
                        .max_compute_atomic_counter_buffers = 1,
                        .max_varying_components = 60,
                        .max_vertex_output_components = 64,
                        .max_geometry_input_components = 64,
                        .max_geometry_output_components = 128,
                        .max_fragment_input_components = 128,
                        .max_image_units = 8,
                        .max_combined_image_units_and_fragment_outputs = 8,
                        .max_combined_shader_output_resources = 8,
                        .max_image_samples = 0,
                        .max_vertex_image_uniforms = 0,
                        .max_tess_control_image_uniforms = 0,
                        .max_tess_evaluation_image_uniforms = 0,
                        .max_geometry_image_uniforms = 0,
                        .max_fragment_image_uniforms = 8,
                        .max_combined_image_uniforms = 8,
                        .max_geometry_texture_image_units = 16,
                        .max_geometry_output_vertices = 256,
                        .max_geometry_total_output_components = 1024,
                        .max_geometry_uniform_components = 1024,
                        .max_geometry_varying_components = 64,
                        .max_tess_control_input_components = 128,
                        .max_tess_control_output_components = 128,
                        .max_tess_control_texture_image_units = 16,
                        .max_tess_control_uniform_components = 1024,
                        .max_tess_control_total_output_components = 4096,
                        .max_tess_evaluation_input_components = 128,
                        .max_tess_evaluation_output_components = 128,
                        .max_tess_evaluation_texture_image_units = 16,
                        .max_tess_evaluation_uniform_components = 1024,
                        .max_tess_patch_components = 120,
                        .max_patch_vertices = 32,
                        .max_tess_gen_level = 64,
                        .max_viewports = 16,
                        .max_vertex_atomic_counters = 0,
                        .max_tess_control_atomic_counters = 0,
                        .max_tess_evaluation_atomic_counters = 0,
                        .max_geometry_atomic_counters = 0,
                        .max_fragment_atomic_counters = 8,
                        .max_combined_atomic_counters = 8,
                        .max_atomic_counter_bindings = 1,
                        .max_vertex_atomic_counter_buffers = 0,
                        .max_tess_control_atomic_counter_buffers = 0,
                        .max_tess_evaluation_atomic_counter_buffers = 0,
                        .max_geometry_atomic_counter_buffers = 0,
                        .max_fragment_atomic_counter_buffers = 1,
                        .max_combined_atomic_counter_buffers = 1,
                        .max_atomic_counter_buffer_size = 16384,
                        .max_transform_feedback_buffers = 4,
                        .max_transform_feedback_interleaved_components = 64,
                        .max_cull_distances = 8,
                        .max_combined_clip_and_cull_distances = 8,
                        .max_samples = 4,
                        .max_mesh_output_vertices_nv = 256,
                        .max_mesh_output_primitives_nv = 512,
                        .max_mesh_work_group_size_x_nv = 32,
                        .max_mesh_work_group_size_y_nv = 1,
                        .max_mesh_work_group_size_z_nv = 1,
                        .max_task_work_group_size_x_nv = 32,
                        .max_task_work_group_size_y_nv = 1,
                        .max_task_work_group_size_z_nv = 1,
                        .max_mesh_view_count_nv = 4,
                        .maxDualSourceDrawBuffersEXT = 1,
                        .limits = {
                            .non_inductive_for_loops = 1,
                            .while_loops = 1,
                            .do_while_loops = 1,
                            .general_uniform_indexing = 1,
                            .general_attribute_matrix_vector_indexing = 1,
                            .general_varying_indexing = 1,
                            .general_sampler_indexing = 1,
                            .general_variable_indexing = 1,
                            .general_constant_matrix_vector_indexing
= 1, + } }; + +void pgraph_vk_init_glsl_compiler(void) +{ + glslang_initialize_process(); +} + +void pgraph_vk_finalize_glsl_compiler(void) +{ + glslang_finalize_process(); +} + +GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage, + const char *glsl_source) +{ + const glslang_input_t input = { + .language = GLSLANG_SOURCE_GLSL, + .stage = stage, + .client = GLSLANG_CLIENT_VULKAN, + .client_version = GLSLANG_TARGET_VULKAN_1_3, + .target_language = GLSLANG_TARGET_SPV, + .target_language_version = GLSLANG_TARGET_SPV_1_5, + .code = glsl_source, + .default_version = 460, + .default_profile = GLSLANG_NO_PROFILE, + .force_default_version_and_profile = false, + .forward_compatible = false, + .messages = GLSLANG_MSG_DEFAULT_BIT, + .resource = &resource_limits, + }; + + glslang_shader_t *shader = glslang_shader_create(&input); + + if (!glslang_shader_preprocess(shader, &input)) { + fprintf(stderr, + "GLSL preprocessing failed\n" + "[INFO]: %s\n" + "[DEBUG]: %s\n" + "%s\n", + glslang_shader_get_info_log(shader), + glslang_shader_get_info_debug_log(shader), input.code); + assert(!"glslang preprocess failed"); + glslang_shader_delete(shader); + return NULL; + } + + if (!glslang_shader_parse(shader, &input)) { + fprintf(stderr, + "GLSL parsing failed\n" + "[INFO]: %s\n" + "[DEBUG]: %s\n" + "%s\n", + glslang_shader_get_info_log(shader), + glslang_shader_get_info_debug_log(shader), + glslang_shader_get_preprocessed_code(shader)); + assert(!"glslang parse failed"); + glslang_shader_delete(shader); + return NULL; + } + + glslang_program_t *program = glslang_program_create(); + glslang_program_add_shader(program, shader); + + if (!glslang_program_link(program, GLSLANG_MSG_SPV_RULES_BIT | + GLSLANG_MSG_VULKAN_RULES_BIT)) { + fprintf(stderr, + "GLSL linking failed\n" + "[INFO]: %s\n" + "[DEBUG]: %s\n", + glslang_program_get_info_log(program), + glslang_program_get_info_debug_log(program)); + assert(!"glslang link failed"); + glslang_program_delete(program); + 
glslang_shader_delete(shader); + return NULL; + } + + glslang_spv_options_t spv_options = { + .validate = true, + +#if defined(CONFIG_RENDERDOC) + .disable_optimizer = true, + .generate_debug_info = true, + .emit_nonsemantic_shader_debug_info = true, + .emit_nonsemantic_shader_debug_source = true, +#endif + }; + glslang_program_SPIRV_generate_with_options(program, stage, &spv_options); + + const char *spirv_messages = glslang_program_SPIRV_get_messages(program); + if (spirv_messages) { + printf("%s\n", spirv_messages); + } + + size_t num_program_bytes = + glslang_program_SPIRV_get_size(program) * sizeof(uint32_t); + + guint8 *data = g_malloc(num_program_bytes); + glslang_program_SPIRV_get(program, (unsigned int *)data); + + glslang_program_delete(program); + glslang_shader_delete(shader); + + return g_byte_array_new_take(data, num_program_bytes); +} + +VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r, GByteArray *spv) +{ + VkShaderModuleCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = spv->len, + .pCode = (uint32_t *)spv->data, + }; + VkShaderModule module; + VK_CHECK( + vkCreateShaderModule(r->device, &create_info, NULL, &module)); + return module; +} + +static void block_to_uniforms(const SpvReflectBlockVariable *block, ShaderUniformLayout *layout) +{ + assert(!layout->uniforms); + + layout->num_uniforms = block->member_count; + layout->uniforms = g_malloc0_n(block->member_count, sizeof(ShaderUniform)); + layout->total_size = block->size; + layout->allocation = g_malloc0(block->size); + + for (uint32_t k = 0; k < block->member_count; ++k) { + const SpvReflectBlockVariable *member = &block->members[k]; + + assert(member->array.dims_count < 2); + + layout->uniforms[k] = (ShaderUniform){ + .name = strdup(member->name), + .offset = member->offset, + .dim_v = MAX(1, member->numeric.vector.component_count), + .dim_a = MAX(member->array.dims_count ?
member->array.dims[0] : 1, member->numeric.matrix.column_count), + .stride = MAX(member->array.stride, member->numeric.matrix.stride), + }; + + // fprintf(stderr, "<%s offset=%zd dim_v=%zd dim_a=%zd stride=%zd>\n", + // layout->uniforms[k].name, + // layout->uniforms[k].offset, + // layout->uniforms[k].dim_v, + // layout->uniforms[k].dim_a, + // layout->uniforms[k].stride + // ); + } + // fprintf(stderr, "--\n"); +} + +static void init_layout_from_spv(ShaderModuleInfo *info) +{ + SpvReflectResult result = spvReflectCreateShaderModule( + info->spirv->len, info->spirv->data, &info->reflect_module); + assert(result == SPV_REFLECT_RESULT_SUCCESS && + "Failed to create SPIR-V shader module"); + + uint32_t descriptor_set_count = 0; + result = spvReflectEnumerateDescriptorSets(&info->reflect_module, + &descriptor_set_count, NULL); + assert(result == SPV_REFLECT_RESULT_SUCCESS && + "Failed to enumerate descriptor sets"); + + info->descriptor_sets = + g_malloc_n(descriptor_set_count, sizeof(SpvReflectDescriptorSet *)); + result = spvReflectEnumerateDescriptorSets( + &info->reflect_module, &descriptor_set_count, info->descriptor_sets); + assert(result == SPV_REFLECT_RESULT_SUCCESS && + "Failed to enumerate descriptor sets"); + + info->uniforms.num_uniforms = 0; + info->uniforms.uniforms = NULL; + + for (uint32_t i = 0; i < descriptor_set_count; ++i) { + const SpvReflectDescriptorSet *descriptor_set = + info->descriptor_sets[i]; + for (uint32_t j = 0; j < descriptor_set->binding_count; ++j) { + const SpvReflectDescriptorBinding *binding = + descriptor_set->bindings[j]; + if (binding->descriptor_type != + SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { + continue; + } + + const SpvReflectBlockVariable *block = &binding->block; + block_to_uniforms(block, &info->uniforms); + } + } + + info->push_constants.num_uniforms = 0; + info->push_constants.uniforms = NULL; + assert(info->reflect_module.push_constant_block_count < 2); + if (info->reflect_module.push_constant_block_count) { + 
block_to_uniforms(&info->reflect_module.push_constant_blocks[0], + &info->push_constants); + } +} + +static glslang_stage_t vk_shader_stage_to_glslang_stage(VkShaderStageFlagBits stage) +{ + switch (stage) { + case VK_SHADER_STAGE_GEOMETRY_BIT: + return GLSLANG_STAGE_GEOMETRY; + case VK_SHADER_STAGE_VERTEX_BIT: + return GLSLANG_STAGE_VERTEX; + case VK_SHADER_STAGE_FRAGMENT_BIT: + return GLSLANG_STAGE_FRAGMENT; + case VK_SHADER_STAGE_COMPUTE_BIT: + return GLSLANG_STAGE_COMPUTE; + default: + assert(0); + } +} + +ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl( + PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl) +{ + ShaderModuleInfo *info = g_malloc0(sizeof(*info)); + info->glsl = strdup(glsl); + info->spirv = pgraph_vk_compile_glsl_to_spv( + vk_shader_stage_to_glslang_stage(stage), glsl); + info->module = pgraph_vk_create_shader_module_from_spv(r, info->spirv); + init_layout_from_spv(info); + return info; +} + +static void finalize_uniform_layout(ShaderUniformLayout *layout) +{ + for (int i = 0; i < layout->num_uniforms; i++) { + free((void*)layout->uniforms[i].name); + } + if (layout->uniforms) { + g_free(layout->uniforms); + } +} + +void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info) +{ + if (info->glsl) { + free(info->glsl); + } + finalize_uniform_layout(&info->uniforms); + finalize_uniform_layout(&info->push_constants); + free(info->descriptor_sets); + spvReflectDestroyShaderModule(&info->reflect_module); + vkDestroyShaderModule(r->device, info->module, NULL); + g_byte_array_unref(info->spirv); + g_free(info); +} diff --git a/hw/xbox/nv2a/pgraph/vk/glsl.h b/hw/xbox/nv2a/pgraph/vk/glsl.h new file mode 100644 index 0000000000..3f6ccd9b3a --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/glsl.h @@ -0,0 +1,205 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General 
Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_VK_GLSL_H +#define HW_XBOX_NV2A_PGRAPH_VK_GLSL_H + +#include "qemu/osdep.h" +#include +#include +#include + +typedef struct ShaderUniform { + const char *name; + size_t dim_v; + size_t dim_a; + size_t align; + size_t stride; + size_t offset; +} ShaderUniform; + +typedef struct ShaderUniformLayout { + ShaderUniform *uniforms; + size_t num_uniforms; + size_t total_size; + void *allocation; +} ShaderUniformLayout; + +static inline void uniform_std140(ShaderUniformLayout *layout) +{ + size_t offset = 0; + + for (int i = 0; i < layout->num_uniforms; i++) { + ShaderUniform *u = &layout->uniforms[i]; + size_t size = sizeof(float); // float or int + size_t align = size; + size_t stride = 0; + + size *= u->dim_v; + align *= u->dim_v == 3 ? 4 : u->dim_v; + + // If an array, each element is padded to vec4. 
+ if (u->dim_a > 1) { + align = 4 * sizeof(float); + stride = align; + size = u->dim_a * align; + } else { + align = size; + stride = 0; + } + + offset = ROUND_UP(offset, align); + + u->align = align; + u->offset = offset; + u->stride = stride; + + offset += size; + } + + layout->total_size = offset; + assert(layout->total_size); +} + +static inline void uniform_std430(ShaderUniformLayout *layout) +{ + size_t offset = 0; + + for (int i = 0; i < layout->num_uniforms; i++) { + ShaderUniform *u = &layout->uniforms[i]; + size_t size = sizeof(float); // float or int + size *= u->dim_v; + size_t align = size; + size *= u->dim_a; + + offset = ROUND_UP(offset, align); + + u->align = align; + u->offset = offset; + u->stride = u->dim_a > 1 ? (size * u->dim_v) : 0; + + offset += size; + } + + layout->total_size = offset; + assert(layout->total_size); +} + +static inline int uniform_index(ShaderUniformLayout *layout, const char *name) +{ + for (int i = 0; i < layout->num_uniforms; i++) { + if (!strcmp(layout->uniforms[i].name, name)) { + return i + 1; + } + } + + return -1; +} + +static inline +void *uniform_ptr(ShaderUniformLayout *layout, int idx) +{ + assert(idx > 0 && "invalid uniform index"); + + return (char *)layout->allocation + layout->uniforms[idx - 1].offset; +} + +static inline +void uniform_copy(ShaderUniformLayout *layout, int idx, void *values, size_t value_size, size_t count) +{ + assert(idx > 0 && "invalid uniform index"); + + ShaderUniform *u = &layout->uniforms[idx - 1]; + const size_t element_size = value_size * u->dim_v; + + size_t bytes_remaining = value_size * count; + char *p_out = uniform_ptr(layout, idx); + char *p_max = p_out + layout->total_size; + char *p_in = (char *)values; + + int index = 0; + while (bytes_remaining) { + assert(p_out < p_max); + assert(index < u->dim_a); + memcpy(p_out, p_in, element_size); + bytes_remaining -= element_size; + p_out += u->stride; + p_in += element_size; + index += 1; + } +} + +static inline +void 
uniform1fv(ShaderUniformLayout *layout, int idx, size_t count, float *values) +{ + uniform_copy(layout, idx, values, sizeof(float), count); +} + +static inline +void uniform1f(ShaderUniformLayout *layout, int idx, float value) +{ + uniform1fv(layout, idx, 1, &value); +} + +static inline +void uniform2f(ShaderUniformLayout *layout, int idx, float v0, float v1) +{ + float values[] = { v0, v1 }; + uniform1fv(layout, idx, 2, values); +} + +static inline +void uniform4f(ShaderUniformLayout *layout, int idx, float v0, float v1, float v2, float v3) +{ + float values[] = { v0, v1, v2, v3 }; + uniform1fv(layout, idx, 4, values); +} + +static inline +void uniformMatrix2fv(ShaderUniformLayout *layout, int idx, float *values) +{ + uniform1fv(layout, idx, 4, values); +} + +static inline +void uniformMatrix4fv(ShaderUniformLayout *layout, int idx, float *values) +{ + uniform1fv(layout, idx, 4 * 4, values); +} + +static inline +void uniform1iv(ShaderUniformLayout *layout, int idx, size_t count, int32_t *values) +{ + uniform_copy(layout, idx, values, sizeof(int32_t), count); +} + +static inline +void uniform1i(ShaderUniformLayout *layout, int idx, int32_t value) +{ + uniform1iv(layout, idx, 1, &value); +} + +static inline +void uniform4i(ShaderUniformLayout *layout, int idx, int v0, int v1, int v2, int v3) +{ + int values[] = { v0, v1, v2, v3 }; + uniform1iv(layout, idx, 4, values); +} + +#endif diff --git a/hw/xbox/nv2a/pgraph/vk/image.c b/hw/xbox/nv2a/pgraph/vk/image.c new file mode 100644 index 0000000000..1161d81f54 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/image.c @@ -0,0 +1,209 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "renderer.h" + +static bool check_format_has_depth_component(VkFormat format) +{ + return format == VK_FORMAT_D32_SFLOAT_S8_UINT || + format == VK_FORMAT_D24_UNORM_S8_UINT || + format == VK_FORMAT_D16_UNORM; +} + +static bool check_format_has_stencil_component(VkFormat format) +{ + return format == VK_FORMAT_D32_SFLOAT_S8_UINT || + format == VK_FORMAT_D24_UNORM_S8_UINT; +} + +void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, + VkImage image, VkFormat format, + VkImageLayout oldLayout, + VkImageLayout newLayout) +{ + VkImageMemoryBarrier barrier = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .oldLayout = oldLayout, + .newLayout = newLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange.baseMipLevel = 0, + .subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS, + .subresourceRange.baseArrayLayer = 0, + .subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS, + }; + + if (check_format_has_depth_component(format)) { + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + + if (check_format_has_stencil_component(format)) { + barrier.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + } else { + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + } + + VkPipelineStageFlags sourceStage; + VkPipelineStageFlags destinationStage; + + // Undefined -> Dst + if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = 
VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Undefined -> Color + } else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // Undefined -> Depth + } else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + + // Dst -> Shader Read + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + + // Dst -> Color + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // Dst -> Depth + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + + // Dst -> Src + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Shader Read -> Dst + } else if (oldLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Shader Read -> Color + } else if (oldLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // Color -> Src + } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + sourceStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Color -> Dst + } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; 
+ destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Color -> Shader Read + } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + sourceStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + + // Depth -> Src + } else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + + sourceStage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Depth -> Dst + } else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Src -> Color + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // Src -> Depth + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = 
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + + // Src -> Dst + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + } else { + assert(!"unsupported layout transition!"); + } + + vkCmdPipelineBarrier(cmd, sourceStage, destinationStage, 0, 0, + NULL, 0, NULL, 1, &barrier); +} diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c new file mode 100644 index 0000000000..4023fd5858 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -0,0 +1,662 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "qemu/osdep.h" +#include "ui/xemu-settings.h" +#include "renderer.h" +#include "xemu-version.h" + +#include +#include +#include + +#include + +typedef GArray VkExtensionPropertiesArray; +typedef GArray StringArray; + +static bool enable_validation = false; + +static char const *const validation_layers[] = { + "VK_LAYER_KHRONOS_validation", +}; + +static char const *const required_instance_extensions[] = { + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME, + VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, +}; + +static char const *const required_device_extensions[] = { + VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, + VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, +#ifdef WIN32 + VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, +#else + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, +#endif +}; + +static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback( + VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData) +{ + NV2A_VK_DPRINTF("[vk] %s", pCallbackData->pMessage); + fprintf(stderr, "[vk] %s\n", pCallbackData->pMessage); + + if ((messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) && + (messageSeverity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT))) { + exit(1); + } + return VK_FALSE; +} + +static bool check_validation_layer_support(void) +{ + uint32_t num_available_layers; + vkEnumerateInstanceLayerProperties(&num_available_layers, NULL); + + g_autofree VkLayerProperties *available_layers = + g_malloc_n(num_available_layers, sizeof(VkLayerProperties)); + vkEnumerateInstanceLayerProperties(&num_available_layers, available_layers); + + for (int i = 0; i < ARRAY_SIZE(validation_layers); i++) { + bool found = false; + for 
(int j = 0; j < num_available_layers; j++) { + if (!strcmp(validation_layers[i], available_layers[j].layerName)) { + found = true; + break; + } + } + if (!found) { + fprintf(stderr, "desired validation layer not found: %s\n", + validation_layers[i]); + return false; + } + } + + return true; +} + +static SDL_Window *create_window(void) +{ + SDL_Window *window = SDL_CreateWindow( + "SDL Offscreen Window", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, + 640, 480, SDL_WINDOW_VULKAN | SDL_WINDOW_HIDDEN); + + if (window == NULL) { + fprintf(stderr, "%s: Failed to create window\n", __func__); + SDL_Quit(); + exit(1); + } + + return window; +} + +static VkExtensionPropertiesArray * +get_available_instance_extensions(PGRAPHState *pg) +{ + uint32_t num_extensions = 0; + + VK_CHECK( + vkEnumerateInstanceExtensionProperties(NULL, &num_extensions, NULL)); + + VkExtensionPropertiesArray *extensions = g_array_sized_new( + FALSE, FALSE, sizeof(VkExtensionProperties), num_extensions); + + g_array_set_size(extensions, num_extensions); + VK_CHECK(vkEnumerateInstanceExtensionProperties( + NULL, &num_extensions, (VkExtensionProperties *)extensions->data)); + + return extensions; +} + +static bool +is_extension_available(VkExtensionPropertiesArray *available_extensions, + const char *extension_name) +{ + for (int i = 0; i < available_extensions->len; i++) { + VkExtensionProperties *e = + &g_array_index(available_extensions, VkExtensionProperties, i); + if (!strcmp(e->extensionName, extension_name)) { + return true; + } + } + + return false; +} + +static StringArray *get_required_instance_extension_names(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + // Add instance extensions SDL lists as required + unsigned int sdl_count = 0; + SDL_Vulkan_GetInstanceExtensions((SDL_Window *)r->window, &sdl_count, NULL); + + StringArray *extensions = + g_array_sized_new(FALSE, FALSE, sizeof(char *), + sdl_count + ARRAY_SIZE(required_instance_extensions)); + + if (sdl_count) { + 
g_array_set_size(extensions, sdl_count); + SDL_Vulkan_GetInstanceExtensions((SDL_Window *)r->window, &sdl_count, + (const char **)extensions->data); + } + + // Add additional required extensions + g_array_append_vals(extensions, required_instance_extensions, + ARRAY_SIZE(required_instance_extensions)); + + return extensions; +} + +static bool +add_extension_if_available(VkExtensionPropertiesArray *available_extensions, + StringArray *enabled_extension_names, + const char *desired_extension_name) +{ + if (is_extension_available(available_extensions, desired_extension_name)) { + g_array_append_val(enabled_extension_names, desired_extension_name); + return true; + } + + fprintf(stderr, "Warning: extension not available: %s\n", + desired_extension_name); + return false; +} + +static void +add_optional_instance_extension_names(PGRAPHState *pg, + VkExtensionPropertiesArray *available_extensions, + StringArray *enabled_extension_names) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + r->debug_utils_extension_enabled = + g_config.display.vulkan.validation_layers && + add_extension_if_available(available_extensions, enabled_extension_names, + VK_EXT_DEBUG_UTILS_EXTENSION_NAME); +} + +static void create_instance(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + r->window = create_window(); + + VK_CHECK(volkInitialize()); + + VkApplicationInfo app_info = { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pApplicationName = "xemu", + .applicationVersion = VK_MAKE_VERSION( + xemu_version_major, xemu_version_minor, xemu_version_patch), + .pEngineName = "No Engine", + .engineVersion = VK_MAKE_VERSION(1, 0, 0), + .apiVersion = VK_API_VERSION_1_3, + }; + + g_autofree VkExtensionPropertiesArray *available_extensions = + get_available_instance_extensions(pg); + + g_autofree StringArray *enabled_extension_names = + get_required_instance_extension_names(pg); + + bool all_required_extensions_available = true; + for (int i = 0; i < enabled_extension_names->len; i++) 
{ + const char *required_extension = + g_array_index(enabled_extension_names, const char *, i); + if (!is_extension_available(available_extensions, required_extension)) { + fprintf(stderr, + "Error: Required instance extension not available: %s\n", + required_extension); + all_required_extensions_available = false; + } + } + assert(all_required_extensions_available); + + add_optional_instance_extension_names(pg, available_extensions, + enabled_extension_names); + + fprintf(stderr, "Enabled instance extensions:\n"); + for (int i = 0; i < enabled_extension_names->len; i++) { + fprintf(stderr, "- %s\n", g_array_index(enabled_extension_names, char *, i)); + } + + VkInstanceCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pApplicationInfo = &app_info, + .enabledExtensionCount = enabled_extension_names->len, + .ppEnabledExtensionNames = + &g_array_index(enabled_extension_names, const char *, 0), + }; + + VkDebugUtilsMessengerCreateInfoEXT dbg_create_info; + if (r->debug_utils_extension_enabled) { + dbg_create_info = (VkDebugUtilsMessengerCreateInfoEXT){ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = debugCallback, + }; + } + + enable_validation = g_config.display.vulkan.validation_layers; + + if (enable_validation) { + if (check_validation_layer_support()) { + fprintf(stderr, "Warning: Validation layers enabled. 
Expect performance impact.\n"); + create_info.enabledLayerCount = ARRAY_SIZE(validation_layers); + create_info.ppEnabledLayerNames = validation_layers; + if (r->debug_utils_extension_enabled) { + create_info.pNext = + (VkDebugUtilsMessengerCreateInfoEXT *)&dbg_create_info; + } + } else { + fprintf(stderr, "Warning: validation layers not available\n"); + enable_validation = false; + } + } + + VK_CHECK(vkCreateInstance(&create_info, NULL, &r->instance)); + + volkLoadInstance(r->instance); +} + +static bool is_queue_family_indicies_complete(QueueFamilyIndices indices) +{ + return indices.queue_family >= 0; +} + +QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device) +{ + QueueFamilyIndices indices = { + .queue_family = -1, + }; + + uint32_t num_queue_families = 0; + vkGetPhysicalDeviceQueueFamilyProperties(device, &num_queue_families, NULL); + + g_autofree VkQueueFamilyProperties *queue_families = + g_malloc_n(num_queue_families, sizeof(VkQueueFamilyProperties)); + vkGetPhysicalDeviceQueueFamilyProperties(device, &num_queue_families, + queue_families); + + for (int i = 0; i < num_queue_families; i++) { + VkQueueFamilyProperties queueFamily = queue_families[i]; + // FIXME: Support independent graphics, compute queues + int required_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; + if ((queueFamily.queueFlags & required_flags) == required_flags) { + indices.queue_family = i; + } + if (is_queue_family_indicies_complete(indices)) { + break; + } + } + + return indices; +} + +static VkExtensionPropertiesArray * +get_available_device_extensions(VkPhysicalDevice device) +{ + uint32_t num_extensions = 0; + + VK_CHECK(vkEnumerateDeviceExtensionProperties(device, NULL, &num_extensions, + NULL)); + + VkExtensionPropertiesArray *extensions = g_array_sized_new( + FALSE, FALSE, sizeof(VkExtensionProperties), num_extensions); + + g_array_set_size(extensions, num_extensions); + VK_CHECK(vkEnumerateDeviceExtensionProperties( + device, NULL, &num_extensions, + 
(VkExtensionProperties *)extensions->data)); + + return extensions; +} + +static StringArray *get_required_device_extension_names(void) +{ + StringArray *extensions = + g_array_sized_new(FALSE, FALSE, sizeof(char *), + ARRAY_SIZE(required_device_extensions)); + + g_array_append_vals(extensions, required_device_extensions, + ARRAY_SIZE(required_device_extensions)); + + return extensions; +} + +static void add_optional_device_extension_names( + PGRAPHState *pg, VkExtensionPropertiesArray *available_extensions, + StringArray *enabled_extension_names) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + r->custom_border_color_extension_enabled = + add_extension_if_available(available_extensions, enabled_extension_names, + VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); + + r->provoking_vertex_extension_enabled = + add_extension_if_available(available_extensions, enabled_extension_names, + VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + + r->memory_budget_extension_enabled = add_extension_if_available( + available_extensions, enabled_extension_names, + VK_EXT_MEMORY_BUDGET_EXTENSION_NAME); +} + +static bool check_device_support_required_extensions(VkPhysicalDevice device) +{ + g_autofree VkExtensionPropertiesArray *available_extensions = + get_available_device_extensions(device); + + for (int i = 0; i < ARRAY_SIZE(required_device_extensions); i++) { + if (!is_extension_available(available_extensions, + required_device_extensions[i])) { + fprintf(stderr, "required device extension not found: %s\n", + required_device_extensions[i]); + return false; + } + } + + return true; +} + +static bool is_device_compatible(VkPhysicalDevice device) +{ + QueueFamilyIndices indices = pgraph_vk_find_queue_families(device); + + return is_queue_family_indicies_complete(indices) && + check_device_support_required_extensions(device); + // FIXME: Check formats + // FIXME: Check vram +} + +static void select_physical_device(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + uint32_t 
num_physical_devices = 0; + + vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, NULL); + if (num_physical_devices == 0) { + assert(!"failed to find GPUs with Vulkan support"); + } + + g_autofree VkPhysicalDevice *devices = + g_malloc_n(num_physical_devices, sizeof(VkPhysicalDevice)); + vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, devices); + + fprintf(stderr, "Available physical devices:\n"); + for (int i = 0; i < num_physical_devices; i++) { + vkGetPhysicalDeviceProperties(devices[i], &r->device_props); + fprintf(stderr, "- %s\n", r->device_props.deviceName); + } + + // FIXME: Store preferred device + + r->physical_device = VK_NULL_HANDLE; + for (int i = 0; i < num_physical_devices; i++) { + if (is_device_compatible(devices[i])) { + r->physical_device = devices[i]; + break; + } + } + if (r->physical_device == VK_NULL_HANDLE) { + assert(!"failed to find a suitable GPU"); + } + + vkGetPhysicalDeviceProperties(r->physical_device, &r->device_props); + fprintf(stderr, + "Selected physical device: %s\n" + "- Vendor: %x, Device: %x\n" + "- Driver Version: %d.%d.%d\n", + r->device_props.deviceName, + r->device_props.vendorID, + r->device_props.deviceID, + VK_VERSION_MAJOR(r->device_props.driverVersion), + VK_VERSION_MINOR(r->device_props.driverVersion), + VK_VERSION_PATCH(r->device_props.driverVersion)); + + size_t vsh_attr_values_size = + NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float); + assert(r->device_props.limits.maxPushConstantsSize >= vsh_attr_values_size); +} + +static void create_logical_device(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + QueueFamilyIndices indices = + pgraph_vk_find_queue_families(r->physical_device); + + g_autofree VkExtensionPropertiesArray *available_extensions = + get_available_device_extensions(r->physical_device); + + g_autofree StringArray *enabled_extension_names = + get_required_device_extension_names(); + + add_optional_device_extension_names(pg, available_extensions, + 
enabled_extension_names); + + fprintf(stderr, "Enabled device extensions:\n"); + for (int i = 0; i < enabled_extension_names->len; i++) { + fprintf(stderr, "- %s\n", g_array_index(enabled_extension_names, char *, i)); + } + + float queuePriority = 1.0f; + + VkDeviceQueueCreateInfo queue_create_info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .queueFamilyIndex = indices.queue_family, + .queueCount = 1, + .pQueuePriorities = &queuePriority, + }; + + // Ensure device supports required features + VkPhysicalDeviceFeatures available_features, enabled_features; + vkGetPhysicalDeviceFeatures(r->physical_device, &available_features); + memset(&enabled_features, 0, sizeof(enabled_features)); + + struct { + const char *name; + VkBool32 available, *enabled; + } required_features[] = { + #define F(n) { #n, available_features.n, &enabled_features.n } + F(shaderClipDistance), + F(geometryShader), + F(shaderTessellationAndGeometryPointSize), + F(depthClamp), + F(occlusionQueryPrecise), + #undef F + }; + + bool all_features_available = true; + for (int i = 0; i < ARRAY_SIZE(required_features); i++) { + if (required_features[i].available != VK_TRUE) { + fprintf(stderr, "Error: Device does not support required feature %s\n", required_features[i].name); + all_features_available = false; + } + *required_features[i].enabled = VK_TRUE; + } + assert(all_features_available); + + void *next_struct = NULL; + + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_features; + if (r->provoking_vertex_extension_enabled) { + provoking_vertex_features = (VkPhysicalDeviceProvokingVertexFeaturesEXT){ + .sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, + .provokingVertexLast = VK_TRUE, + .pNext = next_struct, + }; + next_struct = &provoking_vertex_features; + } + + VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_features; + if (r->custom_border_color_extension_enabled) { + custom_border_features = 
(VkPhysicalDeviceCustomBorderColorFeaturesEXT){ + .sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT, + .customBorderColors = VK_TRUE, + .pNext = next_struct, + }; + next_struct = &custom_border_features; + } + + VkDeviceCreateInfo device_create_info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .queueCreateInfoCount = 1, + .pQueueCreateInfos = &queue_create_info, + .pEnabledFeatures = &enabled_features, + .enabledExtensionCount = enabled_extension_names->len, + .ppEnabledExtensionNames = + &g_array_index(enabled_extension_names, const char *, 0), + .pNext = next_struct, + }; + + if (enable_validation) { + device_create_info.enabledLayerCount = ARRAY_SIZE(validation_layers); + device_create_info.ppEnabledLayerNames = validation_layers; + } + + VK_CHECK(vkCreateDevice(r->physical_device, &device_create_info, NULL, + &r->device)); + + vkGetDeviceQueue(r->device, indices.queue_family, 0, &r->queue); +} + +uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits, + VkMemoryPropertyFlags properties) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPhysicalDeviceMemoryProperties prop; + vkGetPhysicalDeviceMemoryProperties(r->physical_device, &prop); + for (uint32_t i = 0; i < prop.memoryTypeCount; i++) { + if ((prop.memoryTypes[i].propertyFlags & properties) == properties && + type_bits & (1 << i)) { + return i; + } + } + return 0xFFFFFFFF; // Unable to find memoryType +} + +static void init_allocator(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VmaVulkanFunctions vulkanFunctions = { + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. + .vkGetInstanceProcAddr = vkGetInstanceProcAddr, + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. 
+ .vkGetDeviceProcAddr = vkGetDeviceProcAddr, + .vkGetPhysicalDeviceProperties = vkGetPhysicalDeviceProperties, + .vkGetPhysicalDeviceMemoryProperties = vkGetPhysicalDeviceMemoryProperties, + .vkAllocateMemory = vkAllocateMemory, + .vkFreeMemory = vkFreeMemory, + .vkMapMemory = vkMapMemory, + .vkUnmapMemory = vkUnmapMemory, + .vkFlushMappedMemoryRanges = vkFlushMappedMemoryRanges, + .vkInvalidateMappedMemoryRanges = vkInvalidateMappedMemoryRanges, + .vkBindBufferMemory = vkBindBufferMemory, + .vkBindImageMemory = vkBindImageMemory, + .vkGetBufferMemoryRequirements = vkGetBufferMemoryRequirements, + .vkGetImageMemoryRequirements = vkGetImageMemoryRequirements, + .vkCreateBuffer = vkCreateBuffer, + .vkDestroyBuffer = vkDestroyBuffer, + .vkCreateImage = vkCreateImage, + .vkDestroyImage = vkDestroyImage, + .vkCmdCopyBuffer = vkCmdCopyBuffer, + #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. + .vkGetBufferMemoryRequirements2KHR = vkGetBufferMemoryRequirements2, + /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. + .vkGetImageMemoryRequirements2KHR = vkGetImageMemoryRequirements2, + #endif + #if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + /// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension. + .vkBindBufferMemory2KHR = vkBindBufferMemory2, + /// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension. 
+ .vkBindImageMemory2KHR = vkBindImageMemory2, + #endif + #if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2. + .vkGetPhysicalDeviceMemoryProperties2KHR = vkGetPhysicalDeviceMemoryProperties2KHR, + #endif + #if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. + .vkGetDeviceBufferMemoryRequirements = vkGetDeviceBufferMemoryRequirements, + /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. + .vkGetDeviceImageMemoryRequirements = vkGetDeviceImageMemoryRequirements, + #endif + }; + + VmaAllocatorCreateInfo create_info = { + .flags = (r->memory_budget_extension_enabled ? 
+ VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT : + 0), + .vulkanApiVersion = VK_API_VERSION_1_3, + .instance = r->instance, + .physicalDevice = r->physical_device, + .device = r->device, + .pVulkanFunctions = &vulkanFunctions, + }; + + VK_CHECK(vmaCreateAllocator(&create_info, &r->allocator)); +} + +static void finalize_allocator(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vmaDestroyAllocator(r->allocator); +} + +void pgraph_vk_init_instance(PGRAPHState *pg) +{ + create_instance(pg); + select_physical_device(pg); + create_logical_device(pg); + init_allocator(pg); +} + +void pgraph_vk_finalize_instance(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + finalize_allocator(pg); + vkDestroyDevice(r->device, NULL); + r->device = VK_NULL_HANDLE; + + vkDestroyInstance(r->instance, NULL); + r->instance = VK_NULL_HANDLE; +} diff --git a/hw/xbox/nv2a/pgraph/vk/meson.build b/hw/xbox/nv2a/pgraph/vk/meson.build new file mode 100644 index 0000000000..24c2474cb9 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/meson.build @@ -0,0 +1,24 @@ +if vulkan.found() + +specific_ss.add([sdl, volk, libglslang, vma, vulkan, spirv_reflect, gloffscreen, + files( + 'blit.c', + 'buffer.c', + 'command.c', + 'debug.c', + 'display.c', + 'draw.c', + 'glsl.c', + 'image.c', + 'instance.c', + 'renderer.c', + 'reports.c', + 'shaders.c', + 'surface-compute.c', + 'surface.c', + 'texture.c', + 'vertex.c', + ) + ]) + +endif diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c new file mode 100644 index 0000000000..f947aa39e5 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/renderer.c @@ -0,0 +1,266 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "renderer.h" + +#include "gloffscreen.h" + +#if HAVE_EXTERNAL_MEMORY +static GloContext *g_gl_context; + +static void gl_context_init(void) +{ + g_gl_context = glo_context_create(); +} +#endif + +static void pgraph_vk_init_thread(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + +#if HAVE_EXTERNAL_MEMORY + glo_set_current(g_gl_context); +#endif + + pgraph_vk_init_instance(pg); + pgraph_vk_init_command_buffers(pg); + pgraph_vk_init_buffers(d); + pgraph_vk_init_surfaces(pg); + pgraph_vk_init_shaders(pg); + pgraph_vk_init_pipelines(pg); + pgraph_vk_init_textures(pg); + pgraph_vk_init_reports(pg); + pgraph_vk_init_compute(pg); + pgraph_vk_init_display(pg); +} + +static void pgraph_vk_finalize(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + pgraph_vk_finalize_display(pg); + pgraph_vk_finalize_compute(pg); + pgraph_vk_finalize_reports(pg); + pgraph_vk_finalize_textures(pg); + pgraph_vk_finalize_pipelines(pg); + pgraph_vk_finalize_shaders(pg); + pgraph_vk_finalize_surfaces(pg); + pgraph_vk_finalize_buffers(d); + pgraph_vk_finalize_command_buffers(pg); + pgraph_vk_finalize_instance(pg); +} + +static void pgraph_vk_flush(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + pgraph_vk_finish(pg, VK_FINISH_REASON_FLUSH); + pgraph_vk_surface_flush(d); + pgraph_vk_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram)); + pgraph_vk_update_vertex_ram_buffer(&d->pgraph, 0, d->vram_ptr, + memory_region_size(d->vram)); + for (int i = 0; i < 4; i++) { + pg->texture_dirty[i] = true; + } + + /* FIXME: Flush more? 
*/ + + qatomic_set(&d->pgraph.flush_pending, false); + qemu_event_set(&d->pgraph.flush_complete); +} + +static void pgraph_vk_sync(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pgraph_vk_render_display(pg); + + qatomic_set(&d->pgraph.sync_pending, false); + qemu_event_set(&d->pgraph.sync_complete); +} + +static void pgraph_vk_process_pending(NV2AState *d) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + if (qatomic_read(&r->downloads_pending) || + qatomic_read(&r->download_dirty_surfaces_pending) || + qatomic_read(&d->pgraph.sync_pending) || + qatomic_read(&d->pgraph.flush_pending) + ) { + qemu_mutex_unlock(&d->pfifo.lock); + qemu_mutex_lock(&d->pgraph.lock); + if (qatomic_read(&r->downloads_pending)) { + pgraph_vk_process_pending_downloads(d); + } + if (qatomic_read(&r->download_dirty_surfaces_pending)) { + pgraph_vk_download_dirty_surfaces(d); + } + if (qatomic_read(&d->pgraph.sync_pending)) { + pgraph_vk_sync(d); + } + if (qatomic_read(&d->pgraph.flush_pending)) { + pgraph_vk_flush(d); + } + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + } +} + +static void pgraph_vk_flip_stall(NV2AState *d) +{ + pgraph_vk_finish(&d->pgraph, VK_FINISH_REASON_FLIP_STALL); + pgraph_vk_debug_frame_terminator(); +} + +static void pgraph_vk_pre_savevm_trigger(NV2AState *d) +{ + qatomic_set(&d->pgraph.vk_renderer_state->download_dirty_surfaces_pending, true); + qemu_event_reset(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete); +} + +static void pgraph_vk_pre_savevm_wait(NV2AState *d) +{ + qemu_event_wait(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete); +} + +static void pgraph_vk_pre_shutdown_trigger(NV2AState *d) +{ + // qatomic_set(&d->pgraph.vk_renderer_state->shader_cache_writeback_pending, true); + // qemu_event_reset(&d->pgraph.vk_renderer_state->shader_cache_writeback_complete); +} + +static void pgraph_vk_pre_shutdown_wait(NV2AState *d) +{ + // 
qemu_event_wait(&d->pgraph.vk_renderer_state->shader_cache_writeback_complete); +} + +static int pgraph_vk_get_framebuffer_surface(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + qemu_mutex_lock(&d->pfifo.lock); + // FIXME: Possible race condition with pgraph, consider lock + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + SurfaceBinding *surface = pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset); + if (surface == NULL || !surface->color) { + qemu_mutex_unlock(&d->pfifo.lock); + return 0; + } + + assert(surface->color); + + surface->frame_time = pg->frame_time; + +#if HAVE_EXTERNAL_MEMORY + qemu_event_reset(&d->pgraph.sync_complete); + qatomic_set(&pg->sync_pending, true); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.sync_complete); + return r->display.gl_texture_id; +#else + qemu_mutex_unlock(&d->pfifo.lock); + pgraph_vk_wait_for_surface_download(surface); + return 0; +#endif +} + +static void pgraph_vk_init(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + pg->vk_renderer_state = (PGRAPHVkState *)g_malloc0(sizeof(PGRAPHVkState)); + + pgraph_vk_debug_init(); +} + +static PGRAPHRenderer pgraph_vk_renderer = { + .type = CONFIG_DISPLAY_RENDERER_VULKAN, + .name = "Vulkan", + .ops = { + .init = pgraph_vk_init, +#if HAVE_EXTERNAL_MEMORY + .early_context_init = gl_context_init, +#endif + .init_thread = pgraph_vk_init_thread, + .finalize = pgraph_vk_finalize, + .clear_report_value = pgraph_vk_clear_report_value, + .clear_surface = pgraph_vk_clear_surface, + .draw_begin = pgraph_vk_draw_begin, + .draw_end = pgraph_vk_draw_end, + .flip_stall = pgraph_vk_flip_stall, + .flush_draw = pgraph_vk_flush_draw, + .get_report = pgraph_vk_get_report, + .image_blit = pgraph_vk_image_blit, + .pre_savevm_trigger = pgraph_vk_pre_savevm_trigger, + .pre_savevm_wait = pgraph_vk_pre_savevm_wait, + 
.pre_shutdown_trigger = pgraph_vk_pre_shutdown_trigger, + .pre_shutdown_wait = pgraph_vk_pre_shutdown_wait, + .process_pending = pgraph_vk_process_pending, + .process_pending_reports = pgraph_vk_process_pending_reports, + .surface_update = pgraph_vk_surface_update, + .set_surface_scale_factor = pgraph_vk_set_surface_scale_factor, + .get_surface_scale_factor = pgraph_vk_get_surface_scale_factor, + .get_framebuffer_surface = pgraph_vk_get_framebuffer_surface, + } +}; + +static void __attribute__((constructor)) register_renderer(void) +{ + pgraph_renderer_register(&pgraph_vk_renderer); +} + +void pgraph_vk_check_memory_budget(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPhysicalDeviceMemoryProperties const *props; + vmaGetMemoryProperties(r->allocator, &props); + + g_autofree VmaBudget *budgets = g_malloc_n(props->memoryHeapCount, sizeof(VmaBudget)); + vmaGetHeapBudgets(r->allocator, budgets); + + const float budget_threshold = 0.8; + bool near_budget = false; + + for (int i = 0; i < props->memoryHeapCount; i++) { + VmaBudget *b = &budgets[i]; + float use_to_budget_ratio = + (double)b->statistics.allocationBytes / (double)b->budget; + NV2A_VK_DPRINTF("Heap %d: used %lu/%lu MiB (%.2f%%)", i, + b->statistics.allocationBytes / (1024 * 1024), + b->budget / (1024 * 1024), use_to_budget_ratio * 100); + near_budget |= use_to_budget_ratio > budget_threshold; + } + + // If any heaps are near budget, free up some resources + if (near_budget) { + pgraph_vk_trim_texture_cache(pg); + } + +#if 0 + char *s; + vmaBuildStatsString(r->allocator, &s, VK_TRUE); + puts(s); + vmaFreeStatsString(r->allocator, s); +#endif +} diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h new file mode 100644 index 0000000000..a509de8d71 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -0,0 +1,526 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can 
redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H +#define HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H + +#define VK_NO_PROTOTYPES 1 + +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "qemu/queue.h" +#include "qemu/lru.h" +#include "hw/hw.h" +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/nv2a_regs.h" +#include "hw/xbox/nv2a/pgraph/surface.h" +#include "hw/xbox/nv2a/pgraph/texture.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +#include +#include +#include +#include + +#define VMA_STATIC_VULKAN_FUNCTIONS 1 +#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0 +#include + +#include "debug.h" +#include "constants.h" +#include "glsl.h" + +#define HAVE_EXTERNAL_MEMORY 1 + +typedef struct QueueFamilyIndices { + int queue_family; +} QueueFamilyIndices; + +typedef struct MemorySyncRequirement { + hwaddr addr, size; +} MemorySyncRequirement; + +typedef struct RenderPassState { + VkFormat color_format; + VkFormat zeta_format; +} RenderPassState; + +typedef struct RenderPass { + RenderPassState state; + VkRenderPass render_pass; +} RenderPass; + +typedef struct PipelineKey { + bool clear; + RenderPassState render_pass_state; + ShaderState shader_state; + uint32_t regs[10]; + VkVertexInputBindingDescription binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; + VkVertexInputAttributeDescription attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; +} PipelineKey; + +typedef struct 
PipelineBinding { + LruNode node; + PipelineKey key; + VkPipelineLayout layout; + VkPipeline pipeline; + VkRenderPass render_pass; + unsigned int draw_time; +} PipelineBinding; + +enum Buffer { + BUFFER_STAGING_DST, + BUFFER_STAGING_SRC, + BUFFER_COMPUTE_DST, + BUFFER_COMPUTE_SRC, + BUFFER_INDEX, + BUFFER_INDEX_STAGING, + BUFFER_VERTEX_RAM, + BUFFER_VERTEX_INLINE, + BUFFER_VERTEX_INLINE_STAGING, + BUFFER_UNIFORM, + BUFFER_UNIFORM_STAGING, + BUFFER_COUNT +}; + +typedef struct StorageBuffer { + VkBuffer buffer; + VkBufferUsageFlags usage; + VmaAllocationCreateInfo alloc_info; + VmaAllocation allocation; + VkMemoryPropertyFlags properties; + size_t buffer_offset; + size_t buffer_size; + uint8_t *mapped; +} StorageBuffer; + +typedef struct SurfaceBinding { + QTAILQ_ENTRY(SurfaceBinding) entry; + MemAccessCallback *access_cb; + + hwaddr vram_addr; + + SurfaceShape shape; + uintptr_t dma_addr; + uintptr_t dma_len; + bool color; + bool swizzle; + + unsigned int width; + unsigned int height; + unsigned int pitch; + size_t size; + + bool cleared; + int frame_time; + int draw_time; + bool draw_dirty; + bool download_pending; + bool upload_pending; + + BasicSurfaceFormatInfo fmt; + SurfaceFormatInfo host_fmt; + + VkImage image; + VkImageView image_view; + VmaAllocation allocation; + + // Used for scaling + VkImage image_scratch; + VkImageLayout image_scratch_current_layout; + VmaAllocation allocation_scratch; + + bool initialized; +} SurfaceBinding; + +typedef struct ShaderModuleInfo { + char *glsl; + GByteArray *spirv; + VkShaderModule module; + SpvReflectShaderModule reflect_module; + SpvReflectDescriptorSet **descriptor_sets; + ShaderUniformLayout uniforms; + ShaderUniformLayout push_constants; +} ShaderModuleInfo; + +typedef struct ShaderBinding { + LruNode node; + ShaderState state; + ShaderModuleInfo *geometry; + ShaderModuleInfo *vertex; + ShaderModuleInfo *fragment; + + int psh_constant_loc[9][2]; + int alpha_ref_loc; + + int bump_mat_loc[NV2A_MAX_TEXTURES]; + int 
bump_scale_loc[NV2A_MAX_TEXTURES]; + int bump_offset_loc[NV2A_MAX_TEXTURES]; + int tex_scale_loc[NV2A_MAX_TEXTURES]; + + int surface_size_loc; + int clip_range_loc; + + int vsh_constant_loc; + uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; + + int inv_viewport_loc; + int ltctxa_loc; + int ltctxb_loc; + int ltc1_loc; + + int fog_color_loc; + int fog_param_loc; + int light_infinite_half_vector_loc[NV2A_MAX_LIGHTS]; + int light_infinite_direction_loc[NV2A_MAX_LIGHTS]; + int light_local_position_loc[NV2A_MAX_LIGHTS]; + int light_local_attenuation_loc[NV2A_MAX_LIGHTS]; + + int clip_region_loc; + + int material_alpha_loc; +} ShaderBinding; + +typedef struct TextureKey { + TextureShape state; + hwaddr texture_vram_offset; + hwaddr texture_length; + hwaddr palette_vram_offset; + hwaddr palette_length; + float scale; +} TextureKey; + +typedef struct TextureBinding { + LruNode node; + TextureKey key; + VkImage image; + VkImageLayout current_layout; + VkImageView image_view; + VmaAllocation allocation; + VkSampler sampler; + bool possibly_dirty; + uint64_t hash; + unsigned int draw_time; + uint32_t submit_time; +} TextureBinding; + +typedef struct QueryReport { + QSIMPLEQ_ENTRY(QueryReport) entry; + bool clear; + uint32_t parameter; + unsigned int query_count; +} QueryReport; + +typedef struct PGRAPHVkDisplayState { + ShaderModuleInfo *display_frag; + + VkDescriptorPool descriptor_pool; + VkDescriptorSetLayout descriptor_set_layout; + VkDescriptorSet descriptor_set; + + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + + VkRenderPass render_pass; + VkFramebuffer framebuffer; + + VkImage image; + VkImageView image_view; + VkDeviceMemory memory; + VkSampler sampler; + + int width, height; + int draw_time; + + // OpenGL Interop +#ifdef WIN32 + HANDLE handle; +#else + int fd; +#endif + GLuint gl_memory_obj; + GLuint gl_texture_id; +} PGRAPHVkDisplayState; + +typedef struct PGRAPHVkComputeState { + VkDescriptorPool descriptor_pool; + VkDescriptorSetLayout 
descriptor_set_layout; + VkDescriptorSet descriptor_sets[1]; + VkPipelineLayout pipeline_layout; + VkPipeline pipeline_pack_d24s8; + VkPipeline pipeline_unpack_d24s8; + VkPipeline pipeline_pack_f32s8; + VkPipeline pipeline_unpack_f32s8; +} PGRAPHVkComputeState; + +typedef struct PGRAPHVkState { + void *window; + VkInstance instance; + + bool debug_utils_extension_enabled; + bool custom_border_color_extension_enabled; + bool provoking_vertex_extension_enabled; + bool memory_budget_extension_enabled; + + VkPhysicalDevice physical_device; + VkPhysicalDeviceProperties device_props; + VkDevice device; + VmaAllocator allocator; + uint32_t allocator_last_submit_index; + + VkQueue queue; + VkCommandPool command_pool; + VkCommandBuffer command_buffers[2]; + + VkCommandBuffer command_buffer; + VkSemaphore command_buffer_semaphore; + VkFence command_buffer_fence; + unsigned int command_buffer_start_time; + bool in_command_buffer; + uint32_t submit_count; + + VkCommandBuffer aux_command_buffer; + bool in_aux_command_buffer; + + VkFramebuffer framebuffers[50]; + int framebuffer_index; + bool framebuffer_dirty; + + VkRenderPass render_pass; + RenderPass *render_passes; + int render_passes_index; + int render_passes_capacity; + bool in_render_pass; + bool in_draw; + + Lru pipeline_cache; + VkPipelineCache vk_pipeline_cache; + PipelineBinding *pipeline_cache_entries; + PipelineBinding *pipeline_binding; + bool pipeline_binding_changed; + + VkDescriptorPool descriptor_pool; + VkDescriptorSetLayout descriptor_set_layout; + VkDescriptorSet descriptor_sets[1024]; + int descriptor_set_index; + + StorageBuffer storage_buffers[BUFFER_COUNT]; + + MemorySyncRequirement vertex_ram_buffer_syncs[NV2A_VERTEXSHADER_ATTRIBUTES]; + size_t num_vertex_ram_buffer_syncs; + unsigned long *uploaded_bitmap; + size_t bitmap_size; + + VkVertexInputAttributeDescription vertex_attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; + int vertex_attribute_to_description_location[NV2A_VERTEXSHADER_ATTRIBUTES]; 
+ int num_active_vertex_attribute_descriptions; + + VkVertexInputBindingDescription vertex_binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; + int num_active_vertex_binding_descriptions; + hwaddr vertex_attribute_offsets[NV2A_VERTEXSHADER_ATTRIBUTES]; + + QTAILQ_HEAD(, SurfaceBinding) surfaces; + QTAILQ_HEAD(, SurfaceBinding) invalid_surfaces; + SurfaceBinding *color_binding, *zeta_binding; + bool downloads_pending; + QemuEvent downloads_complete; + bool download_dirty_surfaces_pending; + QemuEvent dirty_surfaces_download_complete; // common + + Lru texture_cache; + TextureBinding *texture_cache_entries; + TextureBinding *texture_bindings[NV2A_MAX_TEXTURES]; + TextureBinding dummy_texture; + bool texture_bindings_changed; + + Lru shader_cache; + ShaderBinding *shader_cache_entries; + ShaderBinding *shader_binding; + ShaderModuleInfo *quad_vert_module, *solid_frag_module; + bool shader_bindings_changed; + + // FIXME: Merge these into a structure + uint64_t uniform_buffer_hashes[2]; + size_t uniform_buffer_offsets[2]; + bool uniforms_changed; + + VkQueryPool query_pool; + int max_queries_in_flight; // FIXME: Move out to constant + int num_queries_in_flight; + bool new_query_needed; + bool query_in_flight; + uint32_t zpass_pixel_count_result; + QSIMPLEQ_HEAD(, QueryReport) report_queue; // FIXME: Statically allocate + + SurfaceFormatInfo kelvin_surface_zeta_vk_map[3]; + + uint32_t clear_parameter; + + PGRAPHVkDisplayState display; + PGRAPHVkComputeState compute; +} PGRAPHVkState; + +// renderer.c +void pgraph_vk_check_memory_budget(PGRAPHState *pg); + +// debug.c +void pgraph_vk_debug_init(void); + +// instance.c +void pgraph_vk_init_instance(PGRAPHState *pg); +void pgraph_vk_finalize_instance(PGRAPHState *pg); +QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device); +uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits, + VkMemoryPropertyFlags properties); + +// glsl.c +void pgraph_vk_init_glsl_compiler(void); +void 
pgraph_vk_finalize_glsl_compiler(void); +GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage, + const char *glsl_source); +VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r, + GByteArray *spv); +ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl( + PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl); +void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info); + +// buffer.c +void pgraph_vk_init_buffers(NV2AState *d); +void pgraph_vk_finalize_buffers(NV2AState *d); +bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index, + VkDeviceSize size, + VkDeviceAddress alignment); +VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data, + VkDeviceSize *sizes, size_t count, + VkDeviceAddress alignment); + +// command.c +void pgraph_vk_init_command_buffers(PGRAPHState *pg); +void pgraph_vk_finalize_command_buffers(PGRAPHState *pg); +VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg); +void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd); + +// image.c +void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, + VkImage image, VkFormat format, + VkImageLayout oldLayout, + VkImageLayout newLayout); + +// vertex.c +void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element, + unsigned int max_element, + bool inline_data, + unsigned int inline_stride, + unsigned int provoking_element); +void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d); +void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset, void *data, + VkDeviceSize size); +VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data, + VkDeviceSize size); +VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data, + VkDeviceSize *sizes, + size_t count); + +// surface.c +void pgraph_vk_init_surfaces(PGRAPHState *pg); +void pgraph_vk_finalize_surfaces(PGRAPHState *pg); +void 
pgraph_vk_surface_flush(NV2AState *d); +void pgraph_vk_process_pending_downloads(NV2AState *d); +void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface); +SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr); +void pgraph_vk_wait_for_surface_download(SurfaceBinding *e); +void pgraph_vk_download_dirty_surfaces(NV2AState *d); +void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, + bool force); +void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write, + bool zeta_write); +SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr); +void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta); +void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale); +unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d); +void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg); + +// surface-compute.c +void pgraph_vk_init_compute(PGRAPHState *pg); +void pgraph_vk_finalize_compute(PGRAPHState *pg); +void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, + VkCommandBuffer cmd, VkBuffer src, + VkBuffer dst, bool downscale); +void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, + VkCommandBuffer cmd, VkBuffer src, + VkBuffer dst); + +// display.c +void pgraph_vk_init_display(PGRAPHState *pg); +void pgraph_vk_finalize_display(PGRAPHState *pg); +void pgraph_vk_render_display(PGRAPHState *pg); + +// texture.c +void pgraph_vk_init_textures(PGRAPHState *pg); +void pgraph_vk_finalize_textures(PGRAPHState *pg); +void pgraph_vk_bind_textures(NV2AState *d); +void pgraph_vk_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, + hwaddr size); +void pgraph_vk_trim_texture_cache(PGRAPHState *pg); + +// shaders.c +void pgraph_vk_init_shaders(PGRAPHState *pg); +void pgraph_vk_finalize_shaders(PGRAPHState *pg); +void pgraph_vk_update_descriptor_sets(PGRAPHState *pg); +void pgraph_vk_bind_shaders(PGRAPHState *pg); +void 
pgraph_vk_update_shader_uniforms(PGRAPHState *pg); + +// reports.c +void pgraph_vk_init_reports(PGRAPHState *pg); +void pgraph_vk_finalize_reports(PGRAPHState *pg); +void pgraph_vk_clear_report_value(NV2AState *d); +void pgraph_vk_get_report(NV2AState *d, uint32_t parameter); +void pgraph_vk_process_pending_reports(NV2AState *d); +void pgraph_vk_process_pending_reports_internal(NV2AState *d); + +typedef enum FinishReason { + VK_FINISH_REASON_VERTEX_BUFFER_DIRTY, + VK_FINISH_REASON_SURFACE_CREATE, + VK_FINISH_REASON_SURFACE_DOWN, + VK_FINISH_REASON_NEED_BUFFER_SPACE, + VK_FINISH_REASON_FRAMEBUFFER_DIRTY, + VK_FINISH_REASON_PRESENTING, + VK_FINISH_REASON_FLIP_STALL, + VK_FINISH_REASON_FLUSH, +} FinishReason; + +// draw.c +void pgraph_vk_init_pipelines(PGRAPHState *pg); +void pgraph_vk_finalize_pipelines(PGRAPHState *pg); +void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter); +void pgraph_vk_draw_begin(NV2AState *d); +void pgraph_vk_draw_end(NV2AState *d); +void pgraph_vk_finish(PGRAPHState *pg, FinishReason why); +void pgraph_vk_flush_draw(NV2AState *d); +void pgraph_vk_begin_command_buffer(PGRAPHState *pg); +void pgraph_vk_ensure_command_buffer(PGRAPHState *pg); +void pgraph_vk_ensure_not_in_render_pass(PGRAPHState *pg); + +VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg); +void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd); + +// blit.c +void pgraph_vk_image_blit(NV2AState *d); + +#endif diff --git a/hw/xbox/nv2a/pgraph/vk/reports.c b/hw/xbox/nv2a/pgraph/vk/reports.c new file mode 100644 index 0000000000..2e6bdf96f3 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/reports.c @@ -0,0 +1,134 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) 
any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "renderer.h"

/*
 * Set up occlusion-query reporting: an empty report queue, zeroed
 * bookkeeping, and a Vulkan occlusion query pool sized for
 * max_queries_in_flight queries.
 */
void pgraph_vk_init_reports(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    QSIMPLEQ_INIT(&r->report_queue);
    r->num_queries_in_flight = 0;
    r->max_queries_in_flight = 1024;
    r->new_query_needed = true;
    r->query_in_flight = false;
    r->zpass_pixel_count_result = 0;

    VkQueryPoolCreateInfo pool_create_info = (VkQueryPoolCreateInfo){
        .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
        .queryType = VK_QUERY_TYPE_OCCLUSION,
        .queryCount = r->max_queries_in_flight,
    };
    VK_CHECK(
        vkCreateQueryPool(r->device, &pool_create_info, NULL, &r->query_pool));
}

/* Destroy the query pool created by pgraph_vk_init_reports(). */
void pgraph_vk_finalize_reports(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkDestroyQueryPool(r->device, r->query_pool, NULL);
}

/*
 * Queue a "clear" marker so the accumulated zpass pixel count is reset in
 * order relative to any report requests already queued.
 */
void pgraph_vk_clear_report_value(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    QueryReport *q = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate
    q->clear = true;
    QSIMPLEQ_INSERT_TAIL(&r->report_queue, q, entry);
}

/*
 * Queue a report request for the guest. Only ZPASS_PIXEL_CNT is supported
 * (asserted). The entry records how many queries were in flight at queue
 * time, so processing knows how many results to accumulate before the
 * report is written; a fresh query is requested for subsequent draws.
 */
void pgraph_vk_get_report(NV2AState *d, uint32_t parameter)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE);
    assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT);

    QueryReport *q = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate
    q->clear = false;
    q->parameter = parameter;
    q->query_count = r->num_queries_in_flight;
    QSIMPLEQ_INSERT_TAIL(&r->report_queue, q, entry);

    r->new_query_needed = true;
}

/*
 * Drain all pending query results and queued report entries.
 *
 * Blocks (via VK_QUERY_RESULT_WAIT_BIT) until every in-flight occlusion
 * query has a result, then walks the report queue in order: "clear"
 * entries reset the running zpass count, report entries fold in the
 * results issued before them and emit a report to guest memory. Must not
 * be called while a command buffer is being recorded (asserted).
 */
void pgraph_vk_process_pending_reports_internal(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    NV2A_VK_DGROUP_BEGIN("Processing queries");

    assert(!r->in_command_buffer);

    // Fetch all query results
    g_autofree uint64_t *query_results = NULL;

    if (r->num_queries_in_flight > 0) {
        size_t size_of_results = r->num_queries_in_flight * sizeof(uint64_t);
        query_results = g_malloc_n(r->num_queries_in_flight,
                                   sizeof(uint64_t)); // FIXME: Pre-allocate
        VkResult result;
        do {
            result = vkGetQueryPoolResults(
                r->device, r->query_pool, 0, r->num_queries_in_flight,
                size_of_results, query_results, sizeof(uint64_t),
                VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
        } while (result == VK_NOT_READY);
    }

    // Write out queries
    QueryReport *q, *next;
    int num_results_counted = 0;

    // Rendering happens at surface_scale_factor resolution; divide pixel
    // counts back down to guest resolution.
    int result_divisor = pg->surface_scale_factor * pg->surface_scale_factor;

    QSIMPLEQ_FOREACH_SAFE (q, &r->report_queue, entry, next) {
        if (q->clear) {
            NV2A_VK_DPRINTF("Cleared");
            r->zpass_pixel_count_result = 0;
        } else {
            assert(q->query_count >= num_results_counted);
            assert(q->query_count <= r->num_queries_in_flight);

            // Accumulate results of all queries issued before this report
            // was requested.
            while (num_results_counted < q->query_count) {
                r->zpass_pixel_count_result +=
                    query_results[num_results_counted++];
            }

            pgraph_write_zpass_pixel_cnt_report(
                d, q->parameter,
                r->zpass_pixel_count_result / result_divisor);
        }
        QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry);
        g_free(q);
    }

    r->num_queries_in_flight = 0;
    NV2A_VK_DGROUP_END();
}

/*
 * No-op for the Vulkan renderer. NOTE(review): processing appears to be
 * done via pgraph_vk_process_pending_reports_internal() at finish time —
 * confirm at call sites.
 */
void pgraph_vk_process_pending_reports(NV2AState *d)
{
}
Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "hw/xbox/nv2a/pgraph/util.h" +#include "hw/xbox/nv2a/pgraph/glsl/geom.h" +#include "hw/xbox/nv2a/pgraph/glsl/vsh.h" +#include "hw/xbox/nv2a/pgraph/glsl/psh.h" +#include "qemu/fast-hash.h" +#include "qemu/mstring.h" +#include "renderer.h" +#include + +static void create_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + size_t num_sets = ARRAY_SIZE(r->descriptor_sets); + + VkDescriptorPoolSize pool_sizes[] = { + { + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 2 * num_sets, + }, + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = NV2A_MAX_TEXTURES * num_sets, + } + }; + + VkDescriptorPoolCreateInfo pool_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .poolSizeCount = ARRAY_SIZE(pool_sizes), + .pPoolSizes = pool_sizes, + .maxSets = ARRAY_SIZE(r->descriptor_sets), + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + }; + VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL, + &r->descriptor_pool)); +} + +static void destroy_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorPool(r->device, r->descriptor_pool, NULL); + 
r->descriptor_pool = VK_NULL_HANDLE; +} + +static void create_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorSetLayoutBinding bindings[2 + NV2A_MAX_TEXTURES]; + + bindings[0] = (VkDescriptorSetLayoutBinding){ + .binding = VSH_UBO_BINDING, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + }; + bindings[1] = (VkDescriptorSetLayoutBinding){ + .binding = PSH_UBO_BINDING, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }; + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + bindings[2 + i] = (VkDescriptorSetLayoutBinding){ + .binding = PSH_TEX_BINDING + i, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }; + } + VkDescriptorSetLayoutCreateInfo layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = ARRAY_SIZE(bindings), + .pBindings = bindings, + }; + VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL, + &r->descriptor_set_layout)); +} + +static void destroy_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorSetLayout(r->device, r->descriptor_set_layout, NULL); + r->descriptor_set_layout = VK_NULL_HANDLE; +} + +static void create_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorSetLayout layouts[ARRAY_SIZE(r->descriptor_sets)]; + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + layouts[i] = r->descriptor_set_layout; + } + + VkDescriptorSetAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = r->descriptor_pool, + .descriptorSetCount = ARRAY_SIZE(r->descriptor_sets), + .pSetLayouts = layouts, + }; + VK_CHECK( + vkAllocateDescriptorSets(r->device, &alloc_info, 
r->descriptor_sets)); +} + +static void destroy_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkFreeDescriptorSets(r->device, r->descriptor_pool, + ARRAY_SIZE(r->descriptor_sets), r->descriptor_sets); + for (int i = 0; i < ARRAY_SIZE(r->descriptor_sets); i++) { + r->descriptor_sets[i] = VK_NULL_HANDLE; + } +} + +void pgraph_vk_update_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + bool need_uniform_write = + r->uniforms_changed || + !r->storage_buffers[BUFFER_UNIFORM_STAGING].buffer_offset; + + if (!(r->shader_bindings_changed || r->texture_bindings_changed || + (r->descriptor_set_index == 0) || need_uniform_write)) { + return; // Nothing changed + } + + ShaderBinding *binding = r->shader_binding; + ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms, + &binding->fragment->uniforms }; + VkDeviceSize ubo_buffer_total_size = 0; + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + ubo_buffer_total_size += layouts[i]->total_size; + } + bool need_ubo_staging_buffer_reset = + r->uniforms_changed && + !pgraph_vk_buffer_has_space_for(pg, BUFFER_UNIFORM_STAGING, + ubo_buffer_total_size, + r->device_props.limits.minUniformBufferOffsetAlignment); + + bool need_descriptor_write_reset = + (r->descriptor_set_index >= ARRAY_SIZE(r->descriptor_sets)); + + if (need_descriptor_write_reset || need_ubo_staging_buffer_reset) { + pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); + need_uniform_write = true; + } + + VkWriteDescriptorSet descriptor_writes[2 + NV2A_MAX_TEXTURES]; + + assert(r->descriptor_set_index < ARRAY_SIZE(r->descriptor_sets)); + + if (need_uniform_write) { + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + void *data = layouts[i]->allocation; + VkDeviceSize size = layouts[i]->total_size; + r->uniform_buffer_offsets[i] = pgraph_vk_append_to_buffer( + pg, BUFFER_UNIFORM_STAGING, &data, &size, 1, + r->device_props.limits.minUniformBufferOffsetAlignment); + } + + 
r->uniforms_changed = false; + } + + VkDescriptorBufferInfo ubo_buffer_infos[2]; + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + ubo_buffer_infos[i] = (VkDescriptorBufferInfo){ + .buffer = r->storage_buffers[BUFFER_UNIFORM].buffer, + .offset = r->uniform_buffer_offsets[i], + .range = layouts[i]->total_size, + }; + descriptor_writes[i] = (VkWriteDescriptorSet){ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = r->descriptor_sets[r->descriptor_set_index], + .dstBinding = i == 0 ? VSH_UBO_BINDING : PSH_UBO_BINDING, + .dstArrayElement = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .pBufferInfo = &ubo_buffer_infos[i], + }; + } + + VkDescriptorImageInfo image_infos[NV2A_MAX_TEXTURES]; + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + image_infos[i] = (VkDescriptorImageInfo){ + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageView = r->texture_bindings[i]->image_view, + .sampler = r->texture_bindings[i]->sampler, + }; + descriptor_writes[2 + i] = (VkWriteDescriptorSet){ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = r->descriptor_sets[r->descriptor_set_index], + .dstBinding = PSH_TEX_BINDING + i, + .dstArrayElement = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .pImageInfo = &image_infos[i], + }; + } + + vkUpdateDescriptorSets(r->device, 6, descriptor_writes, 0, NULL); + + r->descriptor_set_index++; +} + +static void update_shader_constant_locations(ShaderBinding *binding) +{ + int i, j; + char tmp[64]; + + /* lookup fragment shader uniforms */ + for (i = 0; i < 9; i++) { + for (j = 0; j < 2; j++) { + snprintf(tmp, sizeof(tmp), "c%d_%d", j, i); + binding->psh_constant_loc[i][j] = + uniform_index(&binding->fragment->uniforms, tmp); + } + } + binding->alpha_ref_loc = + uniform_index(&binding->fragment->uniforms, "alphaRef"); + binding->fog_color_loc = + uniform_index(&binding->fragment->uniforms, "fogColor"); + for (i = 1; i < 
NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "bumpMat%d", i); + binding->bump_mat_loc[i] = + uniform_index(&binding->fragment->uniforms, tmp); + snprintf(tmp, sizeof(tmp), "bumpScale%d", i); + binding->bump_scale_loc[i] = + uniform_index(&binding->fragment->uniforms, tmp); + snprintf(tmp, sizeof(tmp), "bumpOffset%d", i); + binding->bump_offset_loc[i] = + uniform_index(&binding->fragment->uniforms, tmp); + } + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "texScale%d", i); + binding->tex_scale_loc[i] = + uniform_index(&binding->fragment->uniforms, tmp); + } + + /* lookup vertex shader uniforms */ + binding->vsh_constant_loc = uniform_index(&binding->vertex->uniforms, "c"); + binding->surface_size_loc = + uniform_index(&binding->vertex->uniforms, "surfaceSize"); + binding->clip_range_loc = + uniform_index(&binding->vertex->uniforms, "clipRange"); + binding->fog_param_loc = + uniform_index(&binding->vertex->uniforms, "fogParam"); + + binding->inv_viewport_loc = + uniform_index(&binding->vertex->uniforms, "invViewport"); + binding->ltctxa_loc = uniform_index(&binding->vertex->uniforms, "ltctxa"); + binding->ltctxb_loc = uniform_index(&binding->vertex->uniforms, "ltctxb"); + binding->ltc1_loc = uniform_index(&binding->vertex->uniforms, "ltc1"); + + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i); + binding->light_infinite_half_vector_loc[i] = + uniform_index(&binding->vertex->uniforms, tmp); + snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i); + binding->light_infinite_direction_loc[i] = + uniform_index(&binding->vertex->uniforms, tmp); + + snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i); + binding->light_local_position_loc[i] = + uniform_index(&binding->vertex->uniforms, tmp); + snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i); + binding->light_local_attenuation_loc[i] = + uniform_index(&binding->vertex->uniforms, tmp); + } + + binding->clip_region_loc = 
+ uniform_index(&binding->fragment->uniforms, "clipRegion"); + + binding->material_alpha_loc = + uniform_index(&binding->vertex->uniforms, "material_alpha"); +} + +static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + ShaderBinding *snode = container_of(node, ShaderBinding, node); + memcpy(&snode->state, state, sizeof(ShaderState)); +} + +static void shader_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, shader_cache); + ShaderBinding *snode = container_of(node, ShaderBinding, node); + + ShaderModuleInfo *modules[] = { + snode->geometry, + snode->vertex, + snode->fragment, + }; + for (int i = 0; i < ARRAY_SIZE(modules); i++) { + if (modules[i]) { + pgraph_vk_destroy_shader_module(r, modules[i]); + } + } + + memset(&snode->state, 0, sizeof(ShaderState)); +} + +static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + ShaderBinding *snode = container_of(node, ShaderBinding, node); + return memcmp(&snode->state, key, sizeof(ShaderState)); +} + +static void shader_cache_init(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + const size_t shader_cache_size = 1024; + lru_init(&r->shader_cache); + r->shader_cache_entries = g_malloc_n(shader_cache_size, sizeof(ShaderBinding)); + assert(r->shader_cache_entries != NULL); + for (int i = 0; i < shader_cache_size; i++) { + lru_add_free(&r->shader_cache, &r->shader_cache_entries[i].node); + } + r->shader_cache.init_node = shader_cache_entry_init; + r->shader_cache.compare_nodes = shader_cache_entry_compare; + r->shader_cache.post_node_evict = shader_cache_entry_post_evict; +} + +static void shader_cache_finalize(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + lru_flush(&r->shader_cache); + g_free(r->shader_cache_entries); + r->shader_cache_entries = NULL; +} + +static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + uint64_t 
hash = fast_hash((void *)state, sizeof(*state)); + LruNode *node = lru_lookup(&r->shader_cache, hash, state); + ShaderBinding *snode = container_of(node, ShaderBinding, node); + + NV2A_VK_DPRINTF("shader state hash: %016lx, %p", hash, snode); + + if (!snode->fragment) { + NV2A_VK_DPRINTF("cache miss"); + nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN); + + char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL); + if (previous_numeric_locale) { + previous_numeric_locale = g_strdup(previous_numeric_locale); + } + + /* Ensure numeric values are printed with '.' radix, no grouping */ + setlocale(LC_NUMERIC, "C"); + + MString *geometry_shader_code = pgraph_gen_geom_glsl( + state->polygon_front_mode, state->polygon_back_mode, + state->primitive_mode, state->smooth_shading, true); + if (geometry_shader_code) { + NV2A_VK_DPRINTF("geometry shader: \n%s", + mstring_get_str(geometry_shader_code)); + snode->geometry = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_GEOMETRY_BIT, + mstring_get_str(geometry_shader_code)); + mstring_unref(geometry_shader_code); + } else { + memset(&snode->geometry, 0, sizeof(snode->geometry)); + } + + MString *vertex_shader_code = + pgraph_gen_vsh_glsl(state, geometry_shader_code != NULL); + NV2A_VK_DPRINTF("vertex shader: \n%s", + mstring_get_str(vertex_shader_code)); + snode->vertex = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_VERTEX_BIT, + mstring_get_str(vertex_shader_code)); + mstring_unref(vertex_shader_code); + + MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh); + NV2A_VK_DPRINTF("fragment shader: \n%s", + mstring_get_str(fragment_shader_code)); + snode->fragment = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_FRAGMENT_BIT, + mstring_get_str(fragment_shader_code)); + mstring_unref(fragment_shader_code); + + if (previous_numeric_locale) { + setlocale(LC_NUMERIC, previous_numeric_locale); + g_free(previous_numeric_locale); + } + + update_shader_constant_locations(snode); 
// FIXME: Move to common
/*
 * Upload all shader constants derived from PGRAPH register state into the
 * binding's vertex/fragment uniform staging areas: combiner constants,
 * alpha ref, bump-env matrices/scale/offset, texture scales, fog, lighting
 * (fixed function only), vertex-program constants, surface size, clip
 * range, window-clip regions and material alpha. Uniforms whose location
 * resolved to -1 are skipped (not present in this shader variant).
 *
 * NOTE(review): the *(float *)&u32 casts are type-punning reinterprets of
 * register bits as IEEE floats; strictly this violates C strict-aliasing
 * rules (memcpy would be the conforming form) — matches the GL renderer's
 * existing style.
 */
static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
                                    bool binding_changed, bool vertex_program,
                                    bool fixed_function)
{
    int i, j;

    /* update combiner constants */
    for (i = 0; i < 9; i++) {
        uint32_t constant[2];
        if (i == 8) {
            /* final combiner */
            constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0);
            constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1);
        } else {
            constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
            constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
        }

        for (j = 0; j < 2; j++) {
            GLint loc = binding->psh_constant_loc[i][j];
            if (loc != -1) {
                float value[4];
                pgraph_argb_pack32_to_rgba_float(constant[j], value);
                uniform1fv(&binding->fragment->uniforms, loc, 4, value);
            }
        }
    }
    if (binding->alpha_ref_loc != -1) {
        /* 8-bit register value normalized to [0,1] */
        float alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
                                   NV_PGRAPH_CONTROL_0_ALPHAREF) /
                          255.0;
        uniform1f(&binding->fragment->uniforms, binding->alpha_ref_loc,
                  alpha_ref);
    }


    /* For each texture stage */
    for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
        int loc;

        /* Bump luminance only during stages 1 - 3 */
        if (i > 0) {
            loc = binding->bump_mat_loc[i];
            if (loc != -1) {
                uint32_t m_u32[4];
                m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1));
                m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1));
                m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1));
                m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1));
                float m[4];
                m[0] = *(float*)&m_u32[0];
                m[1] = *(float*)&m_u32[1];
                m[2] = *(float*)&m_u32[2];
                m[3] = *(float*)&m_u32[3];
                uniformMatrix2fv(&binding->fragment->uniforms, loc, m);
            }
            loc = binding->bump_scale_loc[i];
            if (loc != -1) {
                uint32_t v =
                    pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4);
                uniform1f(&binding->fragment->uniforms, loc,
                          *(float *)&v);
            }
            loc = binding->bump_offset_loc[i];
            if (loc != -1) {
                uint32_t v =
                    pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4);
                uniform1f(&binding->fragment->uniforms, loc,
                          *(float *)&v);
            }
        }

        loc = binding->tex_scale_loc[i];
        if (loc != -1) {
            assert(pg->vk_renderer_state->texture_bindings[i] != NULL);
            float scale = pg->vk_renderer_state->texture_bindings[i]->key.scale;
            BasicColorFormatInfo f_basic = kelvin_color_format_info_map[pg->vk_renderer_state->texture_bindings[i]->key.state.color_format];
            if (!f_basic.linear) {
                /* Swizzled formats sample with normalized coords */
                scale = 1.0;
            }
            uniform1f(&binding->fragment->uniforms, loc, scale);
        }
    }

    if (binding->fog_color_loc != -1) {
        uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR);
        uniform4f(&binding->fragment->uniforms, binding->fog_color_loc,
                  GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0,
                  GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0,
                  GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0,
                  GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0);
    }
    if (binding->fog_param_loc != -1) {
        uint32_t v[2];
        v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0);
        v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1);
        uniform2f(&binding->vertex->uniforms,
                  binding->fog_param_loc, *(float *)&v[0],
                  *(float *)&v[1]);
    }

    /* Maximum representable depth for the bound zeta format, used to
     * (de)normalize depth-related uniforms below.
     * NOTE(review): zmax is only set for Z16/Z24S8; any other format hits
     * assert(0) and, with NDEBUG, would fall through with zmax
     * uninitialized — confirm unreachable. */
    float zmax;
    switch (pg->surface_shape.zeta_format) {
    case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
        zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF;
        break;
    case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
        zmax = pg->surface_shape.z_format ? f24_max : (float)0xFFFFFF;
        break;
    default:
        assert(0);
    }

    if (fixed_function) {
        /* update lighting constants */
        struct {
            uint32_t *v;
            int locs;
            size_t len;
        } lighting_arrays[] = {
            { &pg->ltctxa[0][0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT },
            { &pg->ltctxb[0][0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT },
            { &pg->ltc1[0][0], binding->ltc1_loc, NV2A_LTC1_COUNT },
        };

        for (i = 0; i < ARRAY_SIZE(lighting_arrays); i++) {
            /* Raw register bits; shader reinterprets as needed */
            uniform1iv(
                &binding->vertex->uniforms, lighting_arrays[i].locs,
                lighting_arrays[i].len * 4, (void *)lighting_arrays[i].v);
        }

        for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
            int loc = binding->light_infinite_half_vector_loc[i];
            if (loc != -1) {
                uniform1fv(&binding->vertex->uniforms, loc, 3,
                           pg->light_infinite_half_vector[i]);
            }
            loc = binding->light_infinite_direction_loc[i];
            if (loc != -1) {
                uniform1fv(&binding->vertex->uniforms, loc, 3,
                           pg->light_infinite_direction[i]);
            }

            loc = binding->light_local_position_loc[i];
            if (loc != -1) {
                uniform1fv(&binding->vertex->uniforms, loc, 3,
                           pg->light_local_position[i]);
            }
            loc = binding->light_local_attenuation_loc[i];
            if (loc != -1) {
                uniform1fv(&binding->vertex->uniforms, loc, 3,
                           pg->light_local_attenuation[i]);
            }
        }

        /* estimate the viewport by assuming it matches the surface ... */
        unsigned int aa_width = 1, aa_height = 1;
        pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);

        float m11 = 0.5 * (pg->surface_binding_dim.width / aa_width);
        float m22 = -0.5 * (pg->surface_binding_dim.height / aa_height);
        float m33 = zmax;
        float m41 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0];
        float m42 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1];

        /* Column-major inverse of the estimated viewport transform */
        float invViewport[16] = {
            1.0 / m11, 0, 0, 0, 0, 1.0 / m22, 0,
            0, 0, 0, 1.0 / m33, 0, -1.0 + m41 / m11, 1.0 + m42 / m22,
            0, 1.0
        };

        if (binding->inv_viewport_loc != -1) {
            uniformMatrix4fv(&binding->vertex->uniforms,
                             binding->inv_viewport_loc, &invViewport[0]);
        }
    }

    /* update vertex program constants */
    uniform1iv(&binding->vertex->uniforms, binding->vsh_constant_loc,
               NV2A_VERTEXSHADER_CONSTANTS * 4, (void *)pg->vsh_constants);

    if (binding->surface_size_loc != -1) {
        unsigned int aa_width = 1, aa_height = 1;
        pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
        uniform2f(&binding->vertex->uniforms, binding->surface_size_loc,
                  pg->surface_binding_dim.width / aa_width,
                  pg->surface_binding_dim.height / aa_height);
    }

    if (binding->clip_range_loc != -1) {
        uint32_t v[2];
        v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN);
        v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX);
        /* Normalize clip bounds to NDC [-1, 1] */
        float zclip_min = *(float *)&v[0] / zmax * 2.0 - 1.0;
        float zclip_max = *(float *)&v[1] / zmax * 2.0 - 1.0;
        uniform4f(&binding->vertex->uniforms, binding->clip_range_loc, 0,
                  zmax, zclip_min, zclip_max);
    }

    /* Clipping regions */
    unsigned int max_gl_width = pg->surface_binding_dim.width;
    unsigned int max_gl_height = pg->surface_binding_dim.height;
    pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height);

    uint32_t clip_regions[8][4];

    for (i = 0; i < 8; i++) {
        /* Register max is inclusive; +1 converts to exclusive bound */
        uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4);
        unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
        unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1;
        uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4);
        unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
        unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1;
        pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
        pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);

        pgraph_apply_scaling_factor(pg, &x_min, &y_min);
        pgraph_apply_scaling_factor(pg, &x_max, &y_max);

        clip_regions[i][0] = x_min;
        clip_regions[i][1] = y_min;
        clip_regions[i][2] = x_max;
        clip_regions[i][3] = y_max;
    }
    uniform1iv(&binding->fragment->uniforms, binding->clip_region_loc,
               8 * 4, (void *)clip_regions);

    if (binding->material_alpha_loc != -1) {
        uniform1f(&binding->vertex->uniforms, binding->material_alpha_loc,
                  pg->material_alpha);
    }
}
// Quickly check PGRAPH state to see if any registers have changed that
// necessitate a full shader state inspection.
static bool check_shaders_dirty(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    /* No binding yet: everything is dirty by definition. */
    if (!r->shader_binding) {
        return true;
    }
    if (pg->program_data_dirty) {
        return true;
    }

    /* Active combiner stage registers */
    int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
    for (int i = 0; i < num_stages; i++) {
        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4)) {
            return true;
        }
    }
    /* Registers that feed shader generation directly */
    unsigned int regs[] = {
        NV_PGRAPH_COMBINECTL,
        NV_PGRAPH_COMBINESPECFOG0,
        NV_PGRAPH_COMBINESPECFOG1,
        NV_PGRAPH_CSV0_C,
        NV_PGRAPH_CSV0_D,
        NV_PGRAPH_CSV1_A,
        NV_PGRAPH_CSV1_B,
        NV_PGRAPH_POINTSIZE,
        NV_PGRAPH_SHADERCLIPMODE,
        NV_PGRAPH_SHADERCTL,
        NV_PGRAPH_SHADERPROG,
        NV_PGRAPH_SHADOWCTL,
    };
    for (int i = 0; i < ARRAY_SIZE(regs); i++) {
        if (pgraph_is_reg_dirty(pg, regs[i])) {
            return true;
        }
    }

    /* Non-register inputs baked into the cached shader state key */
    ShaderState *state = &r->shader_binding->state;
    if (pg->uniform_attrs != state->uniform_attrs ||
        pg->swizzle_attrs != state->swizzle_attrs ||
        pg->compressed_attrs != state->compressed_attrs ||
        pg->primitive_mode != state->primitive_mode ||
        pg->surface_scale_factor != state->surface_scale_factor) {
        return true;
    }

    // Textures
    // NOTE(review): loop bound is a literal 4 — presumably NV2A_MAX_TEXTURES;
    // confirm they are equal.
    for (int i = 0; i < 4; i++) {
        if (pg->texture_matrix_enable[i] != pg->vk_renderer_state->shader_binding->state.texture_matrix_enable[i] ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXCTL0_0 + i * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFILTER0 + i * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFMT0 + i * 4)) {
            return true;
        }
    }

    nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);

    return false;
}

/*
 * Ensure the shader binding matches current PGRAPH state.
 *
 * If the quick dirty check trips, derive the full ShaderState and, only
 * if it actually differs from the bound state, generate/look up a new
 * binding (setting shader_bindings_changed for the descriptor-set path).
 * Always refreshes the uniform values afterwards.
 */
void pgraph_vk_bind_shaders(PGRAPHState *pg)
{
    NV2A_VK_DGROUP_BEGIN("%s", __func__);

    PGRAPHVkState *r = pg->vk_renderer_state;

    r->shader_bindings_changed = false;

    if (check_shaders_dirty(pg)) {
        /* memset first so struct padding is zeroed before the memcmp key
         * comparison below. */
        ShaderState new_state;
        memset(&new_state, 0, sizeof(ShaderState));
        new_state = pgraph_get_shader_state(pg);
        if (!r->shader_binding || memcmp(&r->shader_binding->state, &new_state, sizeof(ShaderState))) {
            r->shader_binding = gen_shaders(pg, &new_state);
            r->shader_bindings_changed = true;
        }
    }

    // FIXME: Use dirty bits
    pgraph_vk_update_shader_uniforms(pg);

    NV2A_VK_DGROUP_END();
}

/*
 * Recompute all uniform values for the bound shaders and detect changes by
 * hashing each stage's uniform staging area; uniforms_changed is set when
 * either hash differs from the previous frame, which later triggers a UBO
 * upload and descriptor write.
 */
void pgraph_vk_update_shader_uniforms(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    NV2A_VK_DGROUP_BEGIN("%s", __func__);
    nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND);

    assert(r->shader_binding);
    ShaderBinding *binding = r->shader_binding;
    ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms,
                                       &binding->fragment->uniforms };
    shader_update_constants(pg, r->shader_binding, true,
                            r->shader_binding->state.vertex_program,
                            r->shader_binding->state.fixed_function);

    for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
        uint64_t hash = fast_hash(layouts[i]->allocation,
                                  layouts[i]->total_size);
        r->uniforms_changed |= (hash != r->uniform_buffer_hashes[i]);
        r->uniform_buffer_hashes[i] = hash;
    }

    nv2a_profile_inc_counter(r->uniforms_changed ?
                                 NV2A_PROF_SHADER_UBO_DIRTY :
                                 NV2A_PROF_SHADER_UBO_NOTDIRTY);

    NV2A_VK_DGROUP_END();
}

/* Bring up the GLSL compiler, descriptor machinery and shader cache. */
void pgraph_vk_init_shaders(PGRAPHState *pg)
{
    pgraph_vk_init_glsl_compiler();
    create_descriptor_pool(pg);
    create_descriptor_set_layout(pg);
    create_descriptor_sets(pg);
    shader_cache_init(pg);
}

/* Tear down in reverse order of pgraph_vk_init_shaders(). */
void pgraph_vk_finalize_shaders(PGRAPHState *pg)
{
    shader_cache_finalize(pg);
    destroy_descriptor_sets(pg);
    destroy_descriptor_set_layout(pg);
    destroy_descriptor_pool(pg);
    pgraph_vk_finalize_glsl_compiler();
}
+ +const char *pack_d24_unorm_s8_uint_to_z24s8_glsl = + "#version 450\n" + "layout(local_size_x = 256) in;\n" + "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" + "layout(binding = 0) buffer DepthIn { uint depth_in[]; };\n" + "layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n" + "layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n" + "uint get_input_idx(uint idx_out) {\n" + " uint scale = width_in / width_out;" + " uint y = (idx_out / width_out) * scale;\n" + " uint x = (idx_out % width_out) * scale;\n" + " return y * width_in + x;\n" + "}\n" + "void main() {\n" + " uint idx_out = gl_GlobalInvocationID.x;\n" + " uint idx_in = get_input_idx(idx_out);\n" + " uint depth_value = depth_in[idx_in];\n" + " uint stencil_value = (stencil_in[idx_in / 4] >> ((idx_in % 4) * 8)) & 0xff;\n" + " depth_stencil_out[idx_out] = depth_value << 8 | stencil_value;\n" + "}\n"; + +const char *unpack_z24s8_to_d24_unorm_s8_uint_glsl = + "#version 450\n" + "layout(local_size_x = 256) in;\n" + "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" + "layout(binding = 0) buffer DepthOut { uint depth_out[]; };\n" + "layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n" + "layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n" + "uint get_input_idx(uint idx_out) {\n" + " uint scale = width_out / width_in;" + " uint y = (idx_out / width_out) / scale;\n" + " uint x = (idx_out % width_out) / scale;\n" + " return y * width_in + x;\n" + "}\n" + "void main() {\n" + " uint idx_out = gl_GlobalInvocationID.x;\n" + " uint idx_in = get_input_idx(idx_out);\n" + " depth_out[idx_out] = depth_stencil_in[idx_in] >> 8;\n" + " if (idx_out % 4 == 0) {\n" + " uint stencil_value = 0;\n" + " for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels + " uint v = depth_stencil_in[get_input_idx(idx_out + i)] & 0xff;\n" + " stencil_value |= v << (i * 8);\n" + " }\n" + " stencil_out[idx_out / 4] = 
stencil_value;\n" + " }\n" + "}\n"; + +const char *pack_d32_sfloat_s8_uint_to_z24s8_glsl = + "#version 450\n" + "layout(local_size_x = 256) in;\n" + "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" + "layout(binding = 0) buffer DepthIn { float depth_in[]; };\n" + "layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n" + "layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n" + "uint get_input_idx(uint idx_out) {\n" + " uint y = idx_out / width_out;\n" + " uint x = idx_out % width_out;\n" + " return (y * width_in + x) * (width_in / width_out);\n" + "}\n" + "void main() {\n" + " uint idx_out = gl_GlobalInvocationID.x;\n" + " uint idx_in = get_input_idx(idx_out);\n" + " uint depth_value = int(depth_in[idx_in] * float(0xffffff));\n" + " uint stencil_value = (stencil_in[idx_in / 4] >> ((idx_in % 4) * 8)) & 0xff;\n" + " depth_stencil_out[idx_out] = depth_value << 8 | stencil_value;\n" + "}\n"; + +const char *unpack_z24s8_to_d32_sfloat_s8_uint_glsl = + "#version 450\n" + "layout(local_size_x = 256) in;\n" + "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" + "layout(binding = 0) buffer DepthOut { float depth_out[]; };\n" + "layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n" + "layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n" + "uint get_input_idx(uint idx_out) {\n" + " uint scale = width_out / width_in;" + " uint y = (idx_out / width_out) / scale;\n" + " uint x = (idx_out % width_out) / scale;\n" + " return y * width_in + x;\n" + "}\n" + "void main() {\n" + " uint idx_out = gl_GlobalInvocationID.x;\n" + " uint idx_in = get_input_idx(idx_out);\n" + " depth_out[idx_out] = float(depth_stencil_in[idx_in] >> 8) / float(0xffffff);\n" + " if (idx_out % 4 == 0) {\n" + " uint stencil_value = 0;\n" + " for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels + " uint v = depth_stencil_in[get_input_idx(idx_out + i)] & 0xff;\n" + " stencil_value |= v << 
(i * 8);\n" + " }\n" + " stencil_out[idx_out / 4] = stencil_value;\n" + " }\n" + "}\n"; + +static void create_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorPoolSize pool_sizes[] = { + { + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 3, + }, + }; + + VkDescriptorPoolCreateInfo pool_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .poolSizeCount = ARRAY_SIZE(pool_sizes), + .pPoolSizes = pool_sizes, + .maxSets = ARRAY_SIZE(r->compute.descriptor_sets), + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + }; + VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL, + &r->compute.descriptor_pool)); +} + +static void destroy_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorPool(r->device, r->compute.descriptor_pool, NULL); + r->compute.descriptor_pool = VK_NULL_HANDLE; +} + +static void create_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + const int num_buffers = 3; + + VkDescriptorSetLayoutBinding bindings[num_buffers]; + for (int i = 0; i < num_buffers; i++) { + bindings[i] = (VkDescriptorSetLayoutBinding){ + .binding = i, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + }; + } + VkDescriptorSetLayoutCreateInfo layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = ARRAY_SIZE(bindings), + .pBindings = bindings, + }; + VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL, + &r->compute.descriptor_set_layout)); +} + +static void destroy_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorSetLayout(r->device, r->compute.descriptor_set_layout, + NULL); + r->compute.descriptor_set_layout = VK_NULL_HANDLE; +} + +static void create_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = 
pg->vk_renderer_state; + + VkDescriptorSetLayout layouts[ARRAY_SIZE(r->descriptor_sets)]; + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + layouts[i] = r->compute.descriptor_set_layout; + } + VkDescriptorSetAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = r->compute.descriptor_pool, + .descriptorSetCount = ARRAY_SIZE(r->compute.descriptor_sets), + .pSetLayouts = layouts, + }; + VK_CHECK(vkAllocateDescriptorSets(r->device, &alloc_info, + r->compute.descriptor_sets)); +} + +static void destroy_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkFreeDescriptorSets(r->device, r->compute.descriptor_pool, + ARRAY_SIZE(r->compute.descriptor_sets), + r->compute.descriptor_sets); + for (int i = 0; i < ARRAY_SIZE(r->compute.descriptor_sets); i++) { + r->compute.descriptor_sets[i] = VK_NULL_HANDLE; + } +} + +static void create_compute_pipeline_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPushConstantRange push_constant_range = { + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .size = 2 * sizeof(uint32_t), + }; + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &r->compute.descriptor_set_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_constant_range, + }; + VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL, + &r->compute.pipeline_layout)); +} + +static VkPipeline create_compute_pipeline(PGRAPHState *pg, const char *glsl) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + ShaderModuleInfo *module = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_COMPUTE_BIT, glsl); + + VkComputePipelineCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .layout = r->compute.pipeline_layout, + .stage = + (VkPipelineShaderStageCreateInfo){ + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .pName = "main", + .module = module->module, + }, + }; + VkPipeline pipeline; + VK_CHECK(vkCreateComputePipelines(r->device, r->vk_pipeline_cache, 1, + &pipeline_info, NULL, + &pipeline)); + + pgraph_vk_destroy_shader_module(r, module); + + return pipeline; +} + +static void update_descriptor_sets(PGRAPHState *pg, + VkDescriptorBufferInfo *buffers, int count) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(count == 3); + VkWriteDescriptorSet descriptor_writes[3]; + const int descriptor_set_index = 0; + + for (int i = 0; i < count; i++) { + descriptor_writes[i] = (VkWriteDescriptorSet){ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = r->compute.descriptor_sets[descriptor_set_index], + .dstBinding = i, + .dstArrayElement = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .pBufferInfo = &buffers[i], + }; + } + vkUpdateDescriptorSets(r->device, count, descriptor_writes, 0, NULL); +} + +// +// Pack depth+stencil into NV097_SET_SURFACE_FORMAT_ZETA_Z24S8 +// formatted buffer with depth in bits 31-8 and stencil in bits 7-0. 
+// +void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, + VkCommandBuffer cmd, VkBuffer src, + VkBuffer dst, bool downscale) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int input_width = surface->width, input_height = surface->height; + pgraph_apply_scaling_factor(pg, &input_width, &input_height); + + unsigned int output_width = surface->width, output_height = surface->height; + if (!downscale) { + pgraph_apply_scaling_factor(pg, &output_width, &output_height); + } + + size_t depth_bytes_per_pixel = 4; + size_t depth_size = input_width * input_height * depth_bytes_per_pixel; + + size_t stencil_bytes_per_pixel = 1; + size_t stencil_size = input_width * input_height * stencil_bytes_per_pixel; + + size_t output_bytes_per_pixel = 4; + size_t output_size = output_width * output_height * output_bytes_per_pixel; + + VkDescriptorBufferInfo buffers[] = { + { + .buffer = src, + .offset = 0, + .range = depth_size, + }, + { + .buffer = src, + .offset = depth_size, + .range = stencil_size, + }, + { + .buffer = dst, + .offset = 0, + .range = output_size, + }, + }; + update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers)); + + if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->compute.pipeline_pack_d24s8); + } else if (surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->compute.pipeline_pack_f32s8); + } else { + assert(!"Unsupported pack format"); + } + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->compute.pipeline_layout, 0, 1, + &r->compute.descriptor_sets[0], 0, NULL); + + uint32_t push_constants[2] = { input_width, output_width }; + assert(sizeof(push_constants) == 8); + vkCmdPushConstants(cmd, r->compute.pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + push_constants); + + size_t workgroup_size_in_units = 256; + size_t output_size_in_units 
= output_width * output_height; + assert(output_size_in_units % workgroup_size_in_units == 0); + size_t group_count = output_size_in_units / workgroup_size_in_units; + + // FIXME: Check max group count + + vkCmdDispatch(cmd, group_count, 1, 1); +} + +void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, + VkCommandBuffer cmd, VkBuffer src, + VkBuffer dst) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int input_width = surface->width, input_height = surface->height; + + unsigned int output_width = surface->width, output_height = surface->height; + pgraph_apply_scaling_factor(pg, &output_width, &output_height); + + size_t depth_bytes_per_pixel = 4; + size_t depth_size = output_width * output_height * depth_bytes_per_pixel; + + size_t stencil_bytes_per_pixel = 1; + size_t stencil_size = output_width * output_height * stencil_bytes_per_pixel; + + size_t input_bytes_per_pixel = 4; + size_t input_size = input_width * input_height * input_bytes_per_pixel; + + VkDescriptorBufferInfo buffers[] = { + { + .buffer = dst, + .offset = 0, + .range = depth_size, + }, + { + .buffer = dst, + .offset = depth_size, + .range = stencil_size, + }, + { + .buffer = src, + .offset = 0, + .range = input_size, + }, + }; + update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers)); + + if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->compute.pipeline_unpack_d24s8); + } else if (surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->compute.pipeline_unpack_f32s8); + } else { + assert(!"Unsupported pack format"); + } + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->compute.pipeline_layout, 0, 1, + &r->compute.descriptor_sets[0], 0, NULL); + + assert(output_width >= input_width); + uint32_t push_constants[2] = { input_width, output_width }; + assert(sizeof(push_constants) == 8); + 
vkCmdPushConstants(cmd, r->compute.pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + push_constants); + + size_t workgroup_size_in_units = 256; + size_t output_size_in_units = output_width * output_height; + assert(output_size_in_units % workgroup_size_in_units == 0); + size_t group_count = output_size_in_units / workgroup_size_in_units; + + // FIXME: Check max group count + + vkCmdDispatch(cmd, group_count, 1, 1); +} + +void pgraph_vk_init_compute(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + create_descriptor_pool(pg); + create_descriptor_set_layout(pg); + create_descriptor_sets(pg); + create_compute_pipeline_layout(pg); + + r->compute.pipeline_pack_d24s8 = + create_compute_pipeline(pg, pack_d24_unorm_s8_uint_to_z24s8_glsl); + r->compute.pipeline_unpack_d24s8 = + create_compute_pipeline(pg, unpack_z24s8_to_d24_unorm_s8_uint_glsl); + r->compute.pipeline_pack_f32s8 = + create_compute_pipeline(pg, pack_d32_sfloat_s8_uint_to_z24s8_glsl); + r->compute.pipeline_unpack_f32s8 = + create_compute_pipeline(pg, unpack_z24s8_to_d32_sfloat_s8_uint_glsl); +} + +void pgraph_vk_finalize_compute(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPipeline *pipelines[] = { + &r->compute.pipeline_pack_d24s8, + &r->compute.pipeline_unpack_d24s8, + &r->compute.pipeline_pack_f32s8, + &r->compute.pipeline_unpack_f32s8, + }; + + for (int i = 0; i < ARRAY_SIZE(pipelines); i++) { + vkDestroyPipeline(r->device, *pipelines[i], NULL); + pipelines[i] = VK_NULL_HANDLE; + } + + vkDestroyPipelineLayout(r->device, r->compute.pipeline_layout, NULL); + r->compute.pipeline_layout = VK_NULL_HANDLE; + + destroy_descriptor_sets(pg); + destroy_descriptor_set_layout(pg); + destroy_descriptor_pool(pg); +} diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c new file mode 100644 index 0000000000..9df98666bf --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -0,0 +1,1485 @@ +/* + * Geforce NV2A PGRAPH Vulkan 
Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/pgraph/swizzle.h" +#include "qemu/compiler.h" +#include "ui/xemu-settings.h" +#include "renderer.h" + +const int num_invalid_surfaces_to_keep = 10; // FIXME: Make automatic +const int max_surface_frame_time_delta = 5; + +void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale) +{ + g_config.display.quality.surface_scale = scale < 1 ? 
1 : scale; + + qemu_mutex_unlock_iothread(); + + qemu_mutex_lock(&d->pfifo.lock); + qatomic_set(&d->pfifo.halt, true); + qemu_mutex_unlock(&d->pfifo.lock); + + // FIXME: It's just flush + qemu_mutex_lock(&d->pgraph.lock); + qemu_event_reset(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete); + qatomic_set(&d->pgraph.vk_renderer_state->download_dirty_surfaces_pending, true); + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete); + + qemu_mutex_lock(&d->pgraph.lock); + qemu_event_reset(&d->pgraph.flush_complete); + qatomic_set(&d->pgraph.flush_pending, true); + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.flush_complete); + + qemu_mutex_lock(&d->pfifo.lock); + qatomic_set(&d->pfifo.halt, false); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + + qemu_mutex_lock_iothread(); +} + +unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d) +{ + return d->pgraph.surface_scale_factor; // FIXME: Move internal to renderer +} + +void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg) +{ + int factor = g_config.display.quality.surface_scale; + pg->surface_scale_factor = MAX(factor, 1); +} + +// FIXME: Move to common +static void get_surface_dimensions(PGRAPHState const *pg, unsigned int *width, + unsigned int *height) +{ + bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + if (swizzle) { + *width = 1 << pg->surface_shape.log_width; + *height = 1 << pg->surface_shape.log_height; + } else { + *width = pg->surface_shape.clip_width; + *height = pg->surface_shape.clip_height; + } +} + +// FIXME: Move to common +static bool framebuffer_dirty(PGRAPHState const *pg) +{ + bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape, + sizeof(SurfaceShape)) != 0; 
+ if (!shape_changed || (!pg->surface_shape.color_format + && !pg->surface_shape.zeta_format)) { + return false; + } + return true; +} + +static void memcpy_image(void *dst, void const *src, int dst_stride, + int src_stride, int height) +{ + if (dst_stride == src_stride) { + memcpy(dst, src, dst_stride * height); + return; + } + + uint8_t *dst_ptr = (uint8_t *)dst; + uint8_t const *src_ptr = (uint8_t *)src; + + size_t copy_stride = MIN(src_stride, dst_stride); + + for (int i = 0; i < height; i++) { + memcpy(dst_ptr, src_ptr, copy_stride); + dst_ptr += dst_stride; + src_ptr += src_stride; + } +} + +static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, + uint8_t *pixels) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD); + + if (r->in_command_buffer && + surface->draw_time >= r->command_buffer_start_time) { + pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_DOWN); + } + + bool downscale = (pg->surface_scale_factor != 1); + + trace_nv2a_pgraph_surface_download( + surface->color ? "COLOR" : "ZETA", + surface->swizzle ? "sz" : "lin", surface->vram_addr, + surface->width, surface->height, surface->pitch, + surface->fmt.bytes_per_pixel); + + // Read surface into memory + uint8_t *gl_read_buf = pixels; + + uint8_t *swizzle_buf = pixels; + if (surface->swizzle) { + // FIXME: Swizzle in shader + assert(pg->surface_scale_factor == 1 || downscale); + swizzle_buf = (uint8_t *)g_malloc(surface->size); + gl_read_buf = swizzle_buf; + } + + unsigned int scaled_width = surface->width, + scaled_height = surface->height; + pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + int num_copy_regions = 1; + VkBufferImageCopy copy_regions[2]; + copy_regions[0] = (VkBufferImageCopy){ + .imageSubresource.aspectMask = surface->color ? + VK_IMAGE_ASPECT_COLOR_BIT : + VK_IMAGE_ASPECT_DEPTH_BIT, + .imageSubresource.layerCount = 1, + }; + + bool use_compute_to_convert_depth_stencil_format = + surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + + VkImage surface_image_loc; + if (downscale && !use_compute_to_convert_depth_stencil_format) { + copy_regions[0].imageExtent = + (VkExtent3D){ surface->width, surface->height, 1 }; + + if (surface->image_scratch_current_layout != + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + pgraph_vk_transition_image_layout( + pg, cmd, surface->image_scratch, surface->host_fmt.vk_format, + surface->image_scratch_current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + surface->image_scratch_current_layout = + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + } + + VkImageBlit blit_region = { + .srcSubresource.aspectMask = surface->host_fmt.aspect, + .srcSubresource.mipLevel = 0, + .srcSubresource.baseArrayLayer = 0, + .srcSubresource.layerCount = 1, + .srcOffsets[0] = (VkOffset3D){0, 0, 0}, + .srcOffsets[1] = (VkOffset3D){scaled_width, scaled_height, 1}, + + .dstSubresource.aspectMask = surface->host_fmt.aspect, + .dstSubresource.mipLevel = 0, + .dstSubresource.baseArrayLayer = 0, + .dstSubresource.layerCount = 1, + .dstOffsets[0] = (VkOffset3D){0, 0, 0}, + .dstOffsets[1] = (VkOffset3D){surface->width, surface->height, 1}, + }; + + vkCmdBlitImage(cmd, surface->image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + surface->image_scratch, + surface->image_scratch_current_layout, 1, &blit_region, + surface->color ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST); + + pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, + surface->host_fmt.vk_format, + surface->image_scratch_current_layout, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + surface->image_scratch_current_layout = + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + surface_image_loc = surface->image_scratch; + } else { + copy_regions[0].imageExtent = + (VkExtent3D){ scaled_width, scaled_height, 1 }; + surface_image_loc = surface->image; + } + + if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { + copy_regions[num_copy_regions++] = (VkBufferImageCopy){ + .bufferOffset = scaled_width * scaled_height * 4, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1}, + }; + } + + int copy_buffer_idx = use_compute_to_convert_depth_stencil_format ? + BUFFER_COMPUTE_DST : + BUFFER_STAGING_DST; + VkBuffer copy_buffer = r->storage_buffers[copy_buffer_idx].buffer; + + vkCmdCopyImageToBuffer(cmd, surface_image_loc, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer, + num_copy_regions, copy_regions); + + // FIXME: Verify output of depth stencil conversion + // FIXME: Track current layout and only transition when required + + if (use_compute_to_convert_depth_stencil_format) { + size_t bytes_per_pixel = 4; + size_t packed_size = + downscale ? 
(surface->width * surface->height * bytes_per_pixel) : + (scaled_width * scaled_height * bytes_per_pixel); + + VkBufferMemoryBarrier pre_pack_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_pack_barrier, 0, NULL); + + VkBuffer pack_buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer; + pgraph_vk_pack_depth_stencil(pg, surface, cmd, copy_buffer, pack_buffer, + downscale); + + VkBufferMemoryBarrier post_pack_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = pack_buffer, + .size = packed_size + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_pack_barrier, 0, NULL); + + copy_buffer = r->storage_buffers[BUFFER_STAGING_DST].buffer; + VkBufferCopy buffer_copy_region = { + .size = packed_size, + }; + vkCmdCopyBuffer(cmd, pack_buffer, copy_buffer, 1, &buffer_copy_region); + } + + size_t downloaded_image_size = surface->host_fmt.host_bytes_per_pixel * + surface->width * surface->height; + assert((downloaded_image_size) <= + r->storage_buffers[BUFFER_STAGING_DST].buffer_size); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_1); + pgraph_vk_end_single_time_commands(pg, cmd); + + void *mapped_memory_ptr; + VK_CHECK(vmaMapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_DST].allocation, + &mapped_memory_ptr)); + + // FIXME: Swizzle in shader + // FIXME: Eliminate this extra copy if we need to swizzle + // FIXME: Use native buffer copy options for pitch adjust + + bool no_conversion_necessary = + surface->color || use_compute_to_convert_depth_stencil_format || + surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM; + + assert(no_conversion_necessary); + + memcpy_image(gl_read_buf, mapped_memory_ptr, surface->pitch, + surface->width * surface->fmt.bytes_per_pixel, + surface->height); + + vmaUnmapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_DST].allocation); + + if (surface->swizzle) { + swizzle_rect(swizzle_buf, surface->width, surface->height, pixels, + surface->pitch, surface->fmt.bytes_per_pixel); + nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE); + g_free(swizzle_buf); + } +} + +static void download_surface(NV2AState *d, SurfaceBinding *surface, bool force) +{ + if (!(surface->download_pending || force)) { + return; + } + + // FIXME: Respect write enable at last TOU? 
+ + download_surface_to_buffer(d, surface, d->vram_ptr + surface->vram_addr); + + memory_region_set_client_dirty(d->vram, surface->vram_addr, + surface->pitch * surface->height, + DIRTY_MEMORY_VGA); + memory_region_set_client_dirty(d->vram, surface->vram_addr, + surface->pitch * surface->height, + DIRTY_MEMORY_NV2A_TEX); + + surface->download_pending = false; + surface->draw_dirty = false; +} + +void pgraph_vk_wait_for_surface_download(SurfaceBinding *surface) +{ + NV2AState *d = g_nv2a; + + if (qatomic_read(&surface->draw_dirty)) { + qemu_mutex_lock(&d->pfifo.lock); + qemu_event_reset(&d->pgraph.vk_renderer_state->downloads_complete); + qatomic_set(&surface->download_pending, true); + qatomic_set(&d->pgraph.vk_renderer_state->downloads_pending, true); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.vk_renderer_state->downloads_complete); + } +} + +void pgraph_vk_process_pending_downloads(NV2AState *d) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + SurfaceBinding *surface; + + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + download_surface(d, surface, false); + } + + qatomic_set(&r->downloads_pending, false); + qemu_event_set(&r->downloads_complete); +} + +void pgraph_vk_download_dirty_surfaces(NV2AState *d) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + pgraph_vk_surface_download_if_dirty(d, surface); + } + + qatomic_set(&r->download_dirty_surfaces_pending, false); + qemu_event_set(&r->dirty_surfaces_download_complete); +} + +static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr, + hwaddr len, bool write) +{ + SurfaceBinding *e = opaque; + assert(addr >= e->vram_addr); + hwaddr offset = addr - e->vram_addr; + assert(offset < e->size); + + if (qatomic_read(&e->draw_dirty)) { + trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); + pgraph_vk_wait_for_surface_download(e); + } + + if (write && 
!qatomic_read(&e->upload_pending)) { + trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); + qatomic_set(&e->upload_pending, true); + } +} + +static void register_cpu_access_callback(NV2AState *d, SurfaceBinding *surface) +{ + if (tcg_enabled()) { + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock_iothread(); + mem_access_callback_insert(qemu_get_cpu(0), + d->vram, surface->vram_addr, surface->size, + &surface->access_cb, &surface_access_callback, + surface); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&d->pgraph.lock); + } +} + +static void unregister_cpu_access_callback(NV2AState *d, + SurfaceBinding const *surface) +{ + if (tcg_enabled()) { + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock_iothread(); + mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&d->pgraph.lock); + } +} + +static void bind_surface(PGRAPHVkState *r, SurfaceBinding *surface) +{ + if (surface->color) { + r->color_binding = surface; + } else { + r->zeta_binding = surface; + } + + r->framebuffer_dirty = true; +} + +static void unbind_surface(NV2AState *d, bool color) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + if (color) { + if (r->color_binding) { + r->color_binding = NULL; + r->framebuffer_dirty = true; + } + } else { + if (r->zeta_binding) { + r->zeta_binding = NULL; + r->framebuffer_dirty = true; + } + } +} + +static void invalidate_surface(NV2AState *d, SurfaceBinding *surface) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + trace_nv2a_pgraph_surface_invalidated(surface->vram_addr); + + // FIXME: We may be reading from the surface in the current command buffer! + // Add a detection to handle it. For now, finish to be safe. 
+ pgraph_vk_finish(&d->pgraph, VK_FINISH_REASON_SURFACE_DOWN); + + assert((!r->in_command_buffer || + surface->draw_time < r->command_buffer_start_time) && + "Surface evicted while in use!"); + + if (surface == r->color_binding) { + assert(d->pgraph.surface_color.buffer_dirty); + unbind_surface(d, true); + } + if (surface == r->zeta_binding) { + assert(d->pgraph.surface_zeta.buffer_dirty); + unbind_surface(d, false); + } + + unregister_cpu_access_callback(d, surface); + + QTAILQ_REMOVE(&r->surfaces, surface, entry); + QTAILQ_INSERT_HEAD(&r->invalid_surfaces, surface, entry); +} + +static void invalidate_overlapping_surfaces(NV2AState *d, + SurfaceBinding const *surface) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + uintptr_t e_end = surface->vram_addr + surface->size - 1; + + SurfaceBinding *s, *next; + QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + uintptr_t s_end = s->vram_addr + s->size - 1; + bool overlapping = + !(s->vram_addr > e_end || surface->vram_addr > s_end); + if (overlapping) { + trace_nv2a_pgraph_surface_evict_overlapping( + s->vram_addr, s->width, s->height, + s->pitch); + pgraph_vk_surface_download_if_dirty(d, s); + invalidate_surface(d, s); + } + } +} + +static void surface_put(NV2AState *d, SurfaceBinding *surface) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + assert(pgraph_vk_surface_get(d, surface->vram_addr) == NULL); + + invalidate_overlapping_surfaces(d, surface); + register_cpu_access_callback(d, surface); + + QTAILQ_INSERT_HEAD(&r->surfaces, surface, entry); +} + +SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH (surface, &r->surfaces, entry) { + if (surface->vram_addr == addr) { + return surface; + } + } + + return NULL; +} + +SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH 
(surface, &r->surfaces, entry) { + if (addr >= surface->vram_addr && + addr < (surface->vram_addr + surface->size)) { + return surface; + } + } + + return NULL; +} + +static void set_surface_label(PGRAPHState *pg, SurfaceBinding const *surface) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + g_autofree gchar *label = g_strdup_printf( + "Surface %" HWADDR_PRIx "h fmt:%s,%02xh %dx%d aa:%d", + surface->vram_addr, surface->color ? "Color" : "Zeta", + surface->color ? surface->shape.color_format : + surface->shape.zeta_format, + surface->width, surface->height, pg->surface_shape.anti_aliasing); + + VkDebugUtilsObjectNameInfoEXT name_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, + .objectType = VK_OBJECT_TYPE_IMAGE, + .objectHandle = (uint64_t)surface->image, + .pObjectName = label, + }; + + if (r->debug_utils_extension_enabled) { + vkSetDebugUtilsObjectNameEXT(r->device, &name_info); + } + vmaSetAllocationName(r->allocator, surface->allocation, label); + + if (surface->image_scratch) { + g_autofree gchar *label_scratch = + g_strdup_printf("%s (scratch)", label); + name_info.objectHandle = (uint64_t)surface->image_scratch; + name_info.pObjectName = label_scratch; + if (r->debug_utils_extension_enabled) { + vkSetDebugUtilsObjectNameEXT(r->device, &name_info); + } + vmaSetAllocationName(r->allocator, surface->allocation_scratch, + label_scratch); + } +} + +static void create_surface_image(PGRAPHState *pg, SurfaceBinding *surface) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int width = surface->width, height = surface->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + NV2A_VK_DPRINTF( + "Creating new surface image width=%d height=%d @ %08" HWADDR_PRIx, + width, height, surface->vram_addr); + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .extent.width = width, + .extent.height = height, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers 
= 1, + .format = surface->host_fmt.vk_format, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | surface->host_fmt.usage, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &surface->image, + &surface->allocation, NULL)); + + if (pg->surface_scale_factor > 1) { + VkImageCreateInfo scratch_image_create_info = image_create_info; + scratch_image_create_info.extent.width = surface->width; + scratch_image_create_info.extent.height = surface->height; + VK_CHECK( + vmaCreateImage(r->allocator, &scratch_image_create_info, + &alloc_create_info, &surface->image_scratch, + &surface->allocation_scratch, NULL)); + surface->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED; + } else { + surface->image_scratch = VK_NULL_HANDLE; + surface->allocation_scratch = VK_NULL_HANDLE; + } + + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = surface->image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = surface->host_fmt.vk_format, + .subresourceRange.aspectMask = surface->host_fmt.aspect, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }; + VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &surface->image_view)); + + // FIXME: Go right into main command buffer + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_UNDEFINED, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_3); + pgraph_vk_end_single_time_commands(pg, cmd); + nv2a_profile_inc_counter(NV2A_PROF_SURF_CREATE); +} + +static void migrate_surface_image(SurfaceBinding *dst, SurfaceBinding *src) +{ + dst->image = src->image; + dst->image_view = src->image_view; + dst->allocation = src->allocation; + dst->image_scratch = src->image_scratch; + dst->image_scratch_current_layout = src->image_scratch_current_layout; + dst->allocation_scratch = src->allocation_scratch; + + src->image = VK_NULL_HANDLE; + src->image_view = VK_NULL_HANDLE; + src->allocation = VK_NULL_HANDLE; + src->image_scratch = VK_NULL_HANDLE; + src->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED; + src->allocation_scratch = VK_NULL_HANDLE; +} + +static void destroy_surface_image(PGRAPHVkState *r, SurfaceBinding *surface) +{ + vkDestroyImageView(r->device, surface->image_view, NULL); + vmaDestroyImage(r->allocator, surface->image, surface->allocation); + if (surface->image_scratch) { + vmaDestroyImage(r->allocator, surface->image_scratch, + surface->allocation_scratch); + } +} + +static bool check_invalid_surface_is_compatibile(SurfaceBinding *surface, + SurfaceBinding *target) +{ + return surface->host_fmt.vk_format == target->host_fmt.vk_format && + surface->width == target->width && + surface->height == target->height && + surface->pitch == target->pitch && + surface->host_fmt.usage == target->host_fmt.usage; +} + +static SurfaceBinding * +get_any_compatible_invalid_surface(PGRAPHVkState *r, SurfaceBinding *target) +{ + SurfaceBinding *surface, *next; + QTAILQ_FOREACH_SAFE(surface, &r->invalid_surfaces, entry, next) { + if (check_invalid_surface_is_compatibile(surface, target)) { + QTAILQ_REMOVE(&r->invalid_surfaces, surface, entry); + return surface; + } + } + + return NULL; +} + +static void prune_invalid_surfaces(PGRAPHVkState *r, int keep) +{ + int 
num_surfaces = 0; + + SurfaceBinding *surface, *next; + QTAILQ_FOREACH_SAFE(surface, &r->invalid_surfaces, entry, next) { + num_surfaces += 1; + if (num_surfaces > keep) { + QTAILQ_REMOVE(&r->invalid_surfaces, surface, entry); + destroy_surface_image(r, surface); + g_free(surface); + } + } +} + +static void expire_old_surfaces(NV2AState *d) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + SurfaceBinding *s, *next; + QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + int last_used = d->pgraph.frame_time - s->frame_time; + if (last_used >= max_surface_frame_time_delta) { + trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr); + pgraph_vk_surface_download_if_dirty(d, s); + invalidate_surface(d, s); + } + } +} + +static bool check_surface_compatibility(SurfaceBinding const *s1, + SurfaceBinding const *s2, bool strict) +{ + bool format_compatible = + (s1->color == s2->color) && + (s1->host_fmt.vk_format == s2->host_fmt.vk_format) && + (s1->pitch == s2->pitch) && + (s1->shape.clip_x <= s2->shape.clip_x) && + (s1->shape.clip_y <= s2->shape.clip_y); + if (!format_compatible) { + return false; + } + + if (!strict) { + return (s1->width >= s2->width) && (s1->height >= s2->height); + } else { + return (s1->width == s2->width) && (s1->height == s2->height); + } +} + +void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface) +{ + if (surface->draw_dirty) { + download_surface(d, surface, true); + } +} + +void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, + bool force) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + if (!(surface->upload_pending || force)) { + return; + } + + nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD); + + pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE); // FIXME: SURFACE_UP + + trace_nv2a_pgraph_surface_upload( + surface->color ? "COLOR" : "ZETA", + surface->swizzle ? 
"sz" : "lin", surface->vram_addr, + surface->width, surface->height, surface->pitch, + surface->fmt.bytes_per_pixel); + + surface->upload_pending = false; + surface->draw_time = pg->draw_time; + + uint8_t *data = d->vram_ptr; + uint8_t *buf = data + surface->vram_addr; + + g_autofree uint8_t *swizzle_buf = NULL; + uint8_t *gl_read_buf = NULL; + + if (surface->swizzle) { + swizzle_buf = (uint8_t*)g_malloc(surface->size); + gl_read_buf = swizzle_buf; + unswizzle_rect(data + surface->vram_addr, + surface->width, surface->height, + swizzle_buf, + surface->pitch, + surface->fmt.bytes_per_pixel); + nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE); + } else { + gl_read_buf = buf; + } + + // FIXME: Eliminate extra copies + + VkBufferImageCopy regions[2]; + int num_regions = 1; + regions[0] = (VkBufferImageCopy){ + .imageSubresource.aspectMask = surface->color ? + VK_IMAGE_ASPECT_COLOR_BIT : + VK_IMAGE_ASPECT_DEPTH_BIT, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }, + }; + + if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { + regions[num_regions++] = (VkBufferImageCopy){ + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }, + }; + } + + size_t uploaded_image_size = surface->height * surface->width * + surface->fmt.bytes_per_pixel; + + StorageBuffer *copy_buffer = &r->storage_buffers[BUFFER_STAGING_SRC]; + assert(uploaded_image_size <= copy_buffer->buffer_size); + + void *mapped_memory_ptr; + VK_CHECK(vmaMapMemory(r->allocator, copy_buffer->allocation, + &mapped_memory_ptr)); + + bool use_compute_to_convert_depth_stencil_format = + surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + + bool no_conversion_necessary = + surface->color || surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM || + 
use_compute_to_convert_depth_stencil_format; + assert(no_conversion_necessary); + + memcpy_image(mapped_memory_ptr, gl_read_buf, + surface->width * surface->fmt.bytes_per_pixel, surface->pitch, + surface->height); + + vmaUnmapMemory(r->allocator, copy_buffer->allocation); + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + + unsigned int scaled_width = surface->width, scaled_height = surface->height; + pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); + + if (use_compute_to_convert_depth_stencil_format) { + size_t packed_size = uploaded_image_size; + VkBufferCopy buffer_copy_region = { + .size = packed_size, + }; + vkCmdCopyBuffer(cmd, copy_buffer->buffer, + r->storage_buffers[BUFFER_COMPUTE_DST].buffer, 1, + &buffer_copy_region); + + size_t num_pixels = scaled_width * scaled_height; + size_t unpacked_depth_image_size = num_pixels * 4; + size_t unpacked_stencil_image_size = num_pixels; + size_t unpacked_size = + unpacked_depth_image_size + unpacked_stencil_image_size; + + VkBufferMemoryBarrier pre_unpack_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + .size = packed_size + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_unpack_barrier, 0, NULL); + + StorageBuffer *unpack_buffer = &r->storage_buffers[BUFFER_COMPUTE_SRC]; + pgraph_vk_unpack_depth_stencil( + pg, surface, cmd, r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + unpack_buffer->buffer); + + VkBufferMemoryBarrier post_unpack_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + 
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = unpack_buffer->buffer, + .size = unpacked_size + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_unpack_barrier, 0, NULL); + + // Already scaled during compute. Adjust copy regions. + regions[0].imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }; + regions[1].imageExtent = regions[0].imageExtent; + regions[1].bufferOffset = unpacked_depth_image_size; + + copy_buffer = unpack_buffer; + } + + bool upscale = !use_compute_to_convert_depth_stencil_format && + pg->surface_scale_factor > 1; + + if (upscale && surface->image_scratch_current_layout != + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, + surface->host_fmt.vk_format, + surface->image_scratch_current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + surface->image_scratch_current_layout = + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + } + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + vkCmdCopyBufferToImage(cmd, copy_buffer->buffer, + upscale ? 
surface->image_scratch : surface->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, num_regions, + regions); + + if (upscale) { + pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, + surface->host_fmt.vk_format, + surface->image_scratch_current_layout, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + surface->image_scratch_current_layout = + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + + unsigned int scaled_width = surface->width, + scaled_height = surface->height; + pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); + + VkImageBlit blitRegion = { + .srcSubresource.aspectMask = surface->host_fmt.aspect, + .srcSubresource.mipLevel = 0, + .srcSubresource.baseArrayLayer = 0, + .srcSubresource.layerCount = 1, + .srcOffsets[0] = (VkOffset3D){0, 0, 0}, + .srcOffsets[1] = (VkOffset3D){surface->width, surface->height, 1}, + + .dstSubresource.aspectMask = surface->host_fmt.aspect, + .dstSubresource.mipLevel = 0, + .dstSubresource.baseArrayLayer = 0, + .dstSubresource.layerCount = 1, + .dstOffsets[0] = (VkOffset3D){0, 0, 0}, + .dstOffsets[1] = (VkOffset3D){scaled_width, scaled_height, 1}, + }; + + vkCmdBlitImage(cmd, surface->image_scratch, + surface->image_scratch_current_layout, surface->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blitRegion, + surface->color ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); + } + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_2); + pgraph_vk_end_single_time_commands(pg, cmd); + + surface->initialized = true; +} + +static void compare_surfaces(SurfaceBinding const *a, SurfaceBinding const *b) +{ + #define DO_CMP(fld) \ + if (a->fld != b->fld) \ + trace_nv2a_pgraph_surface_compare_mismatch( \ + #fld, (long int)a->fld, (long int)b->fld); + DO_CMP(shape.clip_x) + DO_CMP(shape.clip_width) + DO_CMP(shape.clip_y) + DO_CMP(shape.clip_height) + DO_CMP(fmt.bytes_per_pixel) + DO_CMP(host_fmt.vk_format) + DO_CMP(color) + DO_CMP(swizzle) + DO_CMP(vram_addr) + DO_CMP(width) + DO_CMP(height) + DO_CMP(pitch) + DO_CMP(size) + DO_CMP(dma_addr) + DO_CMP(dma_len) + DO_CMP(frame_time) + DO_CMP(draw_time) + #undef DO_CMP +} + +static void populate_surface_binding_target_sized(NV2AState *d, bool color, + unsigned int width, + unsigned int height, + SurfaceBinding *target) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + Surface *surface; + hwaddr dma_address; + BasicSurfaceFormatInfo fmt; + SurfaceFormatInfo host_fmt; + + if (color) { + surface = &pg->surface_color; + dma_address = pg->dma_color; + assert(pg->surface_shape.color_format != 0); + assert(pg->surface_shape.color_format < + ARRAY_SIZE(kelvin_surface_color_format_vk_map)); + fmt = kelvin_surface_color_format_map[pg->surface_shape.color_format]; + host_fmt = kelvin_surface_color_format_vk_map[pg->surface_shape.color_format]; + if (host_fmt.host_bytes_per_pixel == 0) { + fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n", + pg->surface_shape.color_format); + abort(); + } + } else { + surface = &pg->surface_zeta; + dma_address = pg->dma_zeta; + assert(pg->surface_shape.zeta_format != 0); + assert(pg->surface_shape.zeta_format < + ARRAY_SIZE(r->kelvin_surface_zeta_vk_map)); + fmt = kelvin_surface_zeta_format_map[pg->surface_shape.zeta_format]; + host_fmt = 
r->kelvin_surface_zeta_vk_map[pg->surface_shape.zeta_format]; + // FIXME: Support float 16,24b float format surface + } + + DMAObject dma = nv_dma_load(d, dma_address); + // There's a bunch of bugs that could cause us to hit this function + // at the wrong time and get a invalid dma object. + // Check that it's sane. + assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS); + // assert(dma.address + surface->offset != 0); + assert(surface->offset <= dma.limit); + assert(surface->offset + surface->pitch * height <= dma.limit + 1); + assert(surface->pitch % fmt.bytes_per_pixel == 0); + assert((dma.address & ~0x07FFFFFF) == 0); + + target->shape = (color || !r->color_binding) ? pg->surface_shape : + r->color_binding->shape; + target->fmt = fmt; + target->host_fmt = host_fmt; + target->color = color; + target->swizzle = + (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + target->vram_addr = dma.address + surface->offset; + target->width = width; + target->height = height; + target->pitch = surface->pitch; + target->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel); + target->upload_pending = true; + target->download_pending = false; + target->draw_dirty = false; + target->dma_addr = dma.address; + target->dma_len = dma.limit; + target->frame_time = pg->frame_time; + target->draw_time = pg->draw_time; + target->cleared = false; + + target->initialized = false; +} + +static void populate_surface_binding_target(NV2AState *d, bool color, + SurfaceBinding *target) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int width, height; + + if (color || !r->color_binding) { + get_surface_dimensions(pg, &width, &height); + pgraph_apply_anti_aliasing_factor(pg, &width, &height); + + // Since we determine surface dimensions based on the clipping + // rectangle, make sure to include the surface offset as well. 
+ if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) { + width += pg->surface_shape.clip_x; + height += pg->surface_shape.clip_y; + } + } else { + width = r->color_binding->width; + height = r->color_binding->height; + } + + populate_surface_binding_target_sized(d, color, width, height, target); +} + +static void update_surface_part(NV2AState *d, bool upload, bool color) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + SurfaceBinding target; + populate_surface_binding_target(d, color, &target); + + Surface *pg_surface = color ? &pg->surface_color : &pg->surface_zeta; + + bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty( + d->vram, target.vram_addr, + target.size, DIRTY_MEMORY_NV2A); + + if (upload && (pg_surface->buffer_dirty || mem_dirty)) { + // FIXME: We don't need to be so aggressive flushing the command list + // pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE); + pgraph_vk_ensure_not_in_render_pass(pg); + + unbind_surface(d, color); + + SurfaceBinding *surface = pgraph_vk_surface_get(d, target.vram_addr); + if (surface != NULL) { + // FIXME: Support same color/zeta surface target? In the mean time, + // if the surface we just found is currently bound, just unbind it. + SurfaceBinding *other = (color ? r->zeta_binding + : r->color_binding); + if (surface == other) { + NV2A_UNIMPLEMENTED("Same color & zeta surface offset"); + unbind_surface(d, !color); + } + } + + trace_nv2a_pgraph_surface_target( + color ? "COLOR" : "ZETA", target.vram_addr, + target.swizzle ? 
"sz" : "ln", + pg->surface_shape.anti_aliasing, + pg->surface_shape.clip_x, + pg->surface_shape.clip_width, pg->surface_shape.clip_y, + pg->surface_shape.clip_height); + + bool should_create = true; + + if (surface != NULL) { + bool is_compatible = + check_surface_compatibility(surface, &target, false); + + void (*trace_fn)(uint32_t addr, uint32_t width, uint32_t height, + const char *layout, uint32_t anti_aliasing, + uint32_t clip_x, uint32_t clip_width, + uint32_t clip_y, uint32_t clip_height, + uint32_t pitch) = + surface->color ? trace_nv2a_pgraph_surface_match_color : + trace_nv2a_pgraph_surface_match_zeta; + + trace_fn(surface->vram_addr, surface->width, surface->height, + surface->swizzle ? "sz" : "ln", surface->shape.anti_aliasing, + surface->shape.clip_x, surface->shape.clip_width, + surface->shape.clip_y, surface->shape.clip_height, + surface->pitch); + + assert(!(target.swizzle && pg->clearing)); + +#if 0 + if (surface->swizzle != target.swizzle) { + // Clears should only be done on linear surfaces. Avoid + // synchronization by allowing (1) a surface marked swizzled to + // be cleared under the assumption the entire surface is + // destined to be cleared and (2) a fully cleared linear surface + // to be marked swizzled. Strictly match size to avoid + // pathological cases. + is_compatible &= (pg->clearing || surface->cleared) && + check_surface_compatibility(surface, &target, true); + if (is_compatible) { + trace_nv2a_pgraph_surface_migrate_type( + target.swizzle ? 
"swizzled" : "linear"); + } + } +#endif + + if (is_compatible && color && + !check_surface_compatibility(surface, &target, true)) { + SurfaceBinding zeta_entry; + populate_surface_binding_target_sized( + d, !color, surface->width, surface->height, &zeta_entry); + hwaddr color_end = surface->vram_addr + surface->size; + hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size; + is_compatible &= surface->vram_addr >= zeta_end || + zeta_entry.vram_addr >= color_end; + } + + if (is_compatible && !color && r->color_binding) { + is_compatible &= (surface->width == r->color_binding->width) && + (surface->height == r->color_binding->height); + } + + if (is_compatible) { + // FIXME: Refactor + pg->surface_binding_dim.width = surface->width; + pg->surface_binding_dim.clip_x = surface->shape.clip_x; + pg->surface_binding_dim.clip_width = surface->shape.clip_width; + pg->surface_binding_dim.height = surface->height; + pg->surface_binding_dim.clip_y = surface->shape.clip_y; + pg->surface_binding_dim.clip_height = surface->shape.clip_height; + surface->upload_pending |= mem_dirty; + pg->surface_zeta.buffer_dirty |= color; + should_create = false; + } else { + trace_nv2a_pgraph_surface_evict_reason( + "incompatible", surface->vram_addr); + compare_surfaces(surface, &target); + pgraph_vk_surface_download_if_dirty(d, surface); + invalidate_surface(d, surface); + } + } + + if (should_create) { + surface = get_any_compatible_invalid_surface(r, &target); + if (surface) { + migrate_surface_image(&target, surface); + } else { + surface = g_malloc(sizeof(SurfaceBinding)); + create_surface_image(pg, &target); + } + + *surface = target; + set_surface_label(pg, surface); + surface_put(d, surface); + + // FIXME: Refactor + pg->surface_binding_dim.width = target.width; + pg->surface_binding_dim.clip_x = target.shape.clip_x; + pg->surface_binding_dim.clip_width = target.shape.clip_width; + pg->surface_binding_dim.height = target.height; + pg->surface_binding_dim.clip_y = target.shape.clip_y; 
+ pg->surface_binding_dim.clip_height = target.shape.clip_height; + + if (color && r->zeta_binding && + (r->zeta_binding->width != target.width || + r->zeta_binding->height != target.height)) { + pg->surface_zeta.buffer_dirty = true; + } + } + + void (*trace_fn)(uint32_t addr, uint32_t width, uint32_t height, + const char *layout, uint32_t anti_aliasing, + uint32_t clip_x, uint32_t clip_width, uint32_t clip_y, + uint32_t clip_height, uint32_t pitch) = + color ? (should_create ? trace_nv2a_pgraph_surface_create_color : + trace_nv2a_pgraph_surface_hit_color) : + (should_create ? trace_nv2a_pgraph_surface_create_zeta : + trace_nv2a_pgraph_surface_hit_zeta); + trace_fn(surface->vram_addr, surface->width, surface->height, + surface->swizzle ? "sz" : "ln", surface->shape.anti_aliasing, + surface->shape.clip_x, surface->shape.clip_width, + surface->shape.clip_y, surface->shape.clip_height, surface->pitch); + + bind_surface(r, surface); + pg_surface->buffer_dirty = false; + } + + if (!upload && pg_surface->draw_dirty) { + if (!tcg_enabled()) { + // FIXME: Cannot monitor for reads/writes; flush now + download_surface(d, color ? r->color_binding : r->zeta_binding, + true); + } + + pg_surface->write_enabled_cache = false; + pg_surface->draw_dirty = false; + } +} + +// FIXME: Move to common? 
+void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write, + bool zeta_write) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + pg->surface_shape.z_format = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), + NV_PGRAPH_SETUPRASTER_Z_FORMAT); + + color_write = color_write && + (pg->clearing || pgraph_color_write_enabled(pg)); + zeta_write = zeta_write && (pg->clearing || pgraph_zeta_write_enabled(pg)); + + if (upload) { + bool fb_dirty = framebuffer_dirty(pg); + if (fb_dirty) { + memcpy(&pg->last_surface_shape, &pg->surface_shape, + sizeof(SurfaceShape)); + pg->surface_color.buffer_dirty = true; + pg->surface_zeta.buffer_dirty = true; + } + + if (pg->surface_color.buffer_dirty) { + unbind_surface(d, true); + } + + if (color_write) { + update_surface_part(d, true, true); + } + + if (pg->surface_zeta.buffer_dirty) { + unbind_surface(d, false); + } + + if (zeta_write) { + update_surface_part(d, true, false); + } + } else { + if ((color_write || pg->surface_color.write_enabled_cache) + && pg->surface_color.draw_dirty) { + update_surface_part(d, false, true); + } + if ((zeta_write || pg->surface_zeta.write_enabled_cache) + && pg->surface_zeta.draw_dirty) { + update_surface_part(d, false, false); + } + } + + if (upload) { + pg->draw_time++; + } + + bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + + if (r->color_binding) { + r->color_binding->frame_time = pg->frame_time; + if (upload) { + pgraph_vk_upload_surface_data(d, r->color_binding, false); + r->color_binding->draw_time = pg->draw_time; + r->color_binding->swizzle = swizzle; + } + } + + if (r->zeta_binding) { + r->zeta_binding->frame_time = pg->frame_time; + if (upload) { + pgraph_vk_upload_surface_data(d, r->zeta_binding, false); + r->zeta_binding->draw_time = pg->draw_time; + r->zeta_binding->swizzle = swizzle; + } + } + + // Sanity check color and zeta dimensions match + if (r->color_binding && r->zeta_binding) { + 
assert(r->color_binding->width == r->zeta_binding->width); + assert(r->color_binding->height == r->zeta_binding->height); + } + + expire_old_surfaces(d); + prune_invalid_surfaces(r, num_invalid_surfaces_to_keep); +} + +static bool check_format_and_usage_supported(PGRAPHVkState *r, VkFormat format, + VkImageUsageFlags usage) +{ + VkPhysicalDeviceImageFormatInfo2 pdif2 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .format = format, + .type = VK_IMAGE_TYPE_2D, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = usage, + }; + VkImageFormatProperties2 props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, + }; + VkResult result = vkGetPhysicalDeviceImageFormatProperties2( + r->physical_device, &pdif2, &props); + return result == VK_SUCCESS; +} + +static bool check_surface_internal_formats_supported( + PGRAPHVkState *r, const SurfaceFormatInfo *fmts, size_t count) +{ + bool all_supported = true; + for (int i = 0; i < count; i++) { + const SurfaceFormatInfo *f = &fmts[i]; + if (f->host_bytes_per_pixel) { + all_supported &= + check_format_and_usage_supported(r, f->vk_format, f->usage); + } + } + return all_supported; +} + +void pgraph_vk_init_surfaces(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + // Make sure all surface format types are supported. We don't expect issue + // with these, and therefore have no fallback mechanism. + bool color_formats_supported = check_surface_internal_formats_supported( + r, kelvin_surface_color_format_vk_map, + ARRAY_SIZE(kelvin_surface_color_format_vk_map)); + assert(color_formats_supported); + + // Check if the device supports preferred VK_FORMAT_D24_UNORM_S8_UINT + // format, fall back to D32_SFLOAT_S8_UINT otherwise. 
+ r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z16] = zeta_d16; + if (check_surface_internal_formats_supported(r, &zeta_d24_unorm_s8_uint, + 1)) { + r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = + zeta_d24_unorm_s8_uint; + } else if (check_surface_internal_formats_supported( + r, &zeta_d32_sfloat_s8_uint, 1)) { + r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = + zeta_d32_sfloat_s8_uint; + } else { + assert(!"No suitable depth-stencil format supported"); + } + + QTAILQ_INIT(&r->surfaces); + QTAILQ_INIT(&r->invalid_surfaces); + + r->downloads_pending = false; + qemu_event_init(&r->downloads_complete, false); + qemu_event_init(&r->dirty_surfaces_download_complete, false); + + r->color_binding = NULL; + r->zeta_binding = NULL; + r->framebuffer_dirty = true; + + pgraph_vk_reload_surface_scale_factor(pg); // FIXME: Move internal +} + +void pgraph_vk_finalize_surfaces(PGRAPHState *pg) +{ + pgraph_vk_surface_flush(container_of(pg, NV2AState, pgraph)); +} + +void pgraph_vk_surface_flush(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + // Clear last surface shape to force recreation of buffers at next draw + pg->surface_color.draw_dirty = false; + pg->surface_zeta.draw_dirty = false; + memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape)); + unbind_surface(d, true); + unbind_surface(d, false); + + SurfaceBinding *s, *next; + QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + invalidate_surface(d, s); + } + prune_invalid_surfaces(r, 0); + + pgraph_vk_reload_surface_scale_factor(pg); +} diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c new file mode 100644 index 0000000000..10a4ccd2e4 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -0,0 +1,1456 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2012 espes + * Copyright (c) 
2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/pgraph/s3tc.h" +#include "hw/xbox/nv2a/pgraph/swizzle.h" +#include "qemu/fast-hash.h" +#include "qemu/lru.h" +#include "renderer.h" + +static void texture_cache_release_node_resources(PGRAPHVkState *r, TextureBinding *snode); + +static const VkImageType dimensionality_to_vk_image_type[] = { + 0, + VK_IMAGE_TYPE_1D, + VK_IMAGE_TYPE_2D, + VK_IMAGE_TYPE_3D, +}; +static const VkImageViewType dimensionality_to_vk_image_view_type[] = { + 0, + VK_IMAGE_VIEW_TYPE_1D, + VK_IMAGE_VIEW_TYPE_2D, + VK_IMAGE_VIEW_TYPE_3D, +}; + +static VkSamplerAddressMode lookup_texture_address_mode(int idx) +{ + assert(0 < idx && idx < ARRAY_SIZE(pgraph_texture_addr_vk_map)); + return pgraph_texture_addr_vk_map[idx]; +} + +// FIXME: Move to common +// FIXME: We can shrink the size of this structure +// FIXME: Use simple allocator +typedef struct TextureLevel { + unsigned int width, height, depth; + hwaddr vram_addr; + void *decoded_data; + size_t decoded_size; +} TextureLevel; + +typedef struct TextureLayer { + TextureLevel levels[16]; +} TextureLayer; + +typedef struct TextureLayout { + TextureLayer layers[6]; +} TextureLayout; + +// FIXME: Move to common +static enum S3TC_DECOMPRESS_FORMAT kelvin_format_to_s3tc_format(int color_format) +{ 
+ switch (color_format) { + case NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5: + return S3TC_DECOMPRESS_FORMAT_DXT1; + case NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8: + return S3TC_DECOMPRESS_FORMAT_DXT3; + case NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8: + return S3TC_DECOMPRESS_FORMAT_DXT5; + default: + assert(false); + } +} + +// FIXME: Move to common +static void memcpy_image(void *dst, void *src, int min_stride, int dst_stride, int src_stride, int height) +{ + uint8_t *dst_ptr = (uint8_t *)dst; + uint8_t *src_ptr = (uint8_t *)src; + + for (int i = 0; i < height; i++) { + memcpy(dst_ptr, src_ptr, min_stride); + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +// FIXME: Move to common +static size_t get_cubemap_layer_size(PGRAPHState *pg, TextureShape s) +{ + BasicColorFormatInfo f = kelvin_color_format_info_map[s.color_format]; + bool is_compressed = + pgraph_is_texture_format_compressed(pg, s.color_format); + unsigned int block_size; + + unsigned int w = s.width, h = s.height; + size_t length = 0; + + if (!f.linear && s.border) { + w = MAX(16, w * 2); + h = MAX(16, h * 2); + } + + if (is_compressed) { + block_size = + s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ? 
+ 8 : + 16; + } + + for (int level = 0; level < s.levels; level++) { + if (is_compressed) { + length += w / 4 * h / 4 * block_size; + } else { + length += w * h * f.bytes_per_pixel; + } + + w /= 2; + h /= 2; + } + + return ROUND_UP(length, NV2A_CUBEMAP_FACE_ALIGNMENT); +} + +// FIXME: Move to common +// FIXME: More refactoring +// FIXME: Possible parallelization of decoding +// FIXME: Bounds checking +static TextureLayout *get_texture_layout(PGRAPHState *pg, int texture_idx) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + TextureShape s = pgraph_get_texture_shape(pg, texture_idx); + BasicColorFormatInfo f = kelvin_color_format_info_map[s.color_format]; + + NV2A_VK_DGROUP_BEGIN("Texture %d: cubemap=%d, dimensionality=%d, color_format=0x%x, levels=%d, width=%d, height=%d, depth=%d border=%d, min_mipmap_level=%d, max_mipmap_level=%d, pitch=%d", + texture_idx, + s.cubemap, + s.dimensionality, + s.color_format, + s.levels, + s.width, + s.height, + s.depth, + s.border, + s.min_mipmap_level, + s.max_mipmap_level, + s.pitch + ); + + // Sanity checks on below assumptions + if (f.linear) { + assert(s.dimensionality == 2); + } + if (s.cubemap) { + assert(s.dimensionality == 2); + assert(!f.linear); + } + assert(s.dimensionality > 1); + + const hwaddr texture_vram_offset = pgraph_get_texture_phys_addr(pg, texture_idx); + void *texture_data_ptr = (char *)d->vram_ptr + texture_vram_offset; + + size_t texture_palette_data_size; + const hwaddr texture_palette_vram_offset = + pgraph_get_texture_palette_phys_addr_length(pg, texture_idx, + &texture_palette_data_size); + void *palette_data_ptr = (char *)d->vram_ptr + texture_palette_vram_offset; + + unsigned int adjusted_width = s.width, adjusted_height = s.height, + adjusted_pitch = s.pitch, adjusted_depth = s.depth; + + if (!f.linear && s.border) { + adjusted_width = MAX(16, adjusted_width * 2); + adjusted_height = MAX(16, adjusted_height * 2); + adjusted_pitch = adjusted_width * (s.pitch / s.width); + adjusted_depth = 
MAX(16, s.depth * 2); + } + + TextureLayout *layout = g_malloc0(sizeof(TextureLayout)); + + if (f.linear) { + assert(s.pitch % f.bytes_per_pixel == 0 && "Can't handle strides unaligned to pixels"); + + size_t converted_size; + uint8_t *converted = pgraph_convert_texture_data( + s, texture_data_ptr, palette_data_ptr, adjusted_width, + adjusted_height, 1, adjusted_pitch, 0, &converted_size); + + if (!converted) { + int dst_stride = adjusted_width * f.bytes_per_pixel; + assert(adjusted_width <= s.width); + converted_size = dst_stride * adjusted_height; + converted = g_malloc(converted_size); + memcpy_image(converted, texture_data_ptr, adjusted_width * f.bytes_per_pixel, dst_stride, + adjusted_pitch, adjusted_height); + } + + assert(s.levels == 1); + layout->layers[0].levels[0] = (TextureLevel){ + .width = adjusted_width, + .height = adjusted_height, + .depth = 1, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + NV2A_VK_DGROUP_END(); + return layout; + } + + bool is_compressed = pgraph_is_texture_format_compressed(pg, s.color_format); + size_t block_size = 0; + if (is_compressed) { + bool is_dxt1 = + s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5; + block_size = is_dxt1 ? 8 : 16; + } + + if (s.dimensionality == 2) { + hwaddr layer_size = 0; + if (s.cubemap) { + layer_size = get_cubemap_layer_size(pg, s); + } + + const int num_layers = s.cubemap ? 
6 : 1; + for (int layer = 0; layer < num_layers; layer++) { + unsigned int width = adjusted_width, height = adjusted_height; + texture_data_ptr = (char *)d->vram_ptr + texture_vram_offset + + layer * layer_size; + + for (int level = 0; level < s.levels; level++) { + NV2A_VK_DPRINTF("Layer %d Level %d @ %x", layer, level, (int)((char*)texture_data_ptr - (char*)d->vram_ptr)); + + width = MAX(width, 1); + height = MAX(height, 1); + if (is_compressed) { + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size + unsigned int tex_width = width, tex_height = height; + unsigned int physical_width = (width + 3) & ~3, + physical_height = (height + 3) & ~3; + // if (physical_width != width) { + // glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width); + // } + + size_t converted_size = width * height * 4; + uint8_t *converted = s3tc_decompress_2d( + kelvin_format_to_s3tc_format(s.color_format), + texture_data_ptr, physical_width, physical_height); + assert(converted); + + if (s.cubemap && adjusted_width != s.width) { + // FIXME: Consider preserving the border. + // There does not seem to be a way to reference the border + // texels in a cubemap, so they are discarded. 
+ + // glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4); + // glPixelStorei(GL_UNPACK_SKIP_ROWS, 4); + tex_width = s.width; + tex_height = s.height; + // if (physical_width == width) { + // glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); + // } + + // FIXME: Crop by 4 pixels on each side + } + + layout->layers[layer].levels[level] = (TextureLevel){ + .width = tex_width, + .height = tex_height, + .depth = 1, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + texture_data_ptr += + physical_width / 4 * physical_height / 4 * block_size; + } else { + unsigned int pitch = width * f.bytes_per_pixel; + unsigned int tex_width = width, tex_height = height; + + size_t converted_size = height * pitch; + uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch); + unswizzle_rect(texture_data_ptr, width, height, + unswizzled, pitch, f.bytes_per_pixel); + + uint8_t *converted = pgraph_convert_texture_data( + s, unswizzled, palette_data_ptr, width, height, 1, + pitch, 0, &converted_size); + + if (converted) { + g_free(unswizzled); + } else { + converted = unswizzled; + } + + if (s.cubemap && adjusted_width != s.width) { + // FIXME: Consider preserving the border. + // There does not seem to be a way to reference the border + // texels in a cubemap, so they are discarded. 
+ // glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); + tex_width = s.width; + tex_height = s.height; + // pixel_data += 4 * f.bytes_per_pixel + 4 * pitch; + + // FIXME: Crop by 4 pixels on each side + } + + layout->layers[layer].levels[level] = (TextureLevel){ + .width = tex_width, + .height = tex_height, + .depth = 1, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + texture_data_ptr += width * height * f.bytes_per_pixel; + } + + width /= 2; + height /= 2; + } + } + } else if (s.dimensionality == 3) { + assert(!f.linear); + unsigned int width = adjusted_width, height = adjusted_height, + depth = adjusted_depth; + + for (int level = 0; level < s.levels; level++) { + if (is_compressed) { + assert(width % 4 == 0 && height % 4 == 0 && + "Compressed 3D texture virtual size"); + + width = MAX(width, 4); + height = MAX(height, 4); + depth = MAX(depth, 1); + + size_t converted_size = width * height * depth * 4; + uint8_t *converted = s3tc_decompress_3d( + kelvin_format_to_s3tc_format(s.color_format), + texture_data_ptr, width, height, depth); + assert(converted); + + layout->layers[0].levels[level] = (TextureLevel){ + .width = width, + .height = height, + .depth = depth, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + texture_data_ptr += width / 4 * height / 4 * depth * block_size; + } else { + width = MAX(width, 1); + height = MAX(height, 1); + depth = MAX(depth, 1); + + unsigned int row_pitch = width * f.bytes_per_pixel; + unsigned int slice_pitch = row_pitch * height; + + size_t unswizzled_size = slice_pitch * depth; + uint8_t *unswizzled = g_malloc(unswizzled_size); + unswizzle_box(texture_data_ptr, width, height, depth, + unswizzled, row_pitch, slice_pitch, + f.bytes_per_pixel); + + size_t converted_size; + uint8_t *converted = pgraph_convert_texture_data( + s, unswizzled, palette_data_ptr, width, height, depth, + row_pitch, slice_pitch, &converted_size); + + if (converted) { + g_free(unswizzled); + } else { + 
converted = unswizzled; + converted_size = unswizzled_size; + } + + layout->layers[0].levels[level] = (TextureLevel){ + .width = width, + .height = height, + .depth = depth, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + texture_data_ptr += width * height * depth * f.bytes_per_pixel; + } + + width /= 2; + height /= 2; + depth /= 2; + } + } + + NV2A_VK_DGROUP_END(); + return layout; +} + +struct pgraph_texture_possibly_dirty_struct { + hwaddr addr, end; +}; + +static void mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque) +{ + struct pgraph_texture_possibly_dirty_struct *test = opaque; + + TextureBinding *tnode = container_of(node, TextureBinding, node); + if (tnode->possibly_dirty) { + return; + } + + uintptr_t k_tex_addr = tnode->key.texture_vram_offset; + uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1; + bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end); + + if (tnode->key.palette_length > 0) { + uintptr_t k_pal_addr = tnode->key.palette_vram_offset; + uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1; + overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end); + } + + tnode->possibly_dirty |= overlapping; +} + +void pgraph_vk_mark_textures_possibly_dirty(NV2AState *d, + hwaddr addr, hwaddr size) +{ + hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1; + addr &= TARGET_PAGE_MASK; + assert(end <= memory_region_size(d->vram)); + + struct pgraph_texture_possibly_dirty_struct test = { + .addr = addr, + .end = end, + }; + + lru_visit_active(&d->pgraph.vk_renderer_state->texture_cache, + mark_textures_possibly_dirty_visitor, + &test); +} + +static bool check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size) +{ + hwaddr end = TARGET_PAGE_ALIGN(addr + size); + addr &= TARGET_PAGE_MASK; + assert(end < memory_region_size(d->vram)); + return memory_region_test_and_clear_dirty(d->vram, addr, end - addr, + DIRTY_MEMORY_NV2A_TEX); +} + +// Check if any of the 
pages spanned by the a texture are dirty. +static bool check_texture_possibly_dirty(NV2AState *d, + hwaddr texture_vram_offset, + unsigned int length, + hwaddr palette_vram_offset, + unsigned int palette_length) +{ + bool possibly_dirty = false; + if (check_texture_dirty(d, texture_vram_offset, length)) { + possibly_dirty = true; + pgraph_vk_mark_textures_possibly_dirty(d, texture_vram_offset, length); + } + if (palette_length && check_texture_dirty(d, palette_vram_offset, + palette_length)) { + possibly_dirty = true; + pgraph_vk_mark_textures_possibly_dirty(d, palette_vram_offset, + palette_length); + } + return possibly_dirty; +} + +// FIXME: Make sure we update sampler when data matches. Should we add filtering +// options to the textureshape? +static void upload_texture_image(PGRAPHState *pg, int texture_idx, + TextureBinding *binding) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + TextureShape *state = &binding->key.state; + VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format]; + + nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD); + + g_autofree TextureLayout *layout = get_texture_layout(pg, texture_idx); + const int num_layers = state->cubemap ? 
6 : 1; + + // Calculate decoded texture data size + size_t texture_data_size = 0; + for (int layer_idx = 0; layer_idx < num_layers; layer_idx++) { + TextureLayer *layer = &layout->layers[layer_idx]; + for (int level_idx = 0; level_idx < state->levels; level_idx++) { + size_t size = layer->levels[level_idx].decoded_size; + assert(size); + texture_data_size += size; + } + } + + assert(texture_data_size <= + r->storage_buffers[BUFFER_STAGING_SRC].buffer_size); + + // Copy texture data to mapped device buffer + uint8_t *mapped_memory_ptr; + + VK_CHECK(vmaMapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, + (void *)&mapped_memory_ptr)); + + int num_regions = num_layers * state->levels; + g_autofree VkBufferImageCopy *regions = + g_malloc0_n(num_regions, sizeof(VkBufferImageCopy)); + + VkBufferImageCopy *region = regions; + VkDeviceSize buffer_offset = 0; + + for (int layer_idx = 0; layer_idx < num_layers; layer_idx++) { + TextureLayer *layer = &layout->layers[layer_idx]; + NV2A_VK_DPRINTF("Layer %d", layer_idx); + for (int level_idx = 0; level_idx < state->levels; level_idx++) { + TextureLevel *level = &layer->levels[level_idx]; + NV2A_VK_DPRINTF(" - Level %d, w=%d h=%d d=%d @ %08" HWADDR_PRIx, + level_idx, level->width, level->height, + level->depth, buffer_offset); + memcpy(mapped_memory_ptr + buffer_offset, level->decoded_data, + level->decoded_size); + *region = (VkBufferImageCopy){ + .bufferOffset = buffer_offset, + .bufferRowLength = 0, // Tightly packed + .bufferImageHeight = 0, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = level_idx, + .imageSubresource.baseArrayLayer = layer_idx, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){ 0, 0, 0 }, + .imageExtent = + (VkExtent3D){ level->width, level->height, level->depth }, + }; + buffer_offset += level->decoded_size; + region++; + } + } + assert(buffer_offset <= texture_data_size); + vmaUnmapMemory(r->allocator, + 
r->storage_buffers[BUFFER_STAGING_SRC].allocation); + + // FIXME: Use nondraw. Need to fill and copy tex buffer at once + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + + pgraph_vk_transition_image_layout(pg, cmd, binding->image, vkf.vk_format, + binding->current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + binding->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + vkCmdCopyBufferToImage(cmd, r->storage_buffers[BUFFER_STAGING_SRC].buffer, + binding->image, binding->current_layout, + num_regions, regions); + + pgraph_vk_transition_image_layout(pg, cmd, binding->image, vkf.vk_format, + binding->current_layout, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + binding->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_4); + pgraph_vk_end_single_time_commands(pg, cmd); + + // Release decoded texture data + for (int layer_idx = 0; layer_idx < num_layers; layer_idx++) { + TextureLayer *layer = &layout->layers[layer_idx]; + for (int level_idx = 0; level_idx < state->levels; level_idx++) { + g_free(layer->levels[level_idx].decoded_data); + } + } +} + +static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surface, + TextureBinding *texture) +{ + assert(!surface->color); + + PGRAPHVkState *r = pg->vk_renderer_state; + TextureShape *state = &texture->key.state; + VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format]; + + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); + + trace_nv2a_pgraph_surface_render_to_texture( + surface->vram_addr, surface->width, surface->height); + + VkCommandBuffer cmd = pgraph_vk_begin_nondraw_commands(pg); + + unsigned int scaled_width = surface->width, + scaled_height = surface->height; + pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + size_t copied_image_size = + scaled_width * scaled_height * surface->host_fmt.host_bytes_per_pixel; + size_t stencil_buffer_offset = 0; + size_t stencil_buffer_size = 0; + + int num_regions = 0; + VkBufferImageCopy regions[2]; + regions[num_regions++] = (VkBufferImageCopy){ + .bufferOffset = 0, + .bufferRowLength = 0, // Tightly packed + .bufferImageHeight = 0, // Tightly packed + .imageSubresource.aspectMask = surface->color ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){0, 0, 0}, + .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1}, + }; + + if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { + stencil_buffer_offset = scaled_width * scaled_height * 4; + stencil_buffer_size = scaled_width * scaled_height; + copied_image_size += stencil_buffer_size; + + regions[num_regions++] = (VkBufferImageCopy){ + .bufferOffset = stencil_buffer_offset, + .bufferRowLength = 0, // Tightly packed + .bufferImageHeight = 0, // Tightly packed + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){0, 0, 0}, + .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1}, + }; + } + + bool use_compute_to_convert_depth_stencil = + surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + assert(use_compute_to_convert_depth_stencil && "Unimplemented"); + + StorageBuffer *dst_storage_buffer = &r->storage_buffers[BUFFER_COMPUTE_DST]; + assert(dst_storage_buffer->buffer_size >= copied_image_size); + + vkCmdCopyImageToBuffer( + cmd, surface->image, 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + dst_storage_buffer->buffer, + num_regions, regions); + + if (use_compute_to_convert_depth_stencil) { + size_t packed_image_size = scaled_width * scaled_height * 4; + + VkBufferMemoryBarrier pre_pack_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_pack_barrier, 0, NULL); + + pgraph_vk_pack_depth_stencil( + pg, surface, cmd, + r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, false); + + VkBufferMemoryBarrier post_pack_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, + .size = packed_image_size + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_pack_barrier, 0, NULL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + regions[0] = (VkBufferImageCopy){ + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){ 0, 0, 0 }, + .imageExtent = (VkExtent3D){ 
scaled_width, scaled_height, 1 }, + }; + + vkCmdCopyBufferToImage( + cmd, r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, texture->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, regions); + } + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + pgraph_vk_end_nondraw_commands(pg, cmd); + + texture->draw_time = surface->draw_time; +} + +// FIXME: Should be able to skip the copy and sample the original surface image +static void copy_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surface, + TextureBinding *texture) +{ + if (!surface->color) { + copy_zeta_surface_to_texture(pg, surface, texture); + return; + } + + TextureShape *state = &texture->key.state; + VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format]; + + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); + + trace_nv2a_pgraph_surface_render_to_texture( + surface->vram_addr, surface->width, surface->height); + + VkCommandBuffer cmd = pgraph_vk_begin_nondraw_commands(pg); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + VkImageCopy region = { + .srcSubresource.aspectMask = surface->host_fmt.aspect, + .srcSubresource.layerCount = 1, + .dstSubresource.aspectMask = surface->host_fmt.aspect, + .dstSubresource.layerCount = 1, + .extent.width = surface->width, + .extent.height = surface->height, + .extent.depth = 1, + }; + pgraph_apply_scaling_factor(pg, ®ion.extent.width, + ®ion.extent.height); + vkCmdCopyImage(cmd, surface->image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture->image, + texture->current_layout, 1, ®ion); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + pgraph_vk_end_nondraw_commands(pg, cmd); + + texture->draw_time = surface->draw_time; +} + +static bool check_surface_to_texture_compatiblity(const SurfaceBinding *surface, + const TextureShape *shape) +{ + // FIXME: Better checks/handling on formats and surface-texture compat + + if ((!surface->swizzle && surface->pitch != shape->pitch) || + surface->width != shape->width || + surface->height != shape->height) { + return false; + } + + int surface_fmt = surface->shape.color_format; + int texture_fmt = shape->color_format; + + if (!surface->color) { + if (surface->shape.zeta_format == NV097_SET_SURFACE_FORMAT_ZETA_Z24S8) { + return true; + } + return false; + } + + if (shape->cubemap) { + // FIXME: Support rendering surface to cubemap face + return false; + } + + if (shape->levels > 1) { + // FIXME: Support rendering surface to mip levels + return false; + } + + switch (surface_fmt) { + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { + case 
NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; + default: break; + } + break; + default: + break; + } + + trace_nv2a_pgraph_surface_texture_compat_failed( + surface_fmt, texture_fmt); + return false; +} + +static void create_dummy_texture(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .extent.width = 16, + .extent.height = 16, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .format = VK_FORMAT_R8_UNORM, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .flags = 0, + }; + + VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + + VkImage texture_image; + VmaAllocation texture_allocation; + + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &texture_image, + &texture_allocation, NULL)); + + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = texture_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_R8_UNORM, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.baseMipLevel = 0, + .subresourceRange.levelCount = image_create_info.mipLevels, + .subresourceRange.baseArrayLayer = 0, + .subresourceRange.layerCount = image_create_info.arrayLayers, + .components = (VkComponentMapping){ VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R }, + }; + VkImageView texture_image_view; + 
VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &texture_image_view)); + + VkSamplerCreateInfo sampler_create_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .anisotropyEnable = VK_FALSE, + .borderColor = VK_BORDER_COLOR_INT_OPAQUE_WHITE, + .unnormalizedCoordinates = VK_FALSE, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + }; + + VkSampler texture_sampler; + VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL, + &texture_sampler)); + + // Copy texture data to mapped device buffer + uint8_t *mapped_memory_ptr; + size_t texture_data_size = + image_create_info.extent.width * image_create_info.extent.height; + + VK_CHECK(vmaMapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, + (void *)&mapped_memory_ptr)); + memset(mapped_memory_ptr, 0xff, texture_data_size); + vmaUnmapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation); + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + + pgraph_vk_transition_image_layout( + pg, cmd, texture_image, VK_FORMAT_R8_UNORM, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + VkBufferImageCopy region = { + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){ 0, 0, 0 }, + .imageExtent = (VkExtent3D){ image_create_info.extent.width, + image_create_info.extent.height, 1 }, + }; + vkCmdCopyBufferToImage(cmd, r->storage_buffers[BUFFER_STAGING_SRC].buffer, + texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, ®ion); 
+ + pgraph_vk_transition_image_layout(pg, cmd, texture_image, + VK_FORMAT_R8_UNORM, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + pgraph_vk_end_single_time_commands(pg, cmd); + + r->dummy_texture = (TextureBinding){ + .key.scale = 1.0, + .image = texture_image, + .current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .allocation = texture_allocation, + .image_view = texture_image_view, + .sampler = texture_sampler, + }; +} + +static void destroy_dummy_texture(PGRAPHVkState *r) +{ + texture_cache_release_node_resources(r, &r->dummy_texture); +} + +static void set_texture_label(PGRAPHState *pg, TextureBinding *texture) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + g_autofree gchar *label = g_strdup_printf( + "Texture %" HWADDR_PRIx "h fmt:%02xh %dx%dx%d lvls:%d", + texture->key.texture_vram_offset, texture->key.state.color_format, + texture->key.state.width, texture->key.state.height, + texture->key.state.depth, texture->key.state.levels); + + VkDebugUtilsObjectNameInfoEXT name_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, + .objectType = VK_OBJECT_TYPE_IMAGE, + .objectHandle = (uint64_t)texture->image, + .pObjectName = label, + }; + + if (r->debug_utils_extension_enabled) { + vkSetDebugUtilsObjectNameEXT(r->device, &name_info); + } + vmaSetAllocationName(r->allocator, texture->allocation, label); +} + +static void create_texture(PGRAPHState *pg, int texture_idx) +{ + NV2A_VK_DGROUP_BEGIN("Creating texture %d", texture_idx); + + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + TextureShape state = pgraph_get_texture_shape(pg, texture_idx); // FIXME: Check for pad issues + BasicColorFormatInfo f_basic = kelvin_color_format_info_map[state.color_format]; + + const hwaddr texture_vram_offset = pgraph_get_texture_phys_addr(pg, texture_idx); + size_t texture_palette_data_size; + const hwaddr texture_palette_vram_offset = + 
pgraph_get_texture_palette_phys_addr_length(pg, texture_idx, + &texture_palette_data_size); + + size_t texture_length = pgraph_get_texture_length(pg, &state); + + TextureKey key; + memset(&key, 0, sizeof(key)); + key.state = state; + key.texture_vram_offset = texture_vram_offset; + key.texture_length = texture_length; + key.palette_vram_offset = texture_palette_vram_offset; + key.palette_length = texture_palette_data_size; + key.scale = 1; + + bool is_indexed = (state.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8); + + bool possibly_dirty = false; + bool possibly_dirty_checked = false; + bool surface_to_texture = false; + + // Check active surfaces to see if this texture was a render target + SurfaceBinding *surface = pgraph_vk_surface_get(d, texture_vram_offset); + if (surface && state.levels == 1) { + surface_to_texture = + check_surface_to_texture_compatiblity(surface, &state); + + if (surface_to_texture && surface->upload_pending) { + pgraph_vk_upload_surface_data(d, surface, false); + } + } + + if (!surface_to_texture) { + // FIXME: Restructure to support rendering surfaces to cubemap faces + + // Writeback any surfaces which this texture may index + hwaddr tex_vram_end = texture_vram_offset + texture_length - 1; + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + hwaddr surf_vram_end = surface->vram_addr + surface->size - 1; + bool overlapping = !(surface->vram_addr >= tex_vram_end + || texture_vram_offset >= surf_vram_end); + if (overlapping) { + pgraph_vk_surface_download_if_dirty(d, surface); + } + } + } + + if (surface_to_texture && pg->surface_scale_factor > 1) { + key.scale = pg->surface_scale_factor; + } + + uint64_t key_hash = fast_hash((void*)&key, sizeof(key)); + LruNode *node = lru_lookup(&r->texture_cache, key_hash, &key); + TextureBinding *snode = container_of(node, TextureBinding, node); + bool binding_found = snode->image != VK_NULL_HANDLE; + + if (binding_found) { + NV2A_VK_DPRINTF("Cache hit"); + 
r->texture_bindings[texture_idx] = snode; + possibly_dirty |= snode->possibly_dirty; + } else { + possibly_dirty = true; + } + + if (!surface_to_texture && !possibly_dirty_checked) { + possibly_dirty |= check_texture_possibly_dirty( + d, texture_vram_offset, texture_length, texture_palette_vram_offset, + texture_palette_data_size); + } + + // Calculate hash of texture data, if necessary + void *texture_data = (char*)d->vram_ptr + texture_vram_offset; + void *palette_data = (char*)d->vram_ptr + texture_palette_vram_offset; + + uint64_t content_hash = 0; + if (!surface_to_texture && possibly_dirty) { + content_hash = fast_hash(texture_data, texture_length); + if (is_indexed) { + content_hash ^= fast_hash(palette_data, texture_palette_data_size); + } + } + + if (binding_found) { + if (surface_to_texture) { + // FIXME: Add draw time tracking + if (surface->draw_time != snode->draw_time) { + copy_surface_to_texture(pg, surface, snode); + } + } else { + if (possibly_dirty && content_hash != snode->hash) { + upload_texture_image(pg, texture_idx, snode); + snode->hash = content_hash; + } + } + + NV2A_VK_DGROUP_END(); + return; + } + + NV2A_VK_DPRINTF("Cache miss"); + + memcpy(&snode->key, &key, sizeof(key)); + snode->current_layout = VK_IMAGE_LAYOUT_UNDEFINED; + snode->possibly_dirty = false; + snode->hash = content_hash; + + VkColorFormatInfo vkf = kelvin_color_format_vk_map[state.color_format]; + assert(vkf.vk_format != 0); + assert(0 < state.dimensionality); + assert(state.dimensionality < ARRAY_SIZE(dimensionality_to_vk_image_type)); + assert(state.dimensionality < + ARRAY_SIZE(dimensionality_to_vk_image_view_type)); + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = dimensionality_to_vk_image_type[state.dimensionality], + .extent.width = state.width, // FIXME: Use adjusted size? + .extent.height = state.height, + .extent.depth = state.depth, + .mipLevels = f_basic.linear ? 
1 : state.levels, + .arrayLayers = state.cubemap ? 6 : 1, + .format = vkf.vk_format, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .flags = (state.cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0), + }; + + if (surface_to_texture) { + pgraph_apply_scaling_factor(pg, &image_create_info.extent.width, + &image_create_info.extent.height); + } + + VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &snode->image, + &snode->allocation, NULL)); + + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = snode->image, + .viewType = state.cubemap ? + VK_IMAGE_VIEW_TYPE_CUBE : + dimensionality_to_vk_image_view_type[state.dimensionality], + .format = vkf.vk_format, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.baseMipLevel = 0, + .subresourceRange.levelCount = image_create_info.mipLevels, + .subresourceRange.baseArrayLayer = 0, + .subresourceRange.layerCount = image_create_info.arrayLayers, + .components = vkf.component_map, + }; + + VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &snode->image_view)); + + + void *sampler_next_struct = NULL; + + VkSamplerCustomBorderColorCreateInfoEXT custom_border_color_create_info; + VkBorderColor vk_border_color; + uint32_t border_color_pack32 = + pgraph_reg_r(pg, NV_PGRAPH_BORDERCOLOR0 + texture_idx * 4); + + if (r->custom_border_color_extension_enabled) { + float border_color_rgba[4]; + pgraph_argb_pack32_to_rgba_float(border_color_pack32, border_color_rgba); + + custom_border_color_create_info = + (VkSamplerCustomBorderColorCreateInfoEXT){ + .sType = + 
VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, + .customBorderColor.float32 = { border_color_rgba[0], + border_color_rgba[1], + border_color_rgba[2], + border_color_rgba[3] }, + .format = image_view_create_info.format, + .pNext = sampler_next_struct + }; + + vk_border_color = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT; + sampler_next_struct = &custom_border_color_create_info; + } else { + // FIXME: Handle custom color in shader + if (border_color_pack32 == 0x00000000) { + vk_border_color = VK_BORDER_COLOR_INT_TRANSPARENT_BLACK; + } else if (border_color_pack32 == 0xff000000) { + vk_border_color = VK_BORDER_COLOR_INT_OPAQUE_BLACK; + } else { + vk_border_color = VK_BORDER_COLOR_INT_OPAQUE_WHITE; + } + } + + uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + texture_idx * 4); + if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED) + NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED"); + if (filter & NV_PGRAPH_TEXFILTER0_RSIGNED) + NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_RSIGNED"); + if (filter & NV_PGRAPH_TEXFILTER0_GSIGNED) + NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_GSIGNED"); + if (filter & NV_PGRAPH_TEXFILTER0_BSIGNED) + NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_BSIGNED"); + + unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG); + assert(mag_filter < ARRAY_SIZE(pgraph_texture_mag_filter_vk_map)); + + unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); + assert(min_filter < ARRAY_SIZE(pgraph_texture_min_filter_vk_map)); + + bool mipmap_nearest = + f_basic.linear || image_create_info.mipLevels == 1 || + min_filter == NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD || + min_filter == NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD; + + uint32_t address = + pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + texture_idx * 4); + + VkSamplerCreateInfo sampler_create_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_LINEAR, // FIXME + .minFilter = VK_FILTER_LINEAR, // FIXME + .addressModeU = lookup_texture_address_mode( + 
GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU)), + .addressModeV = lookup_texture_address_mode( + GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV)), + .addressModeW = lookup_texture_address_mode( + GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP)), + .anisotropyEnable = VK_FALSE, + // .anisotropyEnable = VK_TRUE, + // .maxAnisotropy = properties.limits.maxSamplerAnisotropy, + .borderColor = vk_border_color, + .unnormalizedCoordinates = f_basic.linear ? VK_TRUE : VK_FALSE, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .mipmapMode = mipmap_nearest ? VK_SAMPLER_MIPMAP_MODE_NEAREST : + VK_SAMPLER_MIPMAP_MODE_LINEAR, + .minLod = 0.0, + .maxLod = f_basic.linear ? 0.0 : image_create_info.mipLevels, + .mipLodBias = 0.0, + .pNext = sampler_next_struct, + }; + + VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL, + &snode->sampler)); + + set_texture_label(pg, snode); + + r->texture_bindings[texture_idx] = snode; + + if (surface_to_texture) { + copy_surface_to_texture(pg, surface, snode); + } else { + upload_texture_image(pg, texture_idx, snode); + snode->draw_time = 0; + } + + NV2A_VK_DGROUP_END(); +} + +static bool check_textures_dirty(PGRAPHState *pg) +{ + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + if (pg->texture_dirty[i]) { + return true; + } + } + return false; +} + +static void update_timestamps(PGRAPHVkState *r) +{ + for (int i = 0; i < ARRAY_SIZE(r->texture_bindings); i++) { + if (r->texture_bindings[i]) { + r->texture_bindings[i]->submit_time = r->submit_count; + } + } +} + +void pgraph_vk_bind_textures(NV2AState *d) +{ + NV2A_VK_DGROUP_BEGIN("%s", __func__); + + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + // FIXME: Check for modifications on bind fastpath (CPU hook) + // FIXME: Mark textures that are sourced from surfaces so we can track them + + r->texture_bindings_changed = false; + + if (!check_textures_dirty(pg)) { + NV2A_VK_DPRINTF("Not dirty"); + NV2A_VK_DGROUP_END(); + update_timestamps(r); 
+ return; + } + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + if (!pgraph_is_texture_enabled(pg, i)) { + r->texture_bindings[i] = &r->dummy_texture; + continue; + } + if (!pg->texture_dirty[i]) { // FIXME: Fails to check memory + continue; + } + + create_texture(pg, i); + + pg->texture_dirty[i] = false; // FIXME: Move to renderer? + } + + r->texture_bindings_changed = true; + update_timestamps(r); + NV2A_VK_DGROUP_END(); +} + +static void texture_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + TextureBinding *snode = container_of(node, TextureBinding, node); + + snode->image = VK_NULL_HANDLE; + snode->allocation = VK_NULL_HANDLE; + snode->image_view = VK_NULL_HANDLE; + snode->sampler = VK_NULL_HANDLE; +} + +static void texture_cache_release_node_resources(PGRAPHVkState *r, TextureBinding *snode) +{ + vkDestroySampler(r->device, snode->sampler, NULL); + snode->sampler = VK_NULL_HANDLE; + + vkDestroyImageView(r->device, snode->image_view, NULL); + snode->image_view = VK_NULL_HANDLE; + + vmaDestroyImage(r->allocator, snode->image, snode->allocation); + snode->image = VK_NULL_HANDLE; + snode->allocation = VK_NULL_HANDLE; +} + +static bool texture_cache_entry_pre_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, texture_cache); + TextureBinding *snode = container_of(node, TextureBinding, node); + + // FIXME: Simplify. 
We don't really need to check bindings + + + // Currently bound + for (int i = 0; i < ARRAY_SIZE(r->texture_bindings); i++) { + if (r->texture_bindings[i] == snode) { + return false; + } + } + + // Used in command buffer + if (r->in_command_buffer && snode->submit_time == r->submit_count) { + return false; + } + + return true; +} + +static void texture_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, texture_cache); + TextureBinding *snode = container_of(node, TextureBinding, node); + texture_cache_release_node_resources(r, snode); +} + +static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + TextureBinding *snode = container_of(node, TextureBinding, node); + return memcmp(&snode->key, key, sizeof(TextureKey)); +} + +static void texture_cache_init(PGRAPHVkState *r) +{ + const size_t texture_cache_size = 1024; + lru_init(&r->texture_cache); + r->texture_cache_entries = g_malloc_n(texture_cache_size, sizeof(TextureBinding)); + assert(r->texture_cache_entries != NULL); + for (int i = 0; i < texture_cache_size; i++) { + lru_add_free(&r->texture_cache, &r->texture_cache_entries[i].node); + } + r->texture_cache.init_node = texture_cache_entry_init; + r->texture_cache.compare_nodes = texture_cache_entry_compare; + r->texture_cache.pre_node_evict = texture_cache_entry_pre_evict; + r->texture_cache.post_node_evict = texture_cache_entry_post_evict; +} + +static void texture_cache_finalize(PGRAPHVkState *r) +{ + lru_flush(&r->texture_cache); + g_free(r->texture_cache_entries); + r->texture_cache_entries = NULL; +} + +void pgraph_vk_trim_texture_cache(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + // FIXME: Allow specifying some amount to trim by + + int num_to_evict = r->texture_cache.num_used / 4; + int num_evicted = 0; + + while (num_to_evict-- && lru_try_evict_one(&r->texture_cache)) { + num_evicted += 1; + } + + NV2A_VK_DPRINTF("Evicted %d textures, %d remain", 
num_evicted, r->texture_cache.num_used); +} + +void pgraph_vk_init_textures(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + texture_cache_init(r); + create_dummy_texture(pg); +} + +void pgraph_vk_finalize_textures(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + destroy_dummy_texture(r); + texture_cache_finalize(r); + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + r->texture_bindings[i] = NULL; + } +} diff --git a/hw/xbox/nv2a/pgraph/vk/vertex.c b/hw/xbox/nv2a/pgraph/vk/vertex.c new file mode 100644 index 0000000000..6625520c65 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/vertex.c @@ -0,0 +1,312 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "renderer.h" + +VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data, + VkDeviceSize size) +{ + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2); + return pgraph_vk_append_to_buffer(pg, BUFFER_INDEX_STAGING, &data, &size, 1, + 1); +} + +VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data, + VkDeviceSize *sizes, + size_t count) +{ + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3); + return pgraph_vk_append_to_buffer(pg, BUFFER_VERTEX_INLINE_STAGING, data, + sizes, count, 1); +} + +void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset, + void *data, VkDeviceSize size) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + size_t offset_bit = offset / 4096; + size_t nbits = size / 4096; + if (find_next_bit(r->uploaded_bitmap, nbits, offset_bit) < nbits) { + // Vertex data changed while building the draw list. Finish drawing + // before updating RAM buffer. + pgraph_vk_finish(pg, VK_FINISH_REASON_VERTEX_BUFFER_DIRTY); + } + + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1); + memcpy(r->storage_buffers[BUFFER_VERTEX_RAM].mapped + offset, data, size); + + bitmap_set(r->uploaded_bitmap, offset_bit, nbits); +} + +static void update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->num_vertex_ram_buffer_syncs < + ARRAY_SIZE(r->vertex_ram_buffer_syncs)); + r->vertex_ram_buffer_syncs[r->num_vertex_ram_buffer_syncs++] = + (MemorySyncRequirement){ .addr = addr, .size = size }; +} + +static const VkFormat float_to_count[] = { + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32A32_SFLOAT, +}; + +static const VkFormat ub_to_count[] = { + VK_FORMAT_R8_UNORM, + VK_FORMAT_R8G8_UNORM, + VK_FORMAT_R8G8B8_UNORM, + VK_FORMAT_R8G8B8A8_UNORM, +}; + +static const VkFormat s1_to_count[] = { + VK_FORMAT_R16_SNORM, + VK_FORMAT_R16G16_SNORM, + 
VK_FORMAT_R16G16B16_SNORM, + VK_FORMAT_R16G16B16A16_SNORM, +}; + +static const VkFormat s32k_to_count[] = { + VK_FORMAT_R16_SSCALED, + VK_FORMAT_R16G16_SSCALED, + VK_FORMAT_R16G16B16_SSCALED, + VK_FORMAT_R16G16B16A16_SSCALED, +}; + +static char const * const vertex_data_array_format_to_str[] = { + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D] = "UB_D3D", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL] = "UB_OGL", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1] = "S1", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F] = "F", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K] = "S32K", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP] = "CMP", +}; + +void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element, + unsigned int max_element, + bool inline_data, + unsigned int inline_stride, + unsigned int provoking_element) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int num_elements = max_element - min_element + 1; + + if (inline_data) { + NV2A_VK_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)", + __func__, num_elements, inline_stride); + } else { + NV2A_VK_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements); + } + + pg->compressed_attrs = 0; + pg->uniform_attrs = 0; + pg->swizzle_attrs = 0; + + r->num_active_vertex_attribute_descriptions = 0; + r->num_active_vertex_binding_descriptions = 0; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + NV2A_VK_DGROUP_BEGIN("[attr %02d] format=%s, count=%d, stride=%d", i, + vertex_data_array_format_to_str[attr->format], + attr->count, attr->stride); + r->vertex_attribute_to_description_location[i] = -1; + if (!attr->count) { + pg->uniform_attrs |= 1 << i; + NV2A_VK_DPRINTF("inline_value = {%f, %f, %f, %f}", + attr->inline_value[0], attr->inline_value[1], + attr->inline_value[2], attr->inline_value[3]); + NV2A_VK_DGROUP_END(); + continue; + } + + VkFormat vk_format; + bool needs_conversion = 
false; + bool d3d_swizzle = false; + + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: + assert(attr->count == 4); + d3d_swizzle = true; + /* fallthru */ + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: + assert(attr->count <= ARRAY_SIZE(ub_to_count)); + vk_format = ub_to_count[attr->count - 1]; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: + assert(attr->count <= ARRAY_SIZE(s1_to_count)); + vk_format = s1_to_count[attr->count - 1]; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + assert(attr->count <= ARRAY_SIZE(float_to_count)); + vk_format = float_to_count[attr->count - 1]; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: + assert(attr->count <= ARRAY_SIZE(s32k_to_count)); + vk_format = s32k_to_count[attr->count - 1]; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: + vk_format = + VK_FORMAT_R32_SINT; // VK_FORMAT_B10G11R11_UFLOAT_PACK32 ?? + /* 3 signed, normalized components packed in 32-bits. (11,11,10) */ + assert(attr->count == 1); + needs_conversion = true; + break; + default: + fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format); + assert(false); + break; + } + + nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND); + hwaddr attrib_data_addr; + size_t stride; + + if (needs_conversion) { + pg->compressed_attrs |= (1 << i); + } + if (d3d_swizzle) { + pg->swizzle_attrs |= (1 << i); + } + + hwaddr start = 0; + if (inline_data) { + attrib_data_addr = attr->inline_array_offset; + stride = inline_stride; + } else { + hwaddr dma_len; + uint8_t *attr_data = (uint8_t *)nv_dma_map( + d, attr->dma_select ? 
pg->dma_vertex_b : pg->dma_vertex_a, + &dma_len); + assert(attr->offset < dma_len); + attrib_data_addr = attr_data + attr->offset - d->vram_ptr; + stride = attr->stride; + start = attrib_data_addr + min_element * stride; + update_memory_buffer(d, start, num_elements * stride); + } + + uint32_t provoking_element_index = provoking_element - min_element; + size_t element_size = attr->size * attr->count; + assert(element_size <= sizeof(attr->inline_value)); + const uint8_t *last_entry; + + if (inline_data) { + last_entry = + (uint8_t *)pg->inline_array + attr->inline_array_offset; + } else { + last_entry = d->vram_ptr + start; + } + if (!stride) { + // Stride of 0 indicates that only the first element should be + // used. + pg->uniform_attrs |= 1 << i; + pgraph_update_inline_value(attr, last_entry); + NV2A_VK_DPRINTF("inline_value = {%f, %f, %f, %f}", + attr->inline_value[0], attr->inline_value[1], + attr->inline_value[2], attr->inline_value[3]); + NV2A_VK_DGROUP_END(); + continue; + } + + NV2A_VK_DPRINTF("offset = %08" HWADDR_PRIx, attrib_data_addr); + last_entry += stride * provoking_element_index; + pgraph_update_inline_value(attr, last_entry); + + r->vertex_attribute_to_description_location[i] = + r->num_active_vertex_binding_descriptions; + + r->vertex_binding_descriptions + [r->num_active_vertex_binding_descriptions++] = + (VkVertexInputBindingDescription){ + .binding = r->vertex_attribute_to_description_location[i], + .stride = stride, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }; + + r->vertex_attribute_descriptions + [r->num_active_vertex_attribute_descriptions++] = + (VkVertexInputAttributeDescription){ + .binding = r->vertex_attribute_to_description_location[i], + .location = i, + .format = vk_format, + }; + + r->vertex_attribute_offsets[i] = attrib_data_addr; + + NV2A_VK_DGROUP_END(); + } + + NV2A_VK_DGROUP_END(); +} + +void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = 
pg->vk_renderer_state; + + pg->compressed_attrs = 0; + pg->uniform_attrs = 0; + pg->swizzle_attrs = 0; + + r->num_active_vertex_attribute_descriptions = 0; + r->num_active_vertex_binding_descriptions = 0; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + if (attr->inline_buffer_populated) { + r->vertex_attribute_to_description_location[i] = + r->num_active_vertex_binding_descriptions; + r->vertex_binding_descriptions + [r->num_active_vertex_binding_descriptions++] = + (VkVertexInputBindingDescription){ + .binding = + r->vertex_attribute_to_description_location[i], + .stride = 4 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }; + r->vertex_attribute_descriptions + [r->num_active_vertex_attribute_descriptions++] = + (VkVertexInputAttributeDescription){ + .binding = + r->vertex_attribute_to_description_location[i], + .location = i, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + }; + memcpy(attr->inline_value, + attr->inline_buffer + (pg->inline_buffer_length - 1) * 4, + sizeof(attr->inline_value)); + } else { + r->vertex_attribute_to_description_location[i] = -1; + pg->uniform_attrs |= 1 << i; + } + } +} \ No newline at end of file diff --git a/hw/xbox/nv2a/vsh.h b/hw/xbox/nv2a/pgraph/vsh.h similarity index 92% rename from hw/xbox/nv2a/vsh.h rename to hw/xbox/nv2a/pgraph/vsh.h index 18ef4bb5f2..405b6c9aa6 100644 --- a/hw/xbox/nv2a/vsh.h +++ b/hw/xbox/nv2a/pgraph/vsh.h @@ -21,7 +21,7 @@ #define HW_NV2A_VSH_H #include -#include "shaders_common.h" +#include "qemu/mstring.h" enum VshLight { LIGHT_OFF, @@ -130,11 +130,4 @@ typedef enum { uint8_t vsh_get_field(const uint32_t *shader_token, VshFieldName field_name); -void vsh_translate(uint16_t version, - const uint32_t *tokens, - unsigned int length, - bool z_perspective, - MString *header, MString *body); - - #endif diff --git a/hw/xbox/nv2a/shaders.c b/hw/xbox/nv2a/shaders.c deleted file mode 100644 index cafe326e93..0000000000 --- 
a/hw/xbox/nv2a/shaders.c +++ /dev/null @@ -1,1599 +0,0 @@ -/* - * QEMU Geforce NV2A shader generator - * - * Copyright (c) 2015 espes - * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2020-2021 Matt Borgerson - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - */ - -#include "qemu/osdep.h" -#include - -#include "shaders_common.h" -#include "shaders.h" -#include "nv2a_int.h" -#include "ui/xemu-settings.h" -#include "xemu-version.h" - -void mstring_append_fmt(MString *qstring, const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - mstring_append_va(qstring, fmt, ap); - va_end(ap); -} - -MString *mstring_from_fmt(const char *fmt, ...) 
-{ - MString *ret = mstring_new(); - va_list ap; - va_start(ap, fmt); - mstring_append_va(ret, fmt, ap); - va_end(ap); - - return ret; -} - -void mstring_append_va(MString *qstring, const char *fmt, va_list va) -{ - char scratch[256]; - - va_list ap; - va_copy(ap, va); - const int len = vsnprintf(scratch, sizeof(scratch), fmt, ap); - va_end(ap); - - if (len == 0) { - return; - } else if (len < sizeof(scratch)) { - mstring_append(qstring, scratch); - return; - } - - /* overflowed out scratch buffer, alloc and try again */ - char *buf = g_malloc(len + 1); - va_copy(ap, va); - vsnprintf(buf, len + 1, fmt, ap); - va_end(ap); - - mstring_append(qstring, buf); - g_free(buf); -} - -GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode) -{ - if (polygon_mode == POLY_MODE_POINT) { - return GL_POINTS; - } - - switch (primitive_mode) { - case PRIM_TYPE_POINTS: return GL_POINTS; - case PRIM_TYPE_LINES: return GL_LINES; - case PRIM_TYPE_LINE_LOOP: return GL_LINE_LOOP; - case PRIM_TYPE_LINE_STRIP: return GL_LINE_STRIP; - case PRIM_TYPE_TRIANGLES: return GL_TRIANGLES; - case PRIM_TYPE_TRIANGLE_STRIP: return GL_TRIANGLE_STRIP; - case PRIM_TYPE_TRIANGLE_FAN: return GL_TRIANGLE_FAN; - case PRIM_TYPE_QUADS: return GL_LINES_ADJACENCY; - case PRIM_TYPE_QUAD_STRIP: return GL_LINE_STRIP_ADJACENCY; - case PRIM_TYPE_POLYGON: - if (polygon_mode == POLY_MODE_LINE) { - return GL_LINE_LOOP; - } else if (polygon_mode == POLY_MODE_FILL) { - return GL_TRIANGLE_FAN; - } - - assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode"); - return 0; - default: - assert(!"Invalid primitive_mode"); - return 0; - } -} - -static MString* generate_geometry_shader( - enum ShaderPolygonMode polygon_front_mode, - enum ShaderPolygonMode polygon_back_mode, - enum ShaderPrimitiveMode primitive_mode, - GLenum *gl_primitive_mode, - bool smooth_shading) -{ - /* FIXME: Missing support for 2-sided-poly mode */ - assert(polygon_front_mode == polygon_back_mode); - enum 
ShaderPolygonMode polygon_mode = polygon_front_mode; - - *gl_primitive_mode = get_gl_primitive_mode(polygon_mode, primitive_mode); - - /* POINT mode shouldn't require any special work */ - if (polygon_mode == POLY_MODE_POINT) { - return NULL; - } - - /* Handle LINE and FILL mode */ - const char *layout_in = NULL; - const char *layout_out = NULL; - const char *body = NULL; - switch (primitive_mode) { - case PRIM_TYPE_POINTS: return NULL; - case PRIM_TYPE_LINES: return NULL; - case PRIM_TYPE_LINE_LOOP: return NULL; - case PRIM_TYPE_LINE_STRIP: return NULL; - case PRIM_TYPE_TRIANGLES: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - body = " emit_vertex(0, 0);\n" - " emit_vertex(1, 0);\n" - " emit_vertex(2, 0);\n" - " emit_vertex(0, 0);\n" - " EndPrimitive();\n"; - break; - case PRIM_TYPE_TRIANGLE_STRIP: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - /* Imagine a quad made of a tristrip, the comments tell you which - * vertex we are using */ - body = " if ((gl_PrimitiveIDIn & 1) == 0) {\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 0);\n" /* bottom right */ - " }\n" - " emit_vertex(1, 0);\n" /* top right */ - " emit_vertex(2, 0);\n" /* bottom left */ - " emit_vertex(0, 0);\n" /* bottom right */ - " } else {\n" - " emit_vertex(2, 0);\n" /* bottom left */ - " emit_vertex(1, 0);\n" /* top left */ - " emit_vertex(0, 0);\n" /* top right */ - " }\n" - " EndPrimitive();\n"; - break; - case PRIM_TYPE_TRIANGLE_FAN: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - body = " if (gl_PrimitiveIDIn == 0) {\n" - " 
emit_vertex(0, 0);\n" - " }\n" - " emit_vertex(1, 0);\n" - " emit_vertex(2, 0);\n" - " emit_vertex(0, 0);\n" - " EndPrimitive();\n"; - break; - case PRIM_TYPE_QUADS: - layout_in = "layout(lines_adjacency) in;\n"; - if (polygon_mode == POLY_MODE_LINE) { - layout_out = "layout(line_strip, max_vertices = 5) out;\n"; - body = " emit_vertex(0, 3);\n" - " emit_vertex(1, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(3, 3);\n" - " emit_vertex(0, 3);\n" - " EndPrimitive();\n"; - } else if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; - body = " emit_vertex(3, 3);\n" - " emit_vertex(0, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(1, 3);\n" - " EndPrimitive();\n"; - } else { - assert(false); - return NULL; - } - break; - case PRIM_TYPE_QUAD_STRIP: - layout_in = "layout(lines_adjacency) in;\n"; - if (polygon_mode == POLY_MODE_LINE) { - layout_out = "layout(line_strip, max_vertices = 5) out;\n"; - body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 3);\n" - " }\n" - " emit_vertex(1, 3);\n" - " emit_vertex(3, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(0, 3);\n" - " EndPrimitive();\n"; - } else if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; - body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" - " emit_vertex(0, 3);\n" - " emit_vertex(1, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(3, 3);\n" - " EndPrimitive();\n"; - } else { - assert(false); - return NULL; - } - break; - case PRIM_TYPE_POLYGON: - if (polygon_mode == POLY_MODE_LINE) { - return NULL; - } - if (polygon_mode == POLY_MODE_FILL) { - if (smooth_shading) { - return NULL; - } - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; - body = " emit_vertex(0, 2);\n" - " emit_vertex(1, 2);\n" - " emit_vertex(2, 2);\n" - " EndPrimitive();\n"; - } else { - assert(false); - return NULL; 
- } - break; - - default: - assert(false); - return NULL; - } - - /* generate a geometry shader to support deprecated primitive types */ - assert(layout_in); - assert(layout_out); - assert(body); - MString* s = mstring_from_str("#version 330\n" - "\n"); - mstring_append(s, layout_in); - mstring_append(s, layout_out); - mstring_append(s, "\n"); - if (smooth_shading) { - mstring_append(s, - STRUCT_V_VERTEX_DATA_IN_ARRAY_SMOOTH - "\n" - STRUCT_VERTEX_DATA_OUT_SMOOTH - "\n" - "void emit_vertex(int index, int _unused) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " gl_PointSize = gl_in[index].gl_PointSize;\n" - " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" - " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" - " vtx_inv_w = v_vtx_inv_w[index];\n" - " vtx_inv_w_flat = v_vtx_inv_w[index];\n" - " vtxD0 = v_vtxD0[index];\n" - " vtxD1 = v_vtxD1[index];\n" - " vtxB0 = v_vtxB0[index];\n" - " vtxB1 = v_vtxB1[index];\n" - " vtxFog = v_vtxFog[index];\n" - " vtxT0 = v_vtxT0[index];\n" - " vtxT1 = v_vtxT1[index];\n" - " vtxT2 = v_vtxT2[index];\n" - " vtxT3 = v_vtxT3[index];\n" - " EmitVertex();\n" - "}\n"); - } else { - mstring_append(s, - STRUCT_V_VERTEX_DATA_IN_ARRAY_FLAT - "\n" - STRUCT_VERTEX_DATA_OUT_FLAT - "\n" - "void emit_vertex(int index, int provoking_index) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " gl_PointSize = gl_in[index].gl_PointSize;\n" - " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" - " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" - " vtx_inv_w = v_vtx_inv_w[index];\n" - " vtx_inv_w_flat = v_vtx_inv_w[provoking_index];\n" - " vtxD0 = v_vtxD0[provoking_index];\n" - " vtxD1 = v_vtxD1[provoking_index];\n" - " vtxB0 = v_vtxB0[provoking_index];\n" - " vtxB1 = v_vtxB1[provoking_index];\n" - " vtxFog = v_vtxFog[index];\n" - " vtxT0 = v_vtxT0[index];\n" - " vtxT1 = v_vtxT1[index];\n" - " vtxT2 = v_vtxT2[index];\n" - " vtxT3 = v_vtxT3[index];\n" - " EmitVertex();\n" - "}\n"); - } - - mstring_append(s, "\n" 
- "void main() {\n"); - mstring_append(s, body); - mstring_append(s, "}\n"); - - return s; -} - -static void append_skinning_code(MString* str, bool mix, - unsigned int count, const char* type, - const char* output, const char* input, - const char* matrix, const char* swizzle) -{ - if (count == 0) { - mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n", - type, output, input, matrix, swizzle); - } else { - mstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type); - if (mix) { - /* Generated final weight (like GL_WEIGHT_SUM_UNITY_ARB) */ - mstring_append(str, "{\n" - " float weight_i;\n" - " float weight_n = 1.0;\n"); - int i; - for (i = 0; i < count; i++) { - if (i < (count - 1)) { - char c = "xyzw"[i]; - mstring_append_fmt(str, " weight_i = weight.%c;\n" - " weight_n -= weight_i;\n", - c); - } else { - mstring_append(str, " weight_i = weight_n;\n"); - } - mstring_append_fmt(str, " %s += (%s * %s%d).%s * weight_i;\n", - output, input, matrix, i, swizzle); - } - mstring_append(str, "}\n"); - } else { - /* Individual weights */ - int i; - for (i = 0; i < count; i++) { - char c = "xyzw"[i]; - mstring_append_fmt(str, "%s += (%s * %s%d).%s * weight.%c;\n", - output, input, matrix, i, swizzle, c); - } - } - } -} - -#define GLSL_C(idx) "c[" stringify(idx) "]" -#define GLSL_LTCTXA(idx) "ltctxa[" stringify(idx) "]" - -#define GLSL_C_MAT4(idx) \ - "mat4(" GLSL_C(idx) ", " GLSL_C(idx+1) ", " \ - GLSL_C(idx+2) ", " GLSL_C(idx+3) ")" - -#define GLSL_DEFINE(a, b) "#define " stringify(a) " " b "\n" - -static void generate_fixed_function(const ShaderState *state, - MString *header, MString *body) -{ - int i, j; - - /* generate vertex shader mimicking fixed function */ - mstring_append(header, -"#define position v0\n" -"#define weight v1\n" -"#define normal v2.xyz\n" -"#define diffuse v3\n" -"#define specular v4\n" -"#define fogCoord v5.x\n" -"#define pointSize v6\n" -"#define backDiffuse v7\n" -"#define backSpecular v8\n" -"#define texture0 v9\n" -"#define texture1 v10\n" 
-"#define texture2 v11\n" -"#define texture3 v12\n" -"#define reserved1 v13\n" -"#define reserved2 v14\n" -"#define reserved3 v15\n" -"\n" -"uniform vec4 ltctxa[" stringify(NV2A_LTCTXA_COUNT) "];\n" -"uniform vec4 ltctxb[" stringify(NV2A_LTCTXB_COUNT) "];\n" -"uniform vec4 ltc1[" stringify(NV2A_LTC1_COUNT) "];\n" -"\n" -GLSL_DEFINE(projectionMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_PMAT0)) -GLSL_DEFINE(compositeMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_CMAT0)) -"\n" -GLSL_DEFINE(texPlaneS0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 0)) -GLSL_DEFINE(texPlaneT0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 1)) -GLSL_DEFINE(texPlaneR0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 2)) -GLSL_DEFINE(texPlaneQ0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 3)) -"\n" -GLSL_DEFINE(texPlaneS1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 0)) -GLSL_DEFINE(texPlaneT1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 1)) -GLSL_DEFINE(texPlaneR1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 2)) -GLSL_DEFINE(texPlaneQ1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 3)) -"\n" -GLSL_DEFINE(texPlaneS2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 0)) -GLSL_DEFINE(texPlaneT2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 1)) -GLSL_DEFINE(texPlaneR2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 2)) -GLSL_DEFINE(texPlaneQ2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 3)) -"\n" -GLSL_DEFINE(texPlaneS3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 0)) -GLSL_DEFINE(texPlaneT3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 1)) -GLSL_DEFINE(texPlaneR3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 2)) -GLSL_DEFINE(texPlaneQ3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 3)) -"\n" -GLSL_DEFINE(modelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT0)) -GLSL_DEFINE(modelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT1)) -GLSL_DEFINE(modelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT2)) -GLSL_DEFINE(modelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT3)) -"\n" -GLSL_DEFINE(invModelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT0)) -GLSL_DEFINE(invModelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT1)) -GLSL_DEFINE(invModelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT2)) 
-GLSL_DEFINE(invModelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT3)) -"\n" -GLSL_DEFINE(eyePosition, GLSL_C(NV_IGRAPH_XF_XFCTX_EYEP)) -"\n" -"#define lightAmbientColor(i) " - "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_AMB) " + (i)*6].xyz\n" -"#define lightDiffuseColor(i) " - "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_DIF) " + (i)*6].xyz\n" -"#define lightSpecularColor(i) " - "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_SPC) " + (i)*6].xyz\n" -"\n" -"#define lightSpotFalloff(i) " - "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_K) " + (i)*2].xyz\n" -"#define lightSpotDirection(i) " - "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_SPT) " + (i)*2]\n" -"\n" -"#define lightLocalRange(i) " - "ltc1[" stringify(NV_IGRAPH_XF_LTC1_r0) " + (i)].x\n" -"\n" -GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz") -GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz") -"\n" -"uniform mat4 invViewport;\n" -"\n"); - - /* Skinning */ - unsigned int count; - bool mix; - switch (state->skinning) { - case SKINNING_OFF: - mix = false; count = 0; break; - case SKINNING_1WEIGHTS: - mix = true; count = 2; break; - case SKINNING_2WEIGHTS2MATRICES: - mix = false; count = 2; break; - case SKINNING_2WEIGHTS: - mix = true; count = 3; break; - case SKINNING_3WEIGHTS3MATRICES: - mix = false; count = 3; break; - case SKINNING_3WEIGHTS: - mix = true; count = 4; break; - case SKINNING_4WEIGHTS4MATRICES: - mix = false; count = 4; break; - default: - assert(false); - break; - } - mstring_append_fmt(body, "/* Skinning mode %d */\n", - state->skinning); - - append_skinning_code(body, mix, count, "vec4", - "tPosition", "position", - "modelViewMat", "xyzw"); - append_skinning_code(body, mix, count, "vec3", - "tNormal", "vec4(normal, 0.0)", - "invModelViewMat", "xyz"); - - /* Normalization */ - if (state->normalization) { - mstring_append(body, "tNormal = normalize(tNormal);\n"); - } - - /* Texgen */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - 
mstring_append_fmt(body, "/* Texgen for stage %d */\n", - i); - /* Set each component individually */ - /* FIXME: could be nicer if some channels share the same texgen */ - for (j = 0; j < 4; j++) { - /* TODO: TexGen View Model missing! */ - char c = "xyzw"[j]; - char cSuffix = "STRQ"[j]; - switch (state->texgen[i][j]) { - case TEXGEN_DISABLE: - mstring_append_fmt(body, "oT%d.%c = texture%d.%c;\n", - i, c, i, c); - break; - case TEXGEN_EYE_LINEAR: - mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, tPosition);\n", - i, c, cSuffix, i); - break; - case TEXGEN_OBJECT_LINEAR: - mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, position);\n", - i, c, cSuffix, i); - break; - case TEXGEN_SPHERE_MAP: - assert(j < 2); /* Channels S,T only! */ - mstring_append(body, "{\n"); - /* FIXME: u, r and m only have to be calculated once */ - mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n"); - //FIXME: tNormal before or after normalization? Always normalize? - mstring_append(body, " vec3 r = reflect(u, tNormal);\n"); - - /* FIXME: This would consume 1 division fewer and *might* be - * faster than length: - * // [z=1/(2*x) => z=1/x*0.5] - * vec3 ro = r + vec3(0.0, 0.0, 1.0); - * float m = inversesqrt(dot(ro,ro))*0.5; - */ - - mstring_append(body, " float invM = 1.0 / (2.0 * length(r + vec3(0.0, 0.0, 1.0)));\n"); - mstring_append_fmt(body, " oT%d.%c = r.%c * invM + 0.5;\n", - i, c, c); - mstring_append(body, "}\n"); - break; - case TEXGEN_REFLECTION_MAP: - assert(j < 3); /* Channels S,T,R only! */ - mstring_append(body, "{\n"); - /* FIXME: u and r only have to be calculated once, can share the one from SPHERE_MAP */ - mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n"); - mstring_append(body, " vec3 r = reflect(u, tNormal);\n"); - mstring_append_fmt(body, " oT%d.%c = r.%c;\n", - i, c, c); - mstring_append(body, "}\n"); - break; - case TEXGEN_NORMAL_MAP: - assert(j < 3); /* Channels S,T,R only! 
*/ - mstring_append_fmt(body, "oT%d.%c = tNormal.%c;\n", - i, c, c); - break; - default: - assert(false); - break; - } - } - } - - /* Apply texture matrices */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - if (state->texture_matrix_enable[i]) { - mstring_append_fmt(body, - "oT%d = oT%d * texMat%d;\n", - i, i, i); - } - } - - /* Lighting */ - if (state->lighting) { - - //FIXME: Do 2 passes if we want 2 sided-lighting? - - static char alpha_source_diffuse[] = "diffuse.a"; - static char alpha_source_specular[] = "specular.a"; - static char alpha_source_material[] = "material_alpha"; - const char *alpha_source = alpha_source_diffuse; - if (state->diffuse_src == MATERIAL_COLOR_SRC_MATERIAL) { - mstring_append(header, "uniform float material_alpha;\n"); - alpha_source = alpha_source_material; - } else if (state->diffuse_src == MATERIAL_COLOR_SRC_SPECULAR) { - alpha_source = alpha_source_specular; - } - - if (state->ambient_src == MATERIAL_COLOR_SRC_MATERIAL) { - mstring_append_fmt(body, "oD0 = vec4(sceneAmbientColor, %s);\n", alpha_source); - } else if (state->ambient_src == MATERIAL_COLOR_SRC_DIFFUSE) { - mstring_append_fmt(body, "oD0 = vec4(diffuse.rgb, %s);\n", alpha_source); - } else if (state->ambient_src == MATERIAL_COLOR_SRC_SPECULAR) { - mstring_append_fmt(body, "oD0 = vec4(specular.rgb, %s);\n", alpha_source); - } - - mstring_append(body, "oD0.rgb *= materialEmissionColor.rgb;\n"); - if (state->emission_src == MATERIAL_COLOR_SRC_MATERIAL) { - mstring_append(body, "oD0.rgb += sceneAmbientColor;\n"); - } else if (state->emission_src == MATERIAL_COLOR_SRC_DIFFUSE) { - mstring_append(body, "oD0.rgb += diffuse.rgb;\n"); - } else if (state->emission_src == MATERIAL_COLOR_SRC_SPECULAR) { - mstring_append(body, "oD0.rgb += specular.rgb;\n"); - } - - mstring_append(body, "oD1 = vec4(0.0, 0.0, 0.0, specular.a);\n"); - - for (i = 0; i < NV2A_MAX_LIGHTS; i++) { - if (state->light[i] == LIGHT_OFF) { - continue; - } - - /* FIXME: It seems that we only have to handle the 
surface colors if - * they are not part of the material [= vertex colors]. - * If they are material the cpu will premultiply light - * colors - */ - - mstring_append_fmt(body, "/* Light %d */ {\n", i); - - if (state->light[i] == LIGHT_LOCAL - || state->light[i] == LIGHT_SPOT) { - - mstring_append_fmt(header, - "uniform vec3 lightLocalPosition%d;\n" - "uniform vec3 lightLocalAttenuation%d;\n", - i, i); - mstring_append_fmt(body, - " vec3 VP = lightLocalPosition%d - tPosition.xyz/tPosition.w;\n" - " float d = length(VP);\n" -//FIXME: if (d > lightLocalRange) { .. don't process this light .. } /* inclusive?! */ - what about directional lights? - " VP = normalize(VP);\n" - " float attenuation = 1.0 / (lightLocalAttenuation%d.x\n" - " + lightLocalAttenuation%d.y * d\n" - " + lightLocalAttenuation%d.z * d * d);\n" - " vec3 halfVector = normalize(VP + eyePosition.xyz / eyePosition.w);\n" /* FIXME: Not sure if eyePosition is correct */ - " float nDotVP = max(0.0, dot(tNormal, VP));\n" - " float nDotHV = max(0.0, dot(tNormal, halfVector));\n", - i, i, i, i); - - } - - switch(state->light[i]) { - case LIGHT_INFINITE: - - /* lightLocalRange will be 1e+30 here */ - - mstring_append_fmt(header, - "uniform vec3 lightInfiniteHalfVector%d;\n" - "uniform vec3 lightInfiniteDirection%d;\n", - i, i); - mstring_append_fmt(body, - " float attenuation = 1.0;\n" - " float nDotVP = max(0.0, dot(tNormal, normalize(vec3(lightInfiniteDirection%d))));\n" - " float nDotHV = max(0.0, dot(tNormal, vec3(lightInfiniteHalfVector%d)));\n", - i, i); - - /* FIXME: Do specular */ - - /* FIXME: tBackDiffuse */ - - break; - case LIGHT_LOCAL: - /* Everything done already */ - break; - case LIGHT_SPOT: - /* https://docs.microsoft.com/en-us/windows/win32/direct3d9/attenuation-and-spotlight-factor#spotlight-factor */ - mstring_append_fmt(body, - " vec4 spotDir = lightSpotDirection(%d);\n" - " float invScale = 1/length(spotDir.xyz);\n" - " float cosHalfPhi = -invScale*spotDir.w;\n" - " float cosHalfTheta = 
invScale + cosHalfPhi;\n" - " float spotDirDotVP = dot(spotDir.xyz, VP);\n" - " float rho = invScale*spotDirDotVP;\n" - " if (rho > cosHalfTheta) {\n" - " } else if (rho <= cosHalfPhi) {\n" - " attenuation = 0.0;\n" - " } else {\n" - " attenuation *= spotDirDotVP + spotDir.w;\n" /* FIXME: lightSpotFalloff */ - " }\n", - i); - break; - default: - assert(false); - break; - } - - mstring_append_fmt(body, - " float pf;\n" - " if (nDotVP == 0.0) {\n" - " pf = 0.0;\n" - " } else {\n" - " pf = pow(nDotHV, /* specular(l, m, n, l1, m1, n1) */ 0.001);\n" - " }\n" - " vec3 lightAmbient = lightAmbientColor(%d) * attenuation;\n" - " vec3 lightDiffuse = lightDiffuseColor(%d) * attenuation * nDotVP;\n" - " vec3 lightSpecular = lightSpecularColor(%d) * pf;\n", - i, i, i); - - mstring_append(body, - " oD0.xyz += lightAmbient;\n"); - - switch (state->diffuse_src) { - case MATERIAL_COLOR_SRC_MATERIAL: - mstring_append(body, - " oD0.xyz += lightDiffuse;\n"); - break; - case MATERIAL_COLOR_SRC_DIFFUSE: - mstring_append(body, - " oD0.xyz += diffuse.xyz * lightDiffuse;\n"); - break; - case MATERIAL_COLOR_SRC_SPECULAR: - mstring_append(body, - " oD0.xyz += specular.xyz * lightDiffuse;\n"); - break; - } - - mstring_append(body, - " oD1.xyz += specular.xyz * lightSpecular;\n"); - - mstring_append(body, "}\n"); - } - } else { - mstring_append(body, " oD0 = diffuse;\n"); - mstring_append(body, " oD1 = specular;\n"); - } - mstring_append(body, " oB0 = backDiffuse;\n"); - mstring_append(body, " oB1 = backSpecular;\n"); - - /* Fog */ - if (state->fog_enable) { - - /* From: https://www.opengl.org/registry/specs/NV/fog_distance.txt */ - switch(state->foggen) { - case FOGGEN_SPEC_ALPHA: - /* FIXME: Do we have to clamp here? 
*/ - mstring_append(body, " float fogDistance = clamp(specular.a, 0.0, 1.0);\n"); - break; - case FOGGEN_RADIAL: - mstring_append(body, " float fogDistance = length(tPosition.xyz);\n"); - break; - case FOGGEN_PLANAR: - case FOGGEN_ABS_PLANAR: - mstring_append(body, " float fogDistance = dot(fogPlane.xyz, tPosition.xyz) + fogPlane.w;\n"); - if (state->foggen == FOGGEN_ABS_PLANAR) { - mstring_append(body, " fogDistance = abs(fogDistance);\n"); - } - break; - case FOGGEN_FOG_X: - mstring_append(body, " float fogDistance = fogCoord;\n"); - break; - default: - assert(false); - break; - } - - } - - /* If skinning is off the composite matrix already includes the MV matrix */ - if (state->skinning == SKINNING_OFF) { - mstring_append(body, " tPosition = position;\n"); - } - - mstring_append(body, - " oPos = invViewport * (tPosition * compositeMat);\n" - " oPos.z = oPos.z * 2.0 - oPos.w;\n"); - - /* FIXME: Testing */ - if (state->point_params_enable) { - mstring_append_fmt( - body, - " float d_e = length(position * modelViewMat0);\n" - " oPts.x = 1/sqrt(%f + %f*d_e + %f*d_e*d_e) + %f;\n", - state->point_params[0], state->point_params[1], state->point_params[2], - state->point_params[6]); - mstring_append_fmt(body, " oPts.x = min(oPts.x*%f + %f, 64.0) * %d;\n", - state->point_params[3], state->point_params[7], - state->surface_scale_factor); - } else { - mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size, - state->surface_scale_factor); - } - - mstring_append(body, - " if (oPos.w == 0.0 || isinf(oPos.w)) {\n" - " vtx_inv_w = 1.0;\n" - " } else {\n" - " vtx_inv_w = 1.0 / oPos.w;\n" - " }\n" - " vtx_inv_w_flat = vtx_inv_w;\n"); -} - -static MString *generate_vertex_shader(const ShaderState *state, - bool prefix_outputs) -{ - int i; - MString *header = mstring_from_str( -"#version 400\n" -"\n" -"uniform vec4 clipRange;\n" -"uniform vec2 surfaceSize;\n" -"\n" -/* All constants in 1 array declaration */ -"uniform vec4 c[" stringify(NV2A_VERTEXSHADER_CONSTANTS) 
"];\n" -"\n" -"uniform vec4 fogColor;\n" -"uniform float fogParam[2];\n" -"\n" - -GLSL_DEFINE(fogPlane, GLSL_C(NV_IGRAPH_XF_XFCTX_FOG)) -GLSL_DEFINE(texMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T0MAT)) -GLSL_DEFINE(texMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T1MAT)) -GLSL_DEFINE(texMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T2MAT)) -GLSL_DEFINE(texMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T3MAT)) - -"\n" -"vec4 oPos = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oFog = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n" -"\n" -"vec4 decompress_11_11_10(int cmp) {\n" -" float x = float(bitfieldExtract(cmp, 0, 11)) / 1023.0;\n" -" float y = float(bitfieldExtract(cmp, 11, 11)) / 1023.0;\n" -" float z = float(bitfieldExtract(cmp, 22, 10)) / 511.0;\n" -" return vec4(x, y, z, 1);\n" -"}\n"); - if (prefix_outputs) { - mstring_append(header, state->smooth_shading ? - STRUCT_V_VERTEX_DATA_OUT_SMOOTH : - STRUCT_V_VERTEX_DATA_OUT_FLAT); - mstring_append(header, - "#define vtx_inv_w v_vtx_inv_w\n" - "#define vtx_inv_w_flat v_vtx_inv_w_flat\n" - "#define vtxD0 v_vtxD0\n" - "#define vtxD1 v_vtxD1\n" - "#define vtxB0 v_vtxB0\n" - "#define vtxB1 v_vtxB1\n" - "#define vtxFog v_vtxFog\n" - "#define vtxT0 v_vtxT0\n" - "#define vtxT1 v_vtxT1\n" - "#define vtxT2 v_vtxT2\n" - "#define vtxT3 v_vtxT3\n" - ); - } else { - mstring_append(header, state->smooth_shading ? 
- STRUCT_VERTEX_DATA_OUT_SMOOTH : - STRUCT_VERTEX_DATA_OUT_FLAT); - } - mstring_append(header, "\n"); - for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - if (state->compressed_attrs & (1 << i)) { - mstring_append_fmt(header, - "layout(location = %d) in int v%d_cmp;\n", i, i); - } else { - mstring_append_fmt(header, "layout(location = %d) in vec4 v%d;\n", - i, i); - } - } - mstring_append(header, "\n"); - - MString *body = mstring_from_str("void main() {\n"); - - for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - if (state->compressed_attrs & (1 << i)) { - mstring_append_fmt( - body, "vec4 v%d = decompress_11_11_10(v%d_cmp);\n", i, i); - } - } - - if (state->fixed_function) { - generate_fixed_function(state, header, body); - } else if (state->vertex_program) { - vsh_translate(VSH_VERSION_XVS, - (uint32_t*)state->program_data, - state->program_length, - state->z_perspective, - header, body); - } else { - assert(false); - } - - - /* Fog */ - - if (state->fog_enable) { - - if (state->vertex_program) { - /* FIXME: Does foggen do something here? Let's do some tracking.. - * - * "RollerCoaster Tycoon" has - * state->vertex_program = true; state->foggen == FOGGEN_PLANAR - * but expects oFog.x as fogdistance?! Writes oFog.xyzw = v0.z - */ - mstring_append(body, " float fogDistance = oFog.x;\n"); - } - - /* FIXME: Do this per pixel? 
*/ - - switch (state->fog_mode) { - case FOG_MODE_LINEAR: - case FOG_MODE_LINEAR_ABS: - - /* f = (end - d) / (end - start) - * fogParam[1] = -1 / (end - start) - * fogParam[0] = 1 - end * fogParam[1]; - */ - - mstring_append(body, - " if (isinf(fogDistance)) {\n" - " fogDistance = 0.0;\n" - " }\n" - ); - mstring_append(body, " float fogFactor = fogParam[0] + fogDistance * fogParam[1];\n"); - mstring_append(body, " fogFactor -= 1.0;\n"); - break; - case FOG_MODE_EXP: - mstring_append(body, - " if (isinf(fogDistance)) {\n" - " fogDistance = 0.0;\n" - " }\n" - ); - /* fallthru */ - case FOG_MODE_EXP_ABS: - - /* f = 1 / (e^(d * density)) - * fogParam[1] = -density / (2 * ln(256)) - * fogParam[0] = 1.5 - */ - - mstring_append(body, " float fogFactor = fogParam[0] + exp2(fogDistance * fogParam[1] * 16.0);\n"); - mstring_append(body, " fogFactor -= 1.5;\n"); - break; - case FOG_MODE_EXP2: - case FOG_MODE_EXP2_ABS: - - /* f = 1 / (e^((d * density)^2)) - * fogParam[1] = -density / (2 * sqrt(ln(256))) - * fogParam[0] = 1.5 - */ - - mstring_append(body, " float fogFactor = fogParam[0] + exp2(-fogDistance * fogDistance * fogParam[1] * fogParam[1] * 32.0);\n"); - mstring_append(body, " fogFactor -= 1.5;\n"); - break; - default: - assert(false); - break; - } - /* Calculate absolute for the modes which need it */ - switch (state->fog_mode) { - case FOG_MODE_LINEAR_ABS: - case FOG_MODE_EXP_ABS: - case FOG_MODE_EXP2_ABS: - mstring_append(body, " fogFactor = abs(fogFactor);\n"); - break; - default: - break; - } - - mstring_append(body, " oFog.xyzw = vec4(fogFactor);\n"); - } else { - /* FIXME: Is the fog still calculated / passed somehow?! - */ - mstring_append(body, " oFog.xyzw = vec4(1.0);\n"); - } - - /* Set outputs */ - const char *shade_model_mult = state->smooth_shading ? 
"vtx_inv_w" : "vtx_inv_w_flat"; - mstring_append_fmt(body, "\n" - " vtxD0 = clamp(oD0, 0.0, 1.0) * %s;\n" - " vtxD1 = clamp(oD1, 0.0, 1.0) * %s;\n" - " vtxB0 = clamp(oB0, 0.0, 1.0) * %s;\n" - " vtxB1 = clamp(oB1, 0.0, 1.0) * %s;\n" - " vtxFog = oFog.x * vtx_inv_w;\n" - " vtxT0 = oT0 * vtx_inv_w;\n" - " vtxT1 = oT1 * vtx_inv_w;\n" - " vtxT2 = oT2 * vtx_inv_w;\n" - " vtxT3 = oT3 * vtx_inv_w;\n" - " gl_Position = oPos;\n" - " gl_PointSize = oPts.x;\n" - " gl_ClipDistance[0] = oPos.z - oPos.w*clipRange.z;\n" // Near - " gl_ClipDistance[1] = oPos.w*clipRange.w - oPos.z;\n" // Far - "\n" - "}\n", - shade_model_mult, - shade_model_mult, - shade_model_mult, - shade_model_mult); - - - /* Return combined header + source */ - mstring_append(header, mstring_get_str(body)); - mstring_unref(body); - return header; - -} - -static GLuint create_gl_shader(GLenum gl_shader_type, - const char *code, - const char *name) -{ - GLint compiled = 0; - - NV2A_GL_DGROUP_BEGIN("Creating new %s", name); - - NV2A_DPRINTF("compile new %s, code:\n%s\n", name, code); - - GLuint shader = glCreateShader(gl_shader_type); - glShaderSource(shader, 1, &code, 0); - glCompileShader(shader); - - /* Check it compiled */ - compiled = 0; - glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled); - if (!compiled) { - GLchar* log; - GLint log_length; - glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); - log = g_malloc(log_length * sizeof(GLchar)); - glGetShaderInfoLog(shader, log_length, NULL, log); - fprintf(stderr, "%s\n\n" "nv2a: %s compilation failed: %s\n", code, name, log); - g_free(log); - - NV2A_GL_DGROUP_END(); - abort(); - } - - NV2A_GL_DGROUP_END(); - - return shader; -} - -void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state) -{ - int i, j; - char tmp[64]; - - /* set texture samplers */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - char samplerName[16]; - snprintf(samplerName, sizeof(samplerName), "texSamp%d", i); - GLint texSampLoc = 
glGetUniformLocation(binding->gl_program, samplerName); - if (texSampLoc >= 0) { - glUniform1i(texSampLoc, i); - } - } - - /* validate the program */ - glValidateProgram(binding->gl_program); - GLint valid = 0; - glGetProgramiv(binding->gl_program, GL_VALIDATE_STATUS, &valid); - if (!valid) { - GLchar log[1024]; - glGetProgramInfoLog(binding->gl_program, 1024, NULL, log); - fprintf(stderr, "nv2a: shader validation failed: %s\n", log); - abort(); - } - - /* lookup fragment shader uniforms */ - for (i = 0; i < 9; i++) { - for (j = 0; j < 2; j++) { - snprintf(tmp, sizeof(tmp), "c%d_%d", j, i); - binding->psh_constant_loc[i][j] = glGetUniformLocation(binding->gl_program, tmp); - } - } - binding->alpha_ref_loc = glGetUniformLocation(binding->gl_program, "alphaRef"); - for (i = 1; i < NV2A_MAX_TEXTURES; i++) { - snprintf(tmp, sizeof(tmp), "bumpMat%d", i); - binding->bump_mat_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - snprintf(tmp, sizeof(tmp), "bumpScale%d", i); - binding->bump_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - snprintf(tmp, sizeof(tmp), "bumpOffset%d", i); - binding->bump_offset_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - - for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { - snprintf(tmp, sizeof(tmp), "texScale%d", i); - binding->tex_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - - /* lookup vertex shader uniforms */ - for(i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) { - snprintf(tmp, sizeof(tmp), "c[%d]", i); - binding->vsh_constant_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - binding->surface_size_loc = glGetUniformLocation(binding->gl_program, "surfaceSize"); - binding->clip_range_loc = glGetUniformLocation(binding->gl_program, "clipRange"); - binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor"); - binding->fog_param_loc[0] = glGetUniformLocation(binding->gl_program, "fogParam[0]"); - binding->fog_param_loc[1] = 
glGetUniformLocation(binding->gl_program, "fogParam[1]"); - - binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport"); - for (i = 0; i < NV2A_LTCTXA_COUNT; i++) { - snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i); - binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - for (i = 0; i < NV2A_LTCTXB_COUNT; i++) { - snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i); - binding->ltctxb_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - for (i = 0; i < NV2A_LTC1_COUNT; i++) { - snprintf(tmp, sizeof(tmp), "ltc1[%d]", i); - binding->ltc1_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - for (i = 0; i < NV2A_MAX_LIGHTS; i++) { - snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i); - binding->light_infinite_half_vector_loc[i] = - glGetUniformLocation(binding->gl_program, tmp); - snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i); - binding->light_infinite_direction_loc[i] = - glGetUniformLocation(binding->gl_program, tmp); - - snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i); - binding->light_local_position_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i); - binding->light_local_attenuation_loc[i] = - glGetUniformLocation(binding->gl_program, tmp); - } - for (i = 0; i < 8; i++) { - snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i); - binding->clip_region_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - - if (state->fixed_function) { - binding->material_alpha_loc = - glGetUniformLocation(binding->gl_program, "material_alpha"); - } else { - binding->material_alpha_loc = -1; - } -} - -ShaderBinding *generate_shaders(const ShaderState *state) -{ - char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL); - if (previous_numeric_locale) { - previous_numeric_locale = g_strdup(previous_numeric_locale); - } - - /* Ensure numeric values are printed with '.' 
radix, no grouping */ - setlocale(LC_NUMERIC, "C"); - GLuint program = glCreateProgram(); - - /* Create an optional geometry shader and find primitive type */ - GLenum gl_primitive_mode; - MString* geometry_shader_code = - generate_geometry_shader(state->polygon_front_mode, - state->polygon_back_mode, - state->primitive_mode, - &gl_primitive_mode, - state->smooth_shading); - if (geometry_shader_code) { - const char* geometry_shader_code_str = - mstring_get_str(geometry_shader_code); - GLuint geometry_shader = create_gl_shader(GL_GEOMETRY_SHADER, - geometry_shader_code_str, - "geometry shader"); - glAttachShader(program, geometry_shader); - mstring_unref(geometry_shader_code); - } - - /* create the vertex shader */ - MString *vertex_shader_code = - generate_vertex_shader(state, geometry_shader_code != NULL); - GLuint vertex_shader = create_gl_shader(GL_VERTEX_SHADER, - mstring_get_str(vertex_shader_code), - "vertex shader"); - glAttachShader(program, vertex_shader); - mstring_unref(vertex_shader_code); - - /* generate a fragment shader from register combiners */ - MString *fragment_shader_code = psh_translate(state->psh); - const char *fragment_shader_code_str = - mstring_get_str(fragment_shader_code); - GLuint fragment_shader = create_gl_shader(GL_FRAGMENT_SHADER, - fragment_shader_code_str, - "fragment shader"); - glAttachShader(program, fragment_shader); - mstring_unref(fragment_shader_code); - - /* link the program */ - glLinkProgram(program); - GLint linked = 0; - glGetProgramiv(program, GL_LINK_STATUS, &linked); - if(!linked) { - GLchar log[2048]; - glGetProgramInfoLog(program, 2048, NULL, log); - fprintf(stderr, "nv2a: shader linking failed: %s\n", log); - abort(); - } - - glUseProgram(program); - - ShaderBinding* ret = g_malloc0(sizeof(ShaderBinding)); - ret->gl_program = program; - ret->gl_primitive_mode = gl_primitive_mode; - - update_shader_constant_locations(ret, state); - - if (previous_numeric_locale) { - setlocale(LC_NUMERIC, previous_numeric_locale); 
- g_free(previous_numeric_locale); - } - - return ret; -} - -static const char *shader_gl_vendor = NULL; - -static void shader_create_cache_folder(void) -{ - char *shader_path = g_strdup_printf("%sshaders", xemu_settings_get_base_path()); - qemu_mkdir(shader_path); - g_free(shader_path); -} - -static char *shader_get_lru_cache_path(void) -{ - return g_strdup_printf("%s/shader_cache_list", xemu_settings_get_base_path()); -} - -static void shader_write_lru_list_entry_to_disk(Lru *lru, LruNode *node, void *opaque) -{ - FILE *lru_list_file = (FILE*) opaque; - size_t written = fwrite(&node->hash, sizeof(uint64_t), 1, lru_list_file); - if (written != 1) { - fprintf(stderr, "nv2a: Failed to write shader list entry %llx to disk\n", - (unsigned long long) node->hash); - } -} - -void shader_write_cache_reload_list(PGRAPHState *pg) -{ - if (!g_config.perf.cache_shaders) { - qatomic_set(&pg->shader_cache_writeback_pending, false); - qemu_event_set(&pg->shader_cache_writeback_complete); - return; - } - - char *shader_lru_path = shader_get_lru_cache_path(); - qemu_thread_join(&pg->shader_disk_thread); - - FILE *lru_list = qemu_fopen(shader_lru_path, "wb"); - g_free(shader_lru_path); - if (!lru_list) { - fprintf(stderr, "nv2a: Failed to open shader LRU cache for writing\n"); - return; - } - - lru_visit_active(&pg->shader_cache, shader_write_lru_list_entry_to_disk, lru_list); - fclose(lru_list); - - lru_flush(&pg->shader_cache); - - qatomic_set(&pg->shader_cache_writeback_pending, false); - qemu_event_set(&pg->shader_cache_writeback_complete); -} - -bool shader_load_from_memory(ShaderLruNode *snode) -{ - assert(glGetError() == GL_NO_ERROR); - - if (!snode->program) { - return false; - } - - GLuint gl_program = glCreateProgram(); - glProgramBinary(gl_program, snode->program_format, snode->program, snode->program_size); - GLint gl_error = glGetError(); - if (gl_error != GL_NO_ERROR) { - NV2A_DPRINTF("failed to load shader binary from disk: GL error code %d\n", gl_error); - 
glDeleteProgram(gl_program); - return false; - } - - glValidateProgram(gl_program); - GLint valid = 0; - glGetProgramiv(gl_program, GL_VALIDATE_STATUS, &valid); - if (!valid) { - GLchar log[1024]; - glGetProgramInfoLog(gl_program, 1024, NULL, log); - NV2A_DPRINTF("failed to load shader binary from disk: %s\n", log); - glDeleteProgram(gl_program); - return false; - } - - glUseProgram(gl_program); - - ShaderBinding* binding = g_malloc0(sizeof(ShaderBinding)); - binding->gl_program = gl_program; - binding->gl_primitive_mode = get_gl_primitive_mode(snode->state.polygon_front_mode, - snode->state.primitive_mode); - snode->binding = binding; - - g_free(snode->program); - snode->program = NULL; - - update_shader_constant_locations(binding, &snode->state); - - return true; -} - -static char *shader_get_bin_directory(uint64_t hash) -{ - const char *cfg_dir = xemu_settings_get_base_path(); - uint64_t bin_mask = 0xffffUL << 48; - char *shader_bin_dir = g_strdup_printf("%s/shaders/%04lx", - cfg_dir, (hash & bin_mask) >> 48); - return shader_bin_dir; -} - -static char *shader_get_binary_path(const char *shader_bin_dir, uint64_t hash) -{ - uint64_t bin_mask = 0xffffUL << 48; - return g_strdup_printf("%s/%012lx", shader_bin_dir, - hash & (~bin_mask)); -} - -static void shader_load_from_disk(PGRAPHState *pg, uint64_t hash) -{ - char *shader_bin_dir = shader_get_bin_directory(hash); - char *shader_path = shader_get_binary_path(shader_bin_dir, hash); - char *cached_xemu_version = NULL; - char *cached_gl_vendor = NULL; - void *program_buffer = NULL; - - uint64_t cached_xemu_version_len; - uint64_t gl_vendor_len; - GLenum program_binary_format; - ShaderState state; - size_t shader_size; - - g_free(shader_bin_dir); - - qemu_mutex_lock(&pg->shader_cache_lock); - if (lru_contains_hash(&pg->shader_cache, hash)) { - qemu_mutex_unlock(&pg->shader_cache_lock); - return; - } - qemu_mutex_unlock(&pg->shader_cache_lock); - - FILE *shader_file = qemu_fopen(shader_path, "rb"); - if (!shader_file) 
{ - goto error; - } - - size_t nread; - #define READ_OR_ERR(data, data_len) \ - do { \ - nread = fread(data, data_len, 1, shader_file); \ - if (nread != 1) { \ - fclose(shader_file); \ - goto error; \ - } \ - } while (0) - - READ_OR_ERR(&cached_xemu_version_len, sizeof(cached_xemu_version_len)); - - cached_xemu_version = g_malloc(cached_xemu_version_len +1); - READ_OR_ERR(cached_xemu_version, cached_xemu_version_len); - if (strcmp(cached_xemu_version, xemu_version) != 0) { - fclose(shader_file); - goto error; - } - - READ_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); - - cached_gl_vendor = g_malloc(gl_vendor_len); - READ_OR_ERR(cached_gl_vendor, gl_vendor_len); - if (strcmp(cached_gl_vendor, shader_gl_vendor) != 0) { - fclose(shader_file); - goto error; - } - - READ_OR_ERR(&program_binary_format, sizeof(program_binary_format)); - READ_OR_ERR(&state, sizeof(state)); - READ_OR_ERR(&shader_size, sizeof(shader_size)); - - program_buffer = g_malloc(shader_size); - READ_OR_ERR(program_buffer, shader_size); - - #undef READ_OR_ERR - - fclose(shader_file); - g_free(shader_path); - g_free(cached_xemu_version); - g_free(cached_gl_vendor); - - qemu_mutex_lock(&pg->shader_cache_lock); - LruNode *node = lru_lookup(&pg->shader_cache, hash, &state); - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - - /* If we happened to regenerate this shader already, then we may as well use the new one */ - if (snode->binding) { - qemu_mutex_unlock(&pg->shader_cache_lock); - return; - } - - snode->program_format = program_binary_format; - snode->program_size = shader_size; - snode->program = program_buffer; - snode->cached = true; - qemu_mutex_unlock(&pg->shader_cache_lock); - return; - -error: - /* Delete the shader so it won't be loaded again */ - qemu_unlink(shader_path); - g_free(shader_path); - g_free(program_buffer); - g_free(cached_xemu_version); - g_free(cached_gl_vendor); -} - -static void *shader_reload_lru_from_disk(void *arg) -{ - if (!g_config.perf.cache_shaders) 
{ - return NULL; - } - - PGRAPHState *pg = (PGRAPHState*) arg; - char *shader_lru_path = shader_get_lru_cache_path(); - - FILE *lru_shaders_list = qemu_fopen(shader_lru_path, "rb"); - g_free(shader_lru_path); - if (!lru_shaders_list) { - return NULL; - } - - uint64_t hash; - while (fread(&hash, sizeof(uint64_t), 1, lru_shaders_list) == 1) { - shader_load_from_disk(pg, hash); - } - - return NULL; -} - -static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) -{ - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - memcpy(&snode->state, state, sizeof(ShaderState)); - snode->cached = false; - snode->binding = NULL; - snode->program = NULL; - snode->save_thread = NULL; -} - -static void shader_cache_entry_post_evict(Lru *lru, LruNode *node) -{ - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - - if (snode->save_thread) { - qemu_thread_join(snode->save_thread); - g_free(snode->save_thread); - } - - if (snode->binding) { - glDeleteProgram(snode->binding->gl_program); - g_free(snode->binding); - } - - if (snode->program) { - g_free(snode->program); - } - - snode->cached = false; - snode->save_thread = NULL; - snode->binding = NULL; - snode->program = NULL; - memset(&snode->state, 0, sizeof(ShaderState)); -} - -static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key) -{ - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - return memcmp(&snode->state, key, sizeof(ShaderState)); -} - -void shader_cache_init(PGRAPHState *pg) -{ - if (!shader_gl_vendor) { - shader_gl_vendor = (const char *) glGetString(GL_VENDOR); - } - - shader_create_cache_folder(); - - /* FIXME: Make this configurable */ - const size_t shader_cache_size = 50*1024; - lru_init(&pg->shader_cache); - pg->shader_cache_entries = malloc(shader_cache_size * sizeof(ShaderLruNode)); - assert(pg->shader_cache_entries != NULL); - for (int i = 0; i < shader_cache_size; i++) { - lru_add_free(&pg->shader_cache, 
&pg->shader_cache_entries[i].node); - } - - pg->shader_cache.init_node = shader_cache_entry_init; - pg->shader_cache.compare_nodes = shader_cache_entry_compare; - pg->shader_cache.post_node_evict = shader_cache_entry_post_evict; - - qemu_thread_create(&pg->shader_disk_thread, "pgraph.shader_cache", - shader_reload_lru_from_disk, pg, QEMU_THREAD_JOINABLE); -} - -static void *shader_write_to_disk(void *arg) -{ - ShaderLruNode *snode = (ShaderLruNode*) arg; - - char *shader_bin = shader_get_bin_directory(snode->node.hash); - char *shader_path = shader_get_binary_path(shader_bin, snode->node.hash); - - static uint64_t gl_vendor_len; - if (gl_vendor_len == 0) { - gl_vendor_len = (uint64_t) (strlen(shader_gl_vendor) + 1); - } - - static uint64_t xemu_version_len = 0; - if (xemu_version_len == 0) { - xemu_version_len = (uint64_t) (strlen(xemu_version) + 1); - } - - qemu_mkdir(shader_bin); - g_free(shader_bin); - - FILE *shader_file = qemu_fopen(shader_path, "wb"); - if (!shader_file) { - goto error; - } - - size_t written; - #define WRITE_OR_ERR(data, data_size) \ - do { \ - written = fwrite(data, data_size, 1, shader_file); \ - if (written != 1) { \ - fclose(shader_file); \ - goto error; \ - } \ - } while (0) - - WRITE_OR_ERR(&xemu_version_len, sizeof(xemu_version_len)); - WRITE_OR_ERR(xemu_version, xemu_version_len); - - WRITE_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); - WRITE_OR_ERR(shader_gl_vendor, gl_vendor_len); - - WRITE_OR_ERR(&snode->program_format, sizeof(snode->program_format)); - WRITE_OR_ERR(&snode->state, sizeof(snode->state)); - - WRITE_OR_ERR(&snode->program_size, sizeof(snode->program_size)); - WRITE_OR_ERR(snode->program, snode->program_size); - - #undef WRITE_OR_ERR - - fclose(shader_file); - - g_free(shader_path); - g_free(snode->program); - snode->program = NULL; - - return NULL; - -error: - fprintf(stderr, "nv2a: Failed to write shader binary file to %s\n", shader_path); - qemu_unlink(shader_path); - g_free(shader_path); - 
g_free(snode->program); - snode->program = NULL; - return NULL; -} - -void shader_cache_to_disk(ShaderLruNode *snode) -{ - if (!snode->binding || snode->cached) { - return; - } - - GLint program_size; - glGetProgramiv(snode->binding->gl_program, GL_PROGRAM_BINARY_LENGTH, &program_size); - - if (snode->program) { - g_free(snode->program); - snode->program = NULL; - } - - /* program_size might be zero on some systems, if no binary formats are supported */ - if (program_size == 0) { - return; - } - - snode->program = g_malloc(program_size); - GLsizei program_size_copied; - glGetProgramBinary(snode->binding->gl_program, program_size, &program_size_copied, - &snode->program_format, snode->program); - assert(glGetError() == GL_NO_ERROR); - - snode->program_size = program_size_copied; - snode->cached = true; - - char name[24]; - snprintf(name, sizeof(name), "scache-%llx", (unsigned long long) snode->node.hash); - snode->save_thread = g_malloc0(sizeof(QemuThread)); - qemu_thread_create(snode->save_thread, name, shader_write_to_disk, snode, QEMU_THREAD_JOINABLE); -} diff --git a/hw/xbox/nv2a/shaders_common.h b/hw/xbox/nv2a/shaders_common.h deleted file mode 100644 index ae2ba9f14d..0000000000 --- a/hw/xbox/nv2a/shaders_common.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * QEMU Geforce NV2A shader common definitions - * - * Copyright (c) 2015 espes - * Copyright (c) 2015 Jannik Vogel - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - */ - -#ifndef HW_NV2A_SHADERS_COMMON_H -#define HW_NV2A_SHADERS_COMMON_H - -#include "debug.h" - -#define DEF_VERTEX_DATA(qualifier, in_out, prefix, suffix) \ - "noperspective " in_out " float " prefix "vtx_inv_w" suffix ";\n" \ - "flat " in_out " float " prefix "vtx_inv_w_flat" suffix ";\n" \ - qualifier " " in_out " vec4 " prefix "vtxD0" suffix ";\n" \ - qualifier " " in_out " vec4 " prefix "vtxD1" suffix ";\n" \ - qualifier " " in_out " vec4 " prefix "vtxB0" suffix ";\n" \ - qualifier " " in_out " vec4 " prefix "vtxB1" suffix ";\n" \ - "noperspective " in_out " float " prefix "vtxFog" suffix ";\n" \ - "noperspective " in_out " vec4 " prefix "vtxT0" suffix ";\n" \ - "noperspective " in_out " vec4 " prefix "vtxT1" suffix ";\n" \ - "noperspective " in_out " vec4 " prefix "vtxT2" suffix ";\n" \ - "noperspective " in_out " vec4 " prefix "vtxT3" suffix ";\n" - -#define STRUCT_VERTEX_DATA_OUT_SMOOTH DEF_VERTEX_DATA("noperspective", "out", "", "") -#define STRUCT_VERTEX_DATA_IN_SMOOTH DEF_VERTEX_DATA("noperspective", "in", "", "") -#define STRUCT_V_VERTEX_DATA_OUT_SMOOTH DEF_VERTEX_DATA("noperspective", "out", "v_", "") -#define STRUCT_V_VERTEX_DATA_IN_ARRAY_SMOOTH DEF_VERTEX_DATA("noperspective", "in", "v_", "[]") - -#define STRUCT_VERTEX_DATA_OUT_FLAT DEF_VERTEX_DATA("flat", "out", "", "") -#define STRUCT_VERTEX_DATA_IN_FLAT DEF_VERTEX_DATA("flat", "in", "", "") -#define STRUCT_V_VERTEX_DATA_OUT_FLAT DEF_VERTEX_DATA("flat", "out", "v_", "") -#define STRUCT_V_VERTEX_DATA_IN_ARRAY_FLAT DEF_VERTEX_DATA("flat", "in", "v_", "[]") - -typedef struct { - int ref; - gchar *string; -} MString; - -void mstring_append_fmt(MString *mstring, const char *fmt, ...); -MString *mstring_from_fmt(const char *fmt, ...); -void mstring_append_va(MString *mstring, const char *fmt, va_list va); - -static inline -void mstring_ref(MString *mstr) -{ - mstr->ref++; -} - 
-static inline -void mstring_unref(MString *mstr) -{ - mstr->ref--; - if (!mstr->ref) { - g_free(mstr->string); - g_free(mstr); - } -} - -static inline -void mstring_append(MString *mstr, const char *str) -{ - gchar *n = g_strconcat(mstr->string, str, NULL); - g_free(mstr->string); - mstr->string = n; -} - -static inline -void mstring_append_chr(MString *mstr, char chr) -{ - mstring_append_fmt(mstr, "%c", chr); -} - -static inline -void mstring_append_int(MString *mstr, int val) -{ - mstring_append_fmt(mstr, "%" PRId64, val); -} - -static inline -MString *mstring_new(void) -{ - MString *mstr = g_malloc(sizeof(MString)); - mstr->ref = 1; - mstr->string = g_strdup(""); - return mstr; -} - -static inline -MString *mstring_from_str(const char *str) -{ - MString *mstr = g_malloc(sizeof(MString)); - mstr->ref = 1; - mstr->string = g_strdup(str); - return mstr; -} - -static inline -const gchar *mstring_get_str(MString *mstr) -{ - return mstr->string; -} - -static inline -size_t mstring_get_length(MString *mstr) -{ - return strlen(mstr->string); -} - - -#endif diff --git a/hw/xbox/nv2a/lru.h b/include/qemu/lru.h similarity index 87% rename from hw/xbox/nv2a/lru.h rename to include/qemu/lru.h index c0dca7ec5d..b588270282 100644 --- a/hw/xbox/nv2a/lru.h +++ b/include/qemu/lru.h @@ -1,7 +1,7 @@ /* * LRU object list * - * Copyright (c) 2021 Matt Borgerson + * Copyright (c) 2021-2024 Matt Borgerson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -42,6 +42,8 @@ typedef struct Lru Lru; struct Lru { QTAILQ_HEAD(, LruNode) global; QTAILQ_HEAD(, LruNode) bins[LRU_NUM_BINS]; + int num_used; + int num_free; /* Initialize a node. 
*/ void (*init_node)(Lru *lru, LruNode *node, void *key); @@ -67,6 +69,8 @@ void lru_init(Lru *lru) lru->compare_nodes = NULL; lru->pre_node_evict = NULL; lru->post_node_evict = NULL; + lru->num_free = 0; + lru->num_used = 0; } static inline @@ -74,6 +78,7 @@ void lru_add_free(Lru *lru, LruNode *node) { node->next_bin.tqe_circ.tql_prev = NULL; QTAILQ_INSERT_TAIL(&lru->global, node, next_global); + lru->num_free += 1; } static inline @@ -106,29 +111,51 @@ void lru_evict_node(Lru *lru, LruNode *node) if (lru->post_node_evict) { lru->post_node_evict(lru, node); } + + lru->num_used -= 1; + lru->num_free += 1; +} + +static inline +LruNode *lru_try_evict_one(Lru *lru) +{ + LruNode *found; + + QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) { + if (lru_is_node_in_use(lru, found) + && (!lru->pre_node_evict || lru->pre_node_evict(lru, found))) { + lru_evict_node(lru, found); + return found; + } + } + + return NULL; } static inline LruNode *lru_evict_one(Lru *lru) { - LruNode *found; - - QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) { - bool can_evict = true; - if (lru_is_node_in_use(lru, found) && lru->pre_node_evict) { - can_evict = lru->pre_node_evict(lru, found); - } - if (can_evict) { - break; - } - } + LruNode *found = lru_try_evict_one(lru); assert(found != NULL); /* No evictable node! 
*/ - lru_evict_node(lru, found); return found; } +static inline +LruNode *lru_get_one_free(Lru *lru) +{ + LruNode *found; + + QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) { + if (!lru_is_node_in_use(lru, found)) { + return found; + } + } + + return lru_evict_one(lru); +} + static inline bool lru_contains_hash(Lru *lru, uint64_t hash) { @@ -160,12 +187,15 @@ LruNode *lru_lookup(Lru *lru, uint64_t hash, void *key) if (found) { QTAILQ_REMOVE(&lru->bins[bin], found, next_bin); } else { - found = lru_evict_one(lru); + found = lru_get_one_free(lru); found->hash = hash; if (lru->init_node) { lru->init_node(lru, found, key); } assert(found->hash == hash); + + lru->num_used += 1; + lru->num_free -= 1; } QTAILQ_REMOVE(&lru->global, found, next_global); diff --git a/include/qemu/mstring.h b/include/qemu/mstring.h new file mode 100644 index 0000000000..567fd4cdf3 --- /dev/null +++ b/include/qemu/mstring.h @@ -0,0 +1,82 @@ +#ifndef MSTRING_H +#define MSTRING_H + +#include "qemu/osdep.h" +#include + +typedef struct { + int ref; + gchar *string; +} MString; + +void mstring_append_fmt(MString *mstring, const char *fmt, ...); +MString *mstring_from_fmt(const char *fmt, ...); +void mstring_append_va(MString *mstring, const char *fmt, va_list va); + +static inline +void mstring_ref(MString *mstr) +{ + mstr->ref++; +} + +static inline +void mstring_unref(MString *mstr) +{ + mstr->ref--; + if (!mstr->ref) { + g_free(mstr->string); + g_free(mstr); + } +} + +static inline +void mstring_append(MString *mstr, const char *str) +{ + gchar *n = g_strconcat(mstr->string, str, NULL); + g_free(mstr->string); + mstr->string = n; +} + +static inline +void mstring_append_chr(MString *mstr, char chr) +{ + mstring_append_fmt(mstr, "%c", chr); +} + +static inline +void mstring_append_int(MString *mstr, int val) +{ + mstring_append_fmt(mstr, "%" PRId64, val); +} + +static inline +MString *mstring_new(void) +{ + MString *mstr = g_malloc(sizeof(MString)); + mstr->ref = 1; + mstr->string = 
g_strdup(""); + return mstr; +} + +static inline +MString *mstring_from_str(const char *str) +{ + MString *mstr = g_malloc(sizeof(MString)); + mstr->ref = 1; + mstr->string = g_strdup(str); + return mstr; +} + +static inline +const gchar *mstring_get_str(MString *mstr) +{ + return mstr->string; +} + +static inline +size_t mstring_get_length(MString *mstr) +{ + return strlen(mstr->string); +} + +#endif diff --git a/licenses/SPIRV-Reflect.license.txt b/licenses/SPIRV-Reflect.license.txt new file mode 100644 index 0000000000..261eeb9e9f --- /dev/null +++ b/licenses/SPIRV-Reflect.license.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/licenses/VulkanMemoryAllocator.license.txt b/licenses/VulkanMemoryAllocator.license.txt new file mode 100644 index 0000000000..b9fff388f1 --- /dev/null +++ b/licenses/VulkanMemoryAllocator.license.txt @@ -0,0 +1,19 @@ +Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/licenses/volk.license.txt b/licenses/volk.license.txt new file mode 100644 index 0000000000..5a717f2678 --- /dev/null +++ b/licenses/volk.license.txt @@ -0,0 +1,19 @@ +Copyright (c) 2018-2024 Arseny Kapoulkine + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/meson.build b/meson.build index 8980f55a13..7c12d40fb5 100644 --- a/meson.build +++ b/meson.build @@ -1180,6 +1180,34 @@ if not get_option('opengl').auto() or have_system or have_vhost_user_gpu link_args: config_host['EPOXY_LIBS'].split() + opengl_libs) endif +vulkan = not_found +if targetos == 'windows' + vulkan = declare_dependency( + compile_args: ['-DVK_USE_PLATFORM_WIN32_KHR', '-DVK_NO_PROTOTYPES'], + ) + libglslang = declare_dependency(link_args: [ + '-lglslang', + '-lMachineIndependent', + '-lGenericCodeGen', + '-lSPIRV', + '-lSPIRV-Tools', + '-lSPIRV-Tools-opt' + ]) +elif targetos == 'linux' + vulkan = dependency('vulkan') + libglslang = declare_dependency(link_args: [ + '-lglslang', + '-lMachineIndependent', + '-lGenericCodeGen', + '-lSPIRV', + '-lSPIRV-Tools', + '-lSPIRV-Tools-opt' + ]) +endif + +subdir('thirdparty') + + gbm = not_found if (have_system or have_tools) and (virgl.found() or opengl.found()) gbm = dependency('gbm', method: 'pkg-config', required: false, @@ -1931,6 +1959,7 @@ config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found()) config_host_data.set('CONFIG_LIBPMEM', libpmem.found()) config_host_data.set('CONFIG_NUMA', numa.found()) config_host_data.set('CONFIG_OPENGL', opengl.found()) +config_host_data.set('CONFIG_VULKAN', vulkan.found()) config_host_data.set('CONFIG_PROFILER', get_option('profiler')) config_host_data.set('CONFIG_RBD', rbd.found()) config_host_data.set('CONFIG_RDMA', rdma.found()) @@ -4054,6 +4083,7 @@ summary_info += {'U2F support': u2f} summary_info += {'libusb': libusb} summary_info += {'usb net redir': usbredir} summary_info += {'OpenGL support (epoxy)': opengl} +summary_info += {'Vulkan support': vulkan} summary_info += {'GBM': gbm} summary_info += {'libiscsi support': libiscsi} summary_info += {'libnfs support': libnfs} diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh index 0496ebeb6c..5502c3bfd7 100755 --- a/scripts/archive-source.sh +++ b/scripts/archive-source.sh @@ 
-28,8 +28,12 @@ sub_file="${sub_tdir}/submodule.tar" # different to the host OS. submodules="dtc meson ui/keycodemapdb" submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloat-3" -submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" # xemu extras + +# xemu extras +submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" submodules="$submodules hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu" +submodules="$submodules thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" + sub_deinit="" function cleanup() { diff --git a/scripts/gen-license.py b/scripts/gen-license.py index b71d4ecd56..216f441f3c 100755 --- a/scripts/gen-license.py +++ b/scripts/gen-license.py @@ -228,7 +228,25 @@ Lib('fpng', 'https://github.com/richgel999/fpng', Lib('nv2a_vsh_cpu', 'https://github.com/abaire/nv2a_vsh_cpu', unlicense, 'https://raw.githubusercontent.com/abaire/nv2a_vsh_cpu/main/LICENSE', ships_static=all_platforms, - submodule=Submodule('hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu') + submodule=Submodule('hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu') + ), + +Lib('volk', 'https://github.com/zeux/volk', + mit, 'https://raw.githubusercontent.com/zeux/volk/master/LICENSE.md', + ships_static=all_platforms, + submodule=Submodule('thirdparty/volk') + ), + +Lib('VulkanMemoryAllocator', 'https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator', + mit, 'https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator/master/LICENSE.txt', + ships_static=all_platforms, + submodule=Submodule('thirdparty/VulkanMemoryAllocator') + ), + +Lib('SPIRV-Reflect', 'https://github.com/KhronosGroup/SPIRV-Reflect', + apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Reflect/main/LICENSE', + ships_static=all_platforms, + submodule=Submodule('thirdparty/SPIRV-Reflect') ), # @@ -344,6 +362,17 @@ Lib('miniz', 
'https://github.com/richgel999/miniz', ships_static={windows}, platform={windows}, version='2.1.0' ), + +Lib('glslang', 'https://github.com/KhronosGroup/glslang', + bsd_3clause, 'https://raw.githubusercontent.com/KhronosGroup/glslang/main/LICENSE.txt', + ships_static={windows}, platform={windows}, + ), + +Lib('SPIRV-Tools', 'https://github.com/KhronosGroup/SPIRV-Tools', + apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Tools/main/LICENSE', + ships_static={windows}, platform={windows}, + ), + ] def gen_license(): diff --git a/thirdparty/SPIRV-Reflect b/thirdparty/SPIRV-Reflect new file mode 160000 index 0000000000..1d674a82d7 --- /dev/null +++ b/thirdparty/SPIRV-Reflect @@ -0,0 +1 @@ +Subproject commit 1d674a82d7e102ed0c02e64e036827db9e8b1a71 diff --git a/thirdparty/VulkanMemoryAllocator b/thirdparty/VulkanMemoryAllocator new file mode 160000 index 0000000000..009ecd192c --- /dev/null +++ b/thirdparty/VulkanMemoryAllocator @@ -0,0 +1 @@ +Subproject commit 009ecd192c1289c7529bff248a16cfe896254816 diff --git a/thirdparty/meson.build b/thirdparty/meson.build new file mode 100644 index 0000000000..99ecbd2796 --- /dev/null +++ b/thirdparty/meson.build @@ -0,0 +1,12 @@ +if vulkan.found() + +libvma = static_library('vma', sources: 'vma.cc', include_directories: 'VulkanMemoryAllocator/include', dependencies: vulkan) +vma = declare_dependency(include_directories: 'VulkanMemoryAllocator/include', link_with: libvma) + +libvolk = static_library('volk', sources: 'volk/volk.c', dependencies: vulkan) +volk = declare_dependency(include_directories: 'volk', link_with: libvolk, dependencies: vulkan) + +libspirv_reflect = static_library('spirv_reflect', sources: 'SPIRV-Reflect/spirv_reflect.c', dependencies: vulkan) +spirv_reflect = declare_dependency(include_directories: 'SPIRV-Reflect', link_with: libspirv_reflect, dependencies: vulkan) + +endif diff --git a/thirdparty/renderdoc_app.h b/thirdparty/renderdoc_app.h index 7ee24b69ee..c01e05932e 100644 --- 
a/thirdparty/renderdoc_app.h +++ b/thirdparty/renderdoc_app.h @@ -1,7 +1,7 @@ /****************************************************************************** * The MIT License (MIT) * - * Copyright (c) 2019-2022 Baldur Karlsson + * Copyright (c) 2019-2024 Baldur Karlsson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -35,7 +35,7 @@ #if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) #define RENDERDOC_CC __cdecl -#elif defined(__linux__) +#elif defined(__linux__) || defined(__FreeBSD__) #define RENDERDOC_CC #elif defined(__APPLE__) #define RENDERDOC_CC @@ -72,7 +72,8 @@ extern "C" { // RenderDoc capture options // -typedef enum RENDERDOC_CaptureOption { +typedef enum RENDERDOC_CaptureOption +{ // Allow the application to enable vsync // // Default - enabled @@ -214,6 +215,19 @@ typedef enum RENDERDOC_CaptureOption { // necessary as directed by a RenderDoc developer. eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12, + // Define a soft memory limit which some APIs may aim to keep overhead under where + // possible. Anything above this limit will where possible be saved directly to disk during + // capture. + // This will cause increased disk space use (which may cause a capture to fail if disk space is + // exhausted) as well as slower capture times. + // + // Not all memory allocations may be deferred like this so it is not a guarantee of a memory + // limit. + // + // Units are in MBs, suggested values would range from 200MB to 1000MB. + // + // Default - 0 Megabytes + eRENDERDOC_Option_SoftMemoryLimit = 13, } RENDERDOC_CaptureOption; // Sets an option that controls how RenderDoc behaves on capture. 
@@ -233,7 +247,8 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_Capture // If the option is invalid, -FLT_MAX is returned typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt); -typedef enum RENDERDOC_InputButton { +typedef enum RENDERDOC_InputButton +{ // '0' - '9' matches ASCII values eRENDERDOC_Key_0 = 0x30, eRENDERDOC_Key_1 = 0x31, @@ -321,7 +336,8 @@ typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton // If keys is NULL or num is 0, captures keys will be disabled typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num); -typedef enum RENDERDOC_OverlayBits { +typedef enum RENDERDOC_OverlayBits +{ // This single bit controls whether the overlay is enabled or disabled globally eRENDERDOC_Overlay_Enabled = 0x1, @@ -452,6 +468,15 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTarget // ignored and the others will be filled out. typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch); +// Requests that the replay UI show itself (if hidden or not the current top window). This can be +// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle +// showing the UI after making a capture. +// +// This will return 1 if the request was successfully passed on, though it's not guaranteed that +// the UI will be on top in all cases depending on OS rules. 
It will return 0 if there is no current +// target control connection to make such a request, or if there was another error +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); + ////////////////////////////////////////////////////////////////////////// // Capturing functions // @@ -525,14 +550,15 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePoint typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); -// Requests that the replay UI show itself (if hidden or not the current top window). This can be -// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle -// showing the UI after making a capture. +// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom +// title to the capture produced which will be displayed in the UI. // -// This will return 1 if the request was successfully passed on, though it's not guaranteed that -// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current -// target control connection to make such a request, or if there was another error -typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); +// If multiple captures are ongoing, this title will be applied to the first capture to end after +// this call. The second capture to end will have no title, unless this function is called again. +// +// Calling this function has no effect if no capture is currently running, and if it is called +// multiple times only the last title will be used. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title); ////////////////////////////////////////////////////////////////////////////////////////////////// // RenderDoc API versions @@ -547,7 +573,8 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); // Note that this means the API returned can be higher than the one you might have requested. 
// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned // instead of 1.0.0. You can check this with the GetAPIVersion entry point -typedef enum RENDERDOC_Version { +typedef enum RENDERDOC_Version +{ eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00 eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01 eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02 @@ -560,6 +587,7 @@ typedef enum RENDERDOC_Version { eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01 eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02 eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00 + eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00 } RENDERDOC_Version; // API version changelog: @@ -588,8 +616,10 @@ typedef enum RENDERDOC_Version { // 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening // 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option. 
// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected +// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a +// capture made with StartFrameCapture() or EndFrameCapture() -typedef struct RENDERDOC_API_1_5_0 +typedef struct RENDERDOC_API_1_6_0 { pRENDERDOC_GetAPIVersion GetAPIVersion; @@ -664,19 +694,23 @@ typedef struct RENDERDOC_API_1_5_0 // new function in 1.5.0 pRENDERDOC_ShowReplayUI ShowReplayUI; -} RENDERDOC_API_1_5_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_0_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_0_1; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_0_2; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_1_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_1_1; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_1_2; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_2_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_3_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_4_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_4_1; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_4_2; + // new function in 1.6.0 + pRENDERDOC_SetCaptureTitle SetCaptureTitle; +} RENDERDOC_API_1_6_0; + +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0; ////////////////////////////////////////////////////////////////////////////////////////////////// // RenderDoc API entry point diff --git a/thirdparty/vma.cc b/thirdparty/vma.cc new file mode 100644 index 0000000000..a2023d33b2 --- /dev/null +++ 
b/thirdparty/vma.cc @@ -0,0 +1,2 @@ +#define VMA_IMPLEMENTATION +#include "vk_mem_alloc.h" diff --git a/thirdparty/volk b/thirdparty/volk new file mode 160000 index 0000000000..466085407d --- /dev/null +++ b/thirdparty/volk @@ -0,0 +1 @@ +Subproject commit 466085407d5d2f50583fd663c1d65f93a7709d3e diff --git a/ui/meson.build b/ui/meson.build index 18bb7c97c1..75b82df927 100644 --- a/ui/meson.build +++ b/ui/meson.build @@ -40,10 +40,6 @@ xemu_cocoa = dependency('appleframeworks', modules: 'Cocoa') xemu_ss.add(xemu_cocoa) endif -if 'CONFIG_LINUX' in config_host -xemu_ss.add(gtk) -endif - xemu_ss.add(when: 'CONFIG_LINUX', if_true: [gtk, files('xemu-os-utils-linux.c')]) xemu_ss.add(when: 'CONFIG_WIN32', if_true: files('xemu-os-utils-windows.c')) xemu_ss.add(when: 'CONFIG_DARWIN', if_true: files('xemu-os-utils-macos.m')) diff --git a/ui/xemu.c b/ui/xemu.c index d0cec857f4..0d01f22460 100644 --- a/ui/xemu.c +++ b/ui/xemu.c @@ -426,6 +426,7 @@ static void handle_keydown(SDL_Event *ev) { int win; struct sdl2_console *scon = get_scon_from_window(ev->key.windowID); + if (scon == NULL) return; int gui_key_modifier_pressed = get_mod_state(); int gui_keysym = 0; @@ -484,6 +485,7 @@ static void handle_keydown(SDL_Event *ev) static void handle_keyup(SDL_Event *ev) { struct sdl2_console *scon = get_scon_from_window(ev->key.windowID); + if (!scon) return; scon->ignore_hotkeys = false; sdl2_process_key(scon, &ev->key); @@ -944,7 +946,7 @@ static void sdl2_display_very_early_init(DisplayOptions *o) fprintf(stderr, "GL_SHADING_LANGUAGE_VERSION: %s\n", glGetString(GL_SHADING_LANGUAGE_VERSION)); // Initialize offscreen rendering context now - nv2a_gl_context_init(); + nv2a_context_init(); SDL_GL_MakeCurrent(NULL, NULL); // FIXME: atexit(sdl_cleanup); diff --git a/ui/xui/main-menu.cc b/ui/xui/main-menu.cc index 75b88cafb6..a9a6c6ec85 100644 --- a/ui/xui/main-menu.cc +++ b/ui/xui/main-menu.cc @@ -449,7 +449,15 @@ void MainMenuInputView::Draw() void MainMenuDisplayView::Draw() { - 
SectionTitle("Quality"); + SectionTitle("Renderer"); + ChevronCombo("Backend", &g_config.display.renderer, + "Null\0" + "OpenGL\0" +#ifdef CONFIG_VULKAN + "Vulkan\0" +#endif + , + "Select desired renderer implementation"); int rendering_scale = nv2a_get_surface_scale_factor() - 1; if (ChevronCombo("Internal resolution scale", &rendering_scale, "1x\0" diff --git a/ui/xui/main.cc b/ui/xui/main.cc index fd38aa4e7b..069a6282f9 100644 --- a/ui/xui/main.cc +++ b/ui/xui/main.cc @@ -216,7 +216,7 @@ void xemu_hud_render(void) ImGui::NewFrame(); ProcessKeyboardShortcuts(); -#if defined(DEBUG_NV2A_GL) && defined(CONFIG_RENDERDOC) +#if defined(CONFIG_RENDERDOC) if (g_capture_renderdoc_frame) { nv2a_dbg_renderdoc_capture_frames(1); g_capture_renderdoc_frame = false; diff --git a/ui/xui/menubar.cc b/ui/xui/menubar.cc index 2d1f48c604..bce0e7a0fb 100644 --- a/ui/xui/menubar.cc +++ b/ui/xui/menubar.cc @@ -71,8 +71,8 @@ void ProcessKeyboardShortcuts(void) ActionScreenshot(); } -#if defined(DEBUG_NV2A_GL) && defined(CONFIG_RENDERDOC) - if (ImGui::IsKeyPressed(ImGuiKey_F10)) { +#ifdef CONFIG_RENDERDOC + if (ImGui::IsKeyPressed(ImGuiKey_F10) && nv2a_dbg_renderdoc_available()) { nv2a_dbg_renderdoc_capture_frames(1); } #endif @@ -203,7 +203,7 @@ void ShowMainMenu() ImGui::MenuItem("Monitor", "~", &monitor_window.is_open); ImGui::MenuItem("Audio", NULL, &apu_window.m_is_open); ImGui::MenuItem("Video", NULL, &video_window.m_is_open); -#if defined(DEBUG_NV2A_GL) && defined(CONFIG_RENDERDOC) +#ifdef CONFIG_RENDERDOC if (nv2a_dbg_renderdoc_available()) { ImGui::MenuItem("RenderDoc: Capture", NULL, &g_capture_renderdoc_frame); } diff --git a/util/meson.build b/util/meson.build index 4269ef4e38..72ef1db2b5 100644 --- a/util/meson.build +++ b/util/meson.build @@ -59,6 +59,7 @@ util_ss.add(files('int128.c')) util_ss.add(files('memalign.c')) util_ss.add(when: 'CONFIG_WIN32', if_true: files('miniz/miniz.c')) util_ss.add(files('fast-hash.c')) +util_ss.add(files('mstring.c')) if have_user 
util_ss.add(files('selfmap.c')) diff --git a/util/mstring.c b/util/mstring.c new file mode 100644 index 0000000000..6cd0af7335 --- /dev/null +++ b/util/mstring.c @@ -0,0 +1,49 @@ +#include "qemu/osdep.h" +#include "qemu/mstring.h" + +#include <stdio.h> + +void mstring_append_fmt(MString *qstring, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + mstring_append_va(qstring, fmt, ap); + va_end(ap); +} + +MString *mstring_from_fmt(const char *fmt, ...) +{ + MString *ret = mstring_new(); + va_list ap; + va_start(ap, fmt); + mstring_append_va(ret, fmt, ap); + va_end(ap); + + return ret; +} + +void mstring_append_va(MString *qstring, const char *fmt, va_list va) +{ + char scratch[256]; + + va_list ap; + va_copy(ap, va); + const int len = vsnprintf(scratch, sizeof(scratch), fmt, ap); + va_end(ap); + + if (len == 0) { + return; + } else if (len < sizeof(scratch)) { + mstring_append(qstring, scratch); + return; + } + + /* overflowed our scratch buffer, alloc and try again */ + char *buf = g_malloc(len + 1); + va_copy(ap, va); + vsnprintf(buf, len + 1, fmt, ap); + va_end(ap); + + mstring_append(qstring, buf); + g_free(buf); +} diff --git a/xemu-version.c b/xemu-version.c index 523d955760..f2e7a958e9 100644 --- a/xemu-version.c +++ b/xemu-version.c @@ -1,5 +1,8 @@ #include "xemu-version-macro.h" +const int xemu_version_major = XEMU_VERSION_MAJOR; +const int xemu_version_minor = XEMU_VERSION_MINOR; +const int xemu_version_patch = XEMU_VERSION_PATCH; const char *xemu_version = XEMU_VERSION; const char *xemu_branch = XEMU_BRANCH;; const char *xemu_commit = XEMU_COMMIT; diff --git a/xemu-version.h b/xemu-version.h index 484af8a9de..a1fe27fccb 100644 --- a/xemu-version.h +++ b/xemu-version.h @@ -1,6 +1,9 @@ #ifndef XEMU_VERSION_H #define XEMU_VERSION_H +extern const int xemu_version_major; +extern const int xemu_version_minor; +extern const int xemu_version_patch; extern const char *xemu_version; extern const char *xemu_branch; extern const char *xemu_commit;