// Mirror of https://github.com/kmc-jp/n64-emu.git
// Synced 2025-04-02 10:21:43 -04:00
// 284 lines, 9.6 KiB, C++
/* Copyright (c) 2020 Themaister
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
|
|
|
|
#pragma once
|
|
|
|
#include <memory>
|
|
#include <thread>
|
|
#include <queue>
|
|
#include "device.hpp"
|
|
#include "video_interface.hpp"
|
|
#include "rdp_renderer.hpp"
|
|
#include "rdp_common.hpp"
|
|
#include "command_ring.hpp"
|
|
#include "worker_thread.hpp"
|
|
#include "rdp_dump_write.hpp"
|
|
|
|
namespace RDP
|
|
{
|
|
// One pixel made of four 8-bit channels.
// Used as the element type of the colour vector filled by CommandProcessor::scanout_sync().
// Kept as a plain aggregate (no default member initializers) so its layout and
// trivial construction stay exactly as before.
struct RGBA
{
	uint8_t r;
	uint8_t g;
	uint8_t b;
	uint8_t a;
};
|
|
|
|
// Individual option bits for CommandProcessorFlags.
// Every enumerator occupies its own bit (bit 0 through bit 6), so they can be OR-ed together.
// NOTE(review): the meaning of each bit is only implied by its name here — confirm against
// the implementation before relying on it.
enum CommandProcessorFlagBits
{
	COMMAND_PROCESSOR_FLAG_HOST_VISIBLE_HIDDEN_RDRAM_BIT = 1 << 0,
	COMMAND_PROCESSOR_FLAG_HOST_VISIBLE_TMEM_BIT = 1 << 1,
	COMMAND_PROCESSOR_FLAG_UPSCALING_2X_BIT = 1 << 2,
	COMMAND_PROCESSOR_FLAG_UPSCALING_4X_BIT = 1 << 3,
	COMMAND_PROCESSOR_FLAG_UPSCALING_8X_BIT = 1 << 4,
	COMMAND_PROCESSOR_FLAG_SUPER_SAMPLED_READ_BACK_BIT = 1 << 5,
	COMMAND_PROCESSOR_FLAG_SUPER_SAMPLED_DITHER_BIT = 1 << 6
};

// Storage type for a combination of CommandProcessorFlagBits; this is the type of the
// `flags` argument taken by the CommandProcessor constructor.
using CommandProcessorFlags = uint32_t;
|
|
|
|
// Describes one copy entry of a CoherencyOperation (see CoherencyOperation::copies).
// All fields default to zero / nullptr.
// NOTE(review): field semantics are not visible in this header. The offsets and size are
// presumably byte quantities and `counters` presumably the number of entries reachable
// through `counter_base` — confirm against the code that consumes this struct.
struct CoherencyCopy
{
	size_t src_offset = 0;
	size_t mask_offset = 0;
	size_t dst_offset = 0;
	size_t size = 0;
	// Raw pointer; this struct declares no destructor, so it never frees the counters itself.
	std::atomic_uint32_t *counter_base = nullptr;
	unsigned counters = 0;
};
|
|
|
|
struct CoherencyOperation
|
|
{
|
|
Vulkan::Fence fence;
|
|
uint64_t timeline_value = 0;
|
|
|
|
uint8_t *dst = nullptr;
|
|
const Vulkan::Buffer *src = nullptr;
|
|
std::vector<CoherencyCopy> copies;
|
|
std::atomic_uint32_t *unlock_cookie = nullptr;
|
|
};
|
|
|
|
// These options control various behaviors when upscaling, to work around glitches which
// arise naturally as part of upscaling.
//
// The options are overlaid on a 32-bit word (u.words) so the complete state can be handled
// as raw command words.
struct Quirks
{
	// Defaults: native-resolution TEX_RECT enabled, native texture LOD disabled.
	Quirks()
	{
		// Zero the whole word first. The two bool flags cover only part of words[0];
		// without this the remaining bytes would be indeterminate whenever the word is read.
		u.words[0] = 0;
		u.options.native_resolution_tex_rect = true;
		u.options.native_texture_lod = false;
	}

	// Enables or disables Opts::native_resolution_tex_rect.
	void set_native_resolution_tex_rect(bool enable)
	{
		u.options.native_resolution_tex_rect = enable;
	}

	// Enables or disables Opts::native_texture_lod.
	void set_native_texture_lod(bool enable)
	{
		u.options.native_texture_lod = enable;
	}

	union
	{
		struct Opts
		{
			// If true, force TEX_RECT and TEX_RECT_FLIP to render without upscaling.
			// Works around bilinear filtering bugs in Cycle1/Cycle2 mode where the game
			// assumed 1:1 pixel transfer.
			bool native_resolution_tex_rect;

			// Forces LOD to be computed as 1x upscale.
			// Fixes content which relies on LOD computation to select textures in clever ways.
			bool native_texture_lod;
		} options;
		// Raw word view of the same storage as `options`.
		uint32_t words[1];
	} u;
};
|
|
|
|
class CommandProcessor
|
|
{
|
|
public:
|
|
CommandProcessor(Vulkan::Device &device,
|
|
void *rdram_ptr,
|
|
size_t rdram_offset,
|
|
size_t rdram_size,
|
|
size_t hidden_rdram_size,
|
|
CommandProcessorFlags flags);
|
|
|
|
~CommandProcessor();
|
|
|
|
void set_validation_interface(ValidationInterface *iface);
|
|
|
|
bool device_is_supported() const;
|
|
|
|
// Synchronization.
|
|
void flush();
|
|
uint64_t signal_timeline();
|
|
void wait_for_timeline(uint64_t index);
|
|
void idle();
|
|
void begin_frame_context();
|
|
|
|
// Queues up state and drawing commands.
|
|
void enqueue_command(unsigned num_words, const uint32_t *words);
|
|
void enqueue_command_direct(unsigned num_words, const uint32_t *words);
|
|
|
|
void set_quirks(const Quirks &quirks);
|
|
|
|
// Interact with memory.
|
|
void *begin_read_rdram();
|
|
void end_write_rdram();
|
|
void *begin_read_hidden_rdram();
|
|
void end_write_hidden_rdram();
|
|
size_t get_rdram_size() const;
|
|
size_t get_hidden_rdram_size() const;
|
|
void *get_tmem();
|
|
|
|
// Sets VI register
|
|
void set_vi_register(VIRegister reg, uint32_t value);
|
|
|
|
Vulkan::ImageHandle scanout(const ScanoutOptions &opts = {});
|
|
void scanout_sync(std::vector<RGBA> &colors, unsigned &width, unsigned &height, const ScanoutOptions &opts = {});
|
|
void scanout_async_buffer(VIScanoutBuffer &buffer, const ScanoutOptions &opts = {});
|
|
|
|
// Support for modifying certain registers per-scanline.
|
|
// The idea is that before we scanout(), we use set_vi_register() to
|
|
// set frame-global VI register state.
|
|
// While scanning out, we can support changing some state, in particular HStart and XStart
|
|
// which allows various raster effects ala HDMA.
|
|
// For sanity's sake, scanout() reads all memory at once. A fully beam-raced implementation
|
|
// would render out images every scanline, but that would cripple performance and it's questionable
|
|
// how this is useful, especially on a 3D console. The only failure case of this style of implementation
|
|
// would be if a demo attempted to modify VRAM *after* it has been scanned out, i.e. a write-after-read
|
|
// hazard.
|
|
|
|
// Latch registers are initialized to the values in set_vi_register() for each respective register.
|
|
// After scanout(), the flags state is cleared to 0.
|
|
void begin_vi_register_per_scanline(VideoInterface::PerScanlineRegisterFlags flags);
|
|
void set_vi_register_for_scanline(VideoInterface::PerScanlineRegisterBits reg, uint32_t value);
|
|
|
|
// Between begin_vi_register_per_scanline() and scanout(), line must be monotonically increasing,
|
|
// or the call is ignored. Initial value for the line counter is 0
|
|
// (to set parameters for line 0, use global VI register state).
|
|
// Currently set registers in set_vi_register_for_scanline() are considered to be the active VI register
|
|
// values starting with VI line "vi_line", until the bottom of the frame or a new vi_line is set.
|
|
// Register state is assumed to have been fixed from the last latched scanline up until vi_line.
|
|
//
|
|
// The units used for this value matches the hardware YStart registers,
|
|
// i.e. the first active scanline is not 0, but VI_H_OFFSET_{NTSC,PAL}.
|
|
// For every scanned line, vi_line should increment by 2.
|
|
// vi_line must be less than VI_V_END_MAX (really, VI_V_END_{NTSC,PAL}), or it is ignored.
|
|
void latch_vi_register_for_scanline(unsigned vi_line);
|
|
|
|
// Assumes that scanline register state does not change until end of frame.
|
|
// Must be called before scanout(), or all per-scanline register state is ignored for the scanout.
|
|
void end_vi_register_per_scanline();
|
|
|
|
// Intended flow is something like:
|
|
// set_vi_register(reg, value0) // value0 used for line [0, 99]
|
|
// begin_vi_register_per_scanline(flags);
|
|
// set_vi_register_for_scanline(reg, value1); // value1 used for line [100, 199]
|
|
// latch_vi_register_for_scanline(100);
|
|
// set_vi_register_for_scanline(reg, value2);
|
|
// latch_vi_register_for_scanline(200); // value2 used for line [200, VBlank]
|
|
// end_vi_register_per_scanline();
|
|
// scanout();
|
|
|
|
private:
|
|
Vulkan::Device &device;
|
|
Vulkan::BufferHandle rdram;
|
|
Vulkan::BufferHandle hidden_rdram;
|
|
Vulkan::BufferHandle tmem;
|
|
size_t rdram_offset;
|
|
size_t rdram_size;
|
|
CommandProcessorFlags flags;
|
|
#ifndef PARALLEL_RDP_SHADER_DIR
|
|
std::unique_ptr<ShaderBank> shader_bank;
|
|
#endif
|
|
|
|
// Tear-down order is important here.
|
|
Renderer renderer;
|
|
VideoInterface vi;
|
|
CommandRing ring;
|
|
|
|
void clear_hidden_rdram();
|
|
void clear_tmem();
|
|
void clear_buffer(Vulkan::Buffer &buffer, uint32_t value);
|
|
void init_renderer();
|
|
void enqueue_command_inner(unsigned num_words, const uint32_t *words);
|
|
|
|
Vulkan::ImageHandle scanout(const ScanoutOptions &opts, VkImageLayout target_layout);
|
|
|
|
#define OP(x) void op_##x(const uint32_t *words)
|
|
OP(fill_triangle); OP(fill_z_buffer_triangle); OP(texture_triangle); OP(texture_z_buffer_triangle);
|
|
OP(shade_triangle); OP(shade_z_buffer_triangle); OP(shade_texture_triangle); OP(shade_texture_z_buffer_triangle);
|
|
OP(texture_rectangle); OP(texture_rectangle_flip); OP(sync_load); OP(sync_pipe);
|
|
OP(sync_tile); OP(sync_full); OP(set_key_gb); OP(set_key_r);
|
|
OP(set_convert); OP(set_scissor); OP(set_prim_depth); OP(set_other_modes);
|
|
OP(load_tlut); OP(set_tile_size); OP(load_block);
|
|
OP(load_tile); OP(set_tile); OP(fill_rectangle); OP(set_fill_color);
|
|
OP(set_fog_color); OP(set_blend_color); OP(set_prim_color); OP(set_env_color);
|
|
OP(set_combine); OP(set_texture_image); OP(set_mask_image); OP(set_color_image);
|
|
#undef OP
|
|
|
|
ScissorState scissor_state = {};
|
|
StaticRasterizationState static_state = {};
|
|
DepthBlendState depth_blend = {};
|
|
|
|
struct
|
|
{
|
|
uint32_t addr;
|
|
uint32_t width;
|
|
TextureFormat fmt;
|
|
TextureSize size;
|
|
} texture_image = {};
|
|
|
|
uint64_t timeline_value = 0;
|
|
uint64_t thread_timeline_value = 0;
|
|
|
|
struct FenceExecutor
|
|
{
|
|
explicit inline FenceExecutor(Vulkan::Device *device_, uint64_t *ptr)
|
|
: device(device_), value(ptr)
|
|
{
|
|
}
|
|
|
|
Vulkan::Device *device;
|
|
uint64_t *value;
|
|
bool is_sentinel(const CoherencyOperation &work) const;
|
|
void perform_work(CoherencyOperation &work);
|
|
void notify_work_locked(const CoherencyOperation &work);
|
|
};
|
|
WorkerThread<CoherencyOperation, FenceExecutor> timeline_worker;
|
|
|
|
uint8_t *host_rdram = nullptr;
|
|
bool measure_stall_time = false;
|
|
bool single_threaded_processing = false;
|
|
bool is_supported = false;
|
|
bool is_host_coherent = true;
|
|
bool timestamp = false;
|
|
|
|
friend class Renderer;
|
|
|
|
void enqueue_coherency_operation(CoherencyOperation &&op);
|
|
void drain_command_ring();
|
|
void decode_triangle_setup(TriangleSetup &setup, const uint32_t *words) const;
|
|
|
|
Quirks quirks;
|
|
|
|
std::unique_ptr<RDPDumpWriter> dump_writer;
|
|
bool dump_in_command_list = false;
|
|
};
|
|
}
|