From 435a26c591430bbf00b4df495e3c2a66d1558079 Mon Sep 17 00:00:00 2001 From: InoriRus Date: Wed, 29 Dec 2021 18:09:27 +1000 Subject: [PATCH] Minor changes --- source/CMakeLists.txt | 2 +- .../include/Emulator/Graphics/GpuMemory.h | 2 +- .../include/Emulator/Graphics/Graphics.h | 1 + .../Emulator/Graphics/GraphicsRender.h | 9 + .../include/Emulator/Graphics/GraphicsRun.h | 1 + .../include/Emulator/Graphics/Shader.h | 131 +-- .../include/Emulator/Graphics/Texture.h | 42 +- .../emulator/include/Emulator/Graphics/Tile.h | 4 +- .../include/Emulator/Graphics/Utils.h | 9 +- .../include/Emulator/Graphics/VideoOut.h | 15 +- .../Emulator/Graphics/VideoOutBuffer.h | 4 +- .../include/Emulator/Kernel/EventQueue.h | 14 +- source/emulator/include/Emulator/Log.h | 1 + source/emulator/src/Graphics/GpuMemory.cpp | 45 +- source/emulator/src/Graphics/Graphics.cpp | 12 +- .../emulator/src/Graphics/GraphicsRender.cpp | 804 ++++++++++++------ source/emulator/src/Graphics/GraphicsRun.cpp | 124 ++- source/emulator/src/Graphics/Label.cpp | 57 +- source/emulator/src/Graphics/Shader.cpp | 281 ++++-- source/emulator/src/Graphics/ShaderSpirv.cpp | 179 +++- source/emulator/src/Graphics/Texture.cpp | 155 +++- source/emulator/src/Graphics/Tile.cpp | 55 +- source/emulator/src/Graphics/Utils.cpp | 20 +- source/emulator/src/Graphics/VideoOut.cpp | 361 ++++++-- .../emulator/src/Graphics/VideoOutBuffer.cpp | 29 +- source/emulator/src/Graphics/Window.cpp | 41 +- source/emulator/src/Kernel/EventQueue.cpp | 16 +- .../emulator/src/Libs/LibGraphicsDriver.cpp | 1 + source/emulator/src/Libs/LibKernel.cpp | 15 +- source/emulator/src/Libs/LibVideoOut.cpp | 3 + source/emulator/src/Log.cpp | 29 + 31 files changed, 1817 insertions(+), 645 deletions(-) diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index a54cb10..cd4da77 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -79,7 +79,7 @@ if (KYTY_LINKER STREQUAL LD) set(KYTY_LD_OPTIONS "-Wl,--image-base=0x100000000000") endif() -project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.0.4) +project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.0.5) include(src_script.cmake) diff --git a/source/emulator/include/Emulator/Graphics/GpuMemory.h b/source/emulator/include/Emulator/Graphics/GpuMemory.h index df95bee..8dacae0 100644 --- a/source/emulator/include/Emulator/Graphics/GpuMemory.h +++ b/source/emulator/include/Emulator/Graphics/GpuMemory.h @@ -72,7 +72,7 @@ void* GpuMemoryGetObject(GraphicContext* ctx, uint64_t vaddr, uint64_t size, con void* GpuMemoryGetObject(GraphicContext* ctx, const uint64_t* vaddr, const uint64_t* size, int vaddr_num, const GpuObject& info); void GpuMemoryResetHash(GraphicContext* ctx, uint64_t vaddr, uint64_t size, GpuMemoryObjectType type); void GpuMemoryDbgDump(); -void GpuMemoryFlush(); +void GpuMemoryFlush(GraphicContext* ctx); void GpuMemoryFrameDone(); void GpuMemoryWriteBack(GraphicContext* ctx); diff --git a/source/emulator/include/Emulator/Graphics/Graphics.h b/source/emulator/include/Emulator/Graphics/Graphics.h index d22602d..cd0cd1f 100644 --- a/source/emulator/include/Emulator/Graphics/Graphics.h +++ b/source/emulator/include/Emulator/Graphics/Graphics.h @@ -28,6 +28,7 @@ int KYTY_SYSV_ABI GraphicsSubmitAndFlipCommandBuffers(uint32_t count, void* void* ccb_gpu_addrs[], const uint32_t* ccb_sizes_in_bytes, int handle, int index, int flip_mode, int64_t flip_arg); int KYTY_SYSV_ABI GraphicsSubmitDone(); +int KYTY_SYSV_ABI GraphicsAreSubmitsAllowed(); void KYTY_SYSV_ABI GraphicsFlushMemory(); int KYTY_SYSV_ABI GraphicsAddEqEvent(LibKernel::EventQueue::KernelEqueue eq, int id, void* udata); int KYTY_SYSV_ABI GraphicsDeleteEqEvent(LibKernel::EventQueue::KernelEqueue eq, int id); diff --git a/source/emulator/include/Emulator/Graphics/GraphicsRender.h b/source/emulator/include/Emulator/Graphics/GraphicsRender.h index ab6c35a..cdc4084 100644 --- a/source/emulator/include/Emulator/Graphics/GraphicsRender.h +++ b/source/emulator/include/Emulator/Graphics/GraphicsRender.h @@ -12,6 +12,7 @@ namespace Kyty::Libs::Graphics { class HardwareContext; class UserConfig; +class CommandProcessor; struct VideoOutVulkanImage; struct DepthStencilVulkanImage; struct TextureVulkanImage; @@ -27,6 +28,8 @@ public: CommandBuffer() { Allocate(); } virtual ~CommandBuffer() { Free(); } + void SetParent(CommandProcessor* parent) { m_parent = parent; } + KYTY_CLASS_NO_COPY(CommandBuffer); [[nodiscard]] bool IsInvalid() const; @@ -48,11 +51,14 @@ public: void SetQueue(int queue) { m_queue = queue; } + void CommandProcessorWait(); + private: VulkanCommandPool* m_pool = nullptr; uint32_t m_index = static_cast(-1); int m_queue = -1; bool m_execute = false; + CommandProcessor* m_parent = nullptr; }; void GraphicsRenderInit(); @@ -67,7 +73,10 @@ void GraphicsRenderWriteAtEndOfPipe(CommandBuffer* buffer, uint32_t* dst_gpu_add void GraphicsRenderWriteAtEndOfPipeGds(CommandBuffer* buffer, uint32_t* dst_gpu_addr, uint32_t dw_offset, uint32_t dw_num); void GraphicsRenderWriteAtEndOfPipeWithInterruptWriteBackFlip(CommandBuffer* buffer, uint32_t* dst_gpu_addr, uint32_t value, int handle, int index, int flip_mode, int64_t flip_arg); +void GraphicsRenderWriteAtEndOfPipeWithFlip(CommandBuffer* buffer, uint32_t* dst_gpu_addr, uint32_t value, int handle, int index, + int flip_mode, int64_t flip_arg); void GraphicsRenderWriteAtEndOfPipeWithWriteBack(CommandBuffer* buffer, uint64_t* dst_gpu_addr, uint64_t value); +void GraphicsRenderWriteAtEndOfPipeWithInterruptWriteBack(CommandBuffer* buffer, uint64_t* dst_gpu_addr, uint64_t value); void GraphicsRenderWriteAtEndOfPipeWithInterrupt(CommandBuffer* buffer, uint64_t* dst_gpu_addr, uint64_t value); void GraphicsRenderWriteBack(); void GraphicsRenderDispatchDirect(CommandBuffer* buffer, HardwareContext* ctx, uint32_t thread_group_x, uint32_t thread_group_y, diff --git a/source/emulator/include/Emulator/Graphics/GraphicsRun.h b/source/emulator/include/Emulator/Graphics/GraphicsRun.h index 84431c2..3dbd7f5 100644 --- a/source/emulator/include/Emulator/Graphics/GraphicsRun.h +++ b/source/emulator/include/Emulator/Graphics/GraphicsRun.h @@ -21,6 +21,7 @@ void GraphicsRunWait(); void GraphicsRunDone(); void GraphicsRunDingDong(uint32_t ring_id, uint32_t offset_dw); int GraphicsRunGetFrameNum(); +bool GraphicsRunAreSubmitsAllowed(); } // namespace Kyty::Libs::Graphics diff --git a/source/emulator/include/Emulator/Graphics/Shader.h b/source/emulator/include/Emulator/Graphics/Shader.h index 1476a3c..1867f13 100644 --- a/source/emulator/include/Emulator/Graphics/Shader.h +++ b/source/emulator/include/Emulator/Graphics/Shader.h @@ -40,11 +40,13 @@ enum class ShaderInstructionType DsAppend, DsConsume, Exp, + ImageLoad, ImageSample, SAddcU32, SAddI32, SAddU32, SAndB32, + SAndB64, SAndn2B64, SAndSaveexecB64, SBfmB32, @@ -76,25 +78,28 @@ enum class ShaderInstructionType SLshrB32, SMovB32, SMovB64, + SMovkI32, SMulI32, + SNandB64, SNorB64, SOrB64, + SOrn2B64, SSetpcB64, SSwappcB64, SWaitcnt, SWqmB64, + SXnorB64, + SXorB64, TBufferLoadFormatXyzw, VAddI32, VAndB32, - VOrB32, - VXorB32, VAshrI32, VAshrrevI32, VBcntU32B32, VBfeU32, - VBfrevB32, - VCvtF32I32, VBfmB32, + VBfrevB32, + VCeilF32, VCmpEqF32, VCmpEqI32, VCmpEqU32, @@ -128,10 +133,13 @@ enum class ShaderInstructionType VCmpTU32, VCmpUF32, VCmpxEqU32, + VCmpxGeU32, VCmpxGtU32, VCmpxNeU32, VCndmaskB32, + VCosF32, VCvtF32F16, + VCvtF32I32, VCvtF32U32, VCvtF32Ubyte0, VCvtF32Ubyte1, @@ -139,6 +147,9 @@ enum class ShaderInstructionType VCvtF32Ubyte3, VCvtPkrtzF16F32, VCvtU32F32, + VExpF32, + VFloorF32, + VFractF32, VInterpP1F32, VInterpP2F32, VLshlB32, @@ -161,21 +172,19 @@ enum class ShaderInstructionType VMulLoU32, VMulU32U24, VNotB32, + VOrB32, VRcpF32, - VRsqF32, - VCeilF32, - VFractF32, VRndneF32, - VTruncF32, - VExpF32, - VCosF32, - VFloorF32, + VRsqF32, VSadU32, VSqrtF32, VSubF32, VSubI32, VSubrevF32, VSubrevI32, + VTruncF32, + VXorB32, + ZMax }; namespace ShaderInstructionFormat { @@ -274,6 +283,7 @@ enum Format : uint64_t Vdata4Vaddr2SvSoffsOffenIdxenFloat4 = FormatDefine({DA4, S0A2, S1A4, S2, Offen, Idxen, Float4}), Vdata3Vaddr3StSsDmask7 = FormatDefine({DA3, S0A3, S1A8, S2A4, Dmask7}), Vdata4Vaddr3StSsDmaskF = FormatDefine({DA4, S0A3, S1A8, S2A4, DmaskF}), + Vdata4Vaddr3StDmaskF = FormatDefine({DA4, S0A3, S1A8, DmaskF}), VdstVsrc0Vsrc1Smask2 = FormatDefine({D, S0, S1, S2A2}), VdstVsrc0Vsrc1Vsrc2 = FormatDefine({D, S0, S1, S2}), VdstVsrcAttrChan = FormatDefine({D, S0, Attr}), @@ -382,11 +392,18 @@ public: { return m_instructions.Contains(type, [](auto inst, auto type) { return inst.type == type; }); }); } + [[nodiscard]] bool IsEmbedded() const { return m_embedded; } + void SetEmbedded(bool embedded) { this->m_embedded = embedded; } + [[nodiscard]] uint32_t GetEmbeddedId() const { return m_embedded_id; } + void SetEmbeddedId(uint32_t embedded_id) { m_embedded_id = embedded_id; } + private: Vector m_instructions; Vector m_labels; ShaderType m_type = ShaderType::Unknown; Vector m_debug_printfs; + uint32_t m_embedded_id = 0; + bool m_embedded = false; }; struct ShaderId @@ -562,9 +579,8 @@ struct ShaderStorageResources int slots[BUFFERS_MAX] = {0}; int start_register[BUFFERS_MAX] = {0}; bool extended[BUFFERS_MAX] = {}; - // int extended_index[BUFFERS_MAX] = {0}; - int buffers_num = 0; - int binding_index = 0; + int buffers_num = 0; + int binding_index = 0; }; struct ShaderTextureResources @@ -574,9 +590,8 @@ struct ShaderTextureResources ShaderTextureResource textures[RES_MAX]; int start_register[RES_MAX] = {0}; bool extended[RES_MAX] = {}; - // int extended_index[RES_MAX] = {0}; - int textures_num = 0; - int binding_index = 0; + int textures_num = 0; + int binding_index = 0; }; struct ShaderSamplerResources @@ -586,9 +601,8 @@ struct ShaderSamplerResources ShaderSamplerResource samplers[RES_MAX]; int start_register[RES_MAX] = {0}; bool extended[RES_MAX] = {}; - // int extended_index[RES_MAX] = {0}; - int samplers_num = 0; - int binding_index = 0; + int samplers_num = 0; + int binding_index = 0; }; struct ShaderGdsResources @@ -599,21 +613,19 @@ struct ShaderGdsResources int slots[POINTERS_MAX] = {0}; int start_register[POINTERS_MAX] = {0}; bool extended[POINTERS_MAX] = {}; - // int extended_index[POINTERS_MAX] = {0}; - int pointers_num = 0; - int binding_index = 0; + int pointers_num = 0; + int binding_index = 0; }; struct ShaderExtendedResources { - bool used = false; - int slot = 0; - // int dw_num = 0; + bool used = false; + int slot = 0; int start_register = 0; ShaderExtendedResource data; }; -struct ShaderResources +struct ShaderBindResources { uint32_t push_constant_offset = 0; uint32_t push_constant_size = 0; @@ -625,6 +637,11 @@ struct ShaderResources ShaderExtendedResources extended; }; +struct ShaderBindParameters +{ + bool textures2D_without_sampler = false; +}; + struct ShaderVertexInputInfo { static constexpr int RES_MAX = 16; @@ -636,43 +653,49 @@ struct ShaderVertexInputInfo ShaderVertexInputBuffer buffers[RES_MAX]; int buffers_num = 0; int export_count = 0; - ShaderResources bind; + ShaderBindResources bind; }; struct ShaderComputeInputInfo { - uint32_t threads_num[3] = {0, 0, 0}; - bool group_id[3] = {false, false, false}; - int thread_ids_num = 0; - int workgroup_register = 0; - ShaderResources bind; + uint32_t threads_num[3] = {0, 0, 0}; + bool group_id[3] = {false, false, false}; + int thread_ids_num = 0; + int workgroup_register = 0; + ShaderBindResources bind; }; struct ShaderPixelInputInfo { - uint32_t interpolator_settings[32] = {0}; - uint32_t input_num = 0; - uint8_t target_output_mode[8] = {}; - bool ps_pos_xy = false; - bool ps_pixel_kill_enable = false; - ShaderResources bind; + uint32_t interpolator_settings[32] = {0}; + uint32_t input_num = 0; + uint8_t target_output_mode[8] = {}; + bool ps_pos_xy = false; + bool ps_pixel_kill_enable = false; + ShaderBindResources bind; }; -void ShaderGetInputInfoVS(const VertexShaderInfo* regs, ShaderVertexInputInfo* info); -void ShaderGetInputInfoPS(const PixelShaderInfo* regs, const ShaderVertexInputInfo* vs_info, ShaderPixelInputInfo* ps_info); -void ShaderGetInputInfoCS(const ComputeShaderInfo* regs, ShaderComputeInputInfo* info); -void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info); -void ShaderDbgDumpInputInfo(const ShaderPixelInputInfo* info); -void ShaderDbgDumpInputInfo(const ShaderComputeInputInfo* info); -ShaderId ShaderGetIdVS(const VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info); -ShaderId ShaderGetIdPS(const PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info); -ShaderId ShaderGetIdCS(const ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info); -Vector ShaderRecompileVS(const VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info); -Vector ShaderRecompilePS(const PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info); -Vector ShaderRecompileCS(const ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info); -bool ShaderIsDisabled(uint64_t addr); -void ShaderDisable(uint64_t id); -void ShaderInjectDebugPrintf(uint64_t id, const ShaderDebugPrintf& cmd); +void ShaderGetInputInfoVS(const VertexShaderInfo* regs, ShaderVertexInputInfo* info); +void ShaderGetInputInfoPS(const PixelShaderInfo* regs, const ShaderVertexInputInfo* vs_info, ShaderPixelInputInfo* ps_info); +void ShaderGetInputInfoCS(const ComputeShaderInfo* regs, ShaderComputeInputInfo* info); +void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info); +void ShaderDbgDumpInputInfo(const ShaderPixelInputInfo* info); +void ShaderDbgDumpInputInfo(const ShaderComputeInputInfo* info); +ShaderId ShaderGetIdVS(const VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info); +ShaderId ShaderGetIdPS(const PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info); +ShaderId ShaderGetIdCS(const ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info); +ShaderCode ShaderParseVS(const VertexShaderInfo* regs); +ShaderCode ShaderParsePS(const PixelShaderInfo* regs); +ShaderCode ShaderParseCS(const ComputeShaderInfo* regs); +ShaderBindParameters ShaderGetBindParametersVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info); +ShaderBindParameters ShaderGetBindParametersPS(const ShaderCode& code, const ShaderPixelInputInfo* input_info); +ShaderBindParameters ShaderGetBindParametersCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info); +Vector ShaderRecompileVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info); +Vector ShaderRecompilePS(const ShaderCode& code, const ShaderPixelInputInfo* input_info); +Vector ShaderRecompileCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info); +bool ShaderIsDisabled(uint64_t addr); +void ShaderDisable(uint64_t id); +void ShaderInjectDebugPrintf(uint64_t id, const ShaderDebugPrintf& cmd); } // namespace Kyty::Libs::Graphics diff --git a/source/emulator/include/Emulator/Graphics/Texture.h b/source/emulator/include/Emulator/Graphics/Texture.h index acbe36c..e3b469c 100644 --- a/source/emulator/include/Emulator/Graphics/Texture.h +++ b/source/emulator/include/Emulator/Graphics/Texture.h @@ -16,27 +16,31 @@ struct VulkanMemory; class TextureObject: public GpuObject { public: - static constexpr int PARAM_DFMT = 0; - static constexpr int PARAM_NFMT = 1; - static constexpr int PARAM_WIDTH = 2; - static constexpr int PARAM_HEIGHT = 3; - static constexpr int PARAM_LEVELS = 4; - static constexpr int PARAM_TILE = 5; - static constexpr int PARAM_NEO = 6; - static constexpr int PARAM_SWIZZLE = 7; + static constexpr int TEXTURE_USAGE_SAMPLED = 0; + static constexpr int TEXTURE_USAGE_STORAGE = 1; - TextureObject(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t levels, bool htile, bool neo, uint32_t swizzle) + static constexpr int PARAM_DFMT_NFMT = 0; + static constexpr int PARAM_PITCH = 1; + static constexpr int PARAM_WIDTH_HEIGHT = 2; + static constexpr int PARAM_USAGE = 3; + static constexpr int PARAM_LEVELS = 4; + static constexpr int PARAM_TILE = 5; + static constexpr int PARAM_NEO = 6; + static constexpr int PARAM_SWIZZLE = 7; + + TextureObject(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t pitch, uint32_t levels, bool htile, bool neo, + uint32_t swizzle, uint32_t usage) { - params[PARAM_DFMT] = dfmt; - params[PARAM_NFMT] = nfmt; - params[PARAM_WIDTH] = width; - params[PARAM_HEIGHT] = height; - params[PARAM_LEVELS] = levels; - params[PARAM_TILE] = htile ? 1 : 0; - params[PARAM_NEO] = neo ? 1 : 0; - params[PARAM_SWIZZLE] = swizzle; - check_hash = true; - type = Graphics::GpuMemoryObjectType::Texture; + params[PARAM_DFMT_NFMT] = (static_cast(dfmt) << 32u) | nfmt; + params[PARAM_PITCH] = pitch; + params[PARAM_WIDTH_HEIGHT] = (static_cast(width) << 32u) | height; + params[PARAM_USAGE] = usage; + params[PARAM_LEVELS] = levels; + params[PARAM_TILE] = htile ? 1 : 0; + params[PARAM_NEO] = neo ? 1 : 0; + params[PARAM_SWIZZLE] = swizzle; + check_hash = true; + type = Graphics::GpuMemoryObjectType::Texture; } void* Create(GraphicContext* ctx, const uint64_t* vaddr, const uint64_t* size, int vaddr_num, VulkanMemory* mem) const override; diff --git a/source/emulator/include/Emulator/Graphics/Tile.h b/source/emulator/include/Emulator/Graphics/Tile.h index bc10cf1..4ee9282 100644 --- a/source/emulator/include/Emulator/Graphics/Tile.h +++ b/source/emulator/include/Emulator/Graphics/Tile.h @@ -24,8 +24,8 @@ void TileConvertTiledToLinear(void* dst, const void* src, TileMode mode, uint32_ void TileGetDepthSize(uint32_t width, uint32_t height, uint32_t z_format, uint32_t stencil_format, bool htile, bool neo, uint32_t* stencil_size, uint32_t* htile_size, uint32_t* depth_size, uint32_t* pitch); -void TileGetVideoOutSize(uint32_t width, uint32_t height, bool tile, bool neo, uint32_t* size); -void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t levels, bool tile, bool neo, +void TileGetVideoOutSize(uint32_t width, uint32_t height, bool tile, bool neo, uint32_t* size, uint32_t* pitch); +void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t pitch, uint32_t levels, bool tile, bool neo, uint32_t* total_size, uint32_t* level_sizes, uint32_t* padded_width, uint32_t* padded_height); } // namespace Kyty::Libs::Graphics diff --git a/source/emulator/include/Emulator/Graphics/Utils.h b/source/emulator/include/Emulator/Graphics/Utils.h index 505b61a..eea164e 100644 --- a/source/emulator/include/Emulator/Graphics/Utils.h +++ b/source/emulator/include/Emulator/Graphics/Utils.h @@ -27,15 +27,16 @@ struct BufferImageCopy uint32_t offset; uint32_t width; uint32_t height; + uint32_t pitch; }; -void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, VideoOutVulkanImage* dst_image); +void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, uint32_t src_pitch, VideoOutVulkanImage* dst_image); void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, TextureVulkanImage* dst_image, - const Vector& regions); + const Vector& regions, uint64_t dst_layout); void UtilBlitImage(CommandBuffer* buffer, VideoOutVulkanImage* src_image, VulkanSwapchain* dst_swapchain); -void UtilFillImage(GraphicContext* ctx, VideoOutVulkanImage* dst_image, const void* src_data, uint64_t size); +void UtilFillImage(GraphicContext* ctx, VideoOutVulkanImage* dst_image, const void* src_data, uint64_t size, uint32_t src_pitch); void UtilFillImage(GraphicContext* ctx, TextureVulkanImage* dst_image, const void* src_data, uint64_t size, - const Vector& regions); + const Vector& regions, uint64_t dst_layout); void UtilCopyBuffer(VulkanBuffer* src_buffer, VulkanBuffer* dst_buffer, uint64_t size); void UtilSetImageLayoutOptimal(DepthStencilVulkanImage* image); void UtilSetImageLayoutOptimal(VideoOutVulkanImage* image); diff --git a/source/emulator/include/Emulator/Graphics/VideoOut.h b/source/emulator/include/Emulator/Graphics/VideoOut.h index 10f1f54..c21513c 100644 --- a/source/emulator/include/Emulator/Graphics/VideoOut.h +++ b/source/emulator/include/Emulator/Graphics/VideoOut.h @@ -19,12 +19,14 @@ namespace Kyty::Libs::VideoOut { struct VideoOutResolutionStatus; struct VideoOutBufferAttribute; struct VideoOutFlipStatus; +struct VideoOutVblankStatus; struct VideoOutBufferImageInfo { - Graphics::VideoOutVulkanImage* image = nullptr; - uint32_t index = static_cast(-1); - uint64_t buffer_size = 0; + Graphics::VideoOutVulkanImage* image = nullptr; + uint32_t index = static_cast(-1); + uint64_t buffer_size = 0; + uint64_t buffer_pitch = 0; }; void VideoOutInit(uint32_t width, uint32_t height); @@ -38,12 +40,17 @@ KYTY_SYSV_ABI void VideoOutSetBufferAttribute(VideoOutBufferAttribute* attribute uint32_t aspect_ratio, uint32_t width, uint32_t height, uint32_t pitch_in_pixel); KYTY_SYSV_ABI int VideoOutSetFlipRate(int handle, int rate); KYTY_SYSV_ABI int VideoOutAddFlipEvent(LibKernel::EventQueue::KernelEqueue eq, int handle, void* udata); +KYTY_SYSV_ABI int VideoOutAddVblankEvent(LibKernel::EventQueue::KernelEqueue eq, int handle, void* udata); KYTY_SYSV_ABI int VideoOutRegisterBuffers(int handle, int start_index, void* const* addresses, int buffer_num, const VideoOutBufferAttribute* attribute); KYTY_SYSV_ABI int VideoOutSubmitFlip(int handle, int index, int flip_mode, int64_t flip_arg); KYTY_SYSV_ABI int VideoOutGetFlipStatus(int handle, VideoOutFlipStatus* status); +KYTY_SYSV_ABI int VideoOutGetVblankStatus(int handle, VideoOutVblankStatus* status); +KYTY_SYSV_ABI int VideoOutSetWindowModeMargins(int handle, int top, int bottom); -bool FlipWindow(uint32_t micros); +void VideoOutBeginVblank(); +void VideoOutEndVblank(); +bool VideoOutFlipWindow(uint32_t micros); } // namespace Kyty::Libs::VideoOut diff --git a/source/emulator/include/Emulator/Graphics/VideoOutBuffer.h b/source/emulator/include/Emulator/Graphics/VideoOutBuffer.h index b4a59b1..f90f3c0 100644 --- a/source/emulator/include/Emulator/Graphics/VideoOutBuffer.h +++ b/source/emulator/include/Emulator/Graphics/VideoOutBuffer.h @@ -21,14 +21,16 @@ public: static constexpr int PARAM_HEIGHT = 2; static constexpr int PARAM_TILED = 3; static constexpr int PARAM_NEO = 4; + static constexpr int PARAM_PITCH = 5; - explicit VideoOutBufferObject(uint32_t pixel_format, uint32_t width, uint32_t height, bool tiled, bool neo) + explicit VideoOutBufferObject(uint32_t pixel_format, uint32_t width, uint32_t height, bool tiled, bool neo, uint32_t pitch) { params[PARAM_FORMAT] = pixel_format; params[PARAM_WIDTH] = width; params[PARAM_HEIGHT] = height; params[PARAM_TILED] = tiled ? 1 : 0; params[PARAM_NEO] = neo ? 1 : 0; + params[PARAM_PITCH] = pitch; check_hash = true; type = Graphics::GpuMemoryObjectType::VideoOutBuffer; } diff --git a/source/emulator/include/Emulator/Kernel/EventQueue.h b/source/emulator/include/Emulator/Kernel/EventQueue.h index 7758600..0187578 100644 --- a/source/emulator/include/Emulator/Kernel/EventQueue.h +++ b/source/emulator/include/Emulator/Kernel/EventQueue.h @@ -22,9 +22,11 @@ constexpr int16_t KERNEL_EVFILT_HRTIMER = -15; class KernelEqueuePrivate; struct KernelEqueueEvent; +using KernelEqueue = KernelEqueuePrivate*; + using trigger_func_t = void (*)(KernelEqueueEvent* event, void* trigger_data); using reset_func_t = void (*)(KernelEqueueEvent* event); -using delete_func_t = void (*)(KernelEqueueEvent* event); +using delete_func_t = void (*)(KernelEqueue eq, KernelEqueueEvent* event); struct KernelEvent { @@ -38,10 +40,10 @@ struct KernelEvent struct KernelFilter { - void* data = nullptr; - trigger_func_t trigger_func = nullptr; - reset_func_t reset_func = nullptr; - delete_func_t delete_func = nullptr; + void* data = nullptr; + trigger_func_t trigger_func = nullptr; + reset_func_t reset_func = nullptr; + delete_func_t delete_event_func = nullptr; }; struct KernelEqueueEvent @@ -51,8 +53,6 @@ struct KernelEqueueEvent KernelFilter filter; }; -using KernelEqueue = KernelEqueuePrivate*; - int KYTY_SYSV_ABI KernelAddEvent(KernelEqueue eq, const KernelEqueueEvent& event); int KYTY_SYSV_ABI KernelTriggerEvent(KernelEqueue eq, uintptr_t ident, int16_t filter, void* trigger_data); int KYTY_SYSV_ABI KernelDeleteEvent(KernelEqueue eq, uintptr_t ident, int16_t filter); diff --git a/source/emulator/include/Emulator/Log.h b/source/emulator/include/Emulator/Log.h index 98826eb..d605edf 100644 --- a/source/emulator/include/Emulator/Log.h +++ b/source/emulator/include/Emulator/Log.h @@ -77,6 +77,7 @@ String RemoveColors(const String& str); } // namespace Log void printf(const char* format, ...) KYTY_FORMAT_PRINTF(1, 2); +void emu_printf(const char* format, ...) KYTY_FORMAT_PRINTF(1, 2); } // namespace Kyty diff --git a/source/emulator/src/Graphics/GpuMemory.cpp b/source/emulator/src/Graphics/GpuMemory.cpp index d162689..5a08244 100644 --- a/source/emulator/src/Graphics/GpuMemory.cpp +++ b/source/emulator/src/Graphics/GpuMemory.cpp @@ -34,7 +34,12 @@ public: void* GetObject(GraphicContext* ctx, const uint64_t* vaddr, const uint64_t* size, int vaddr_num, const GpuObject& info); void ResetHash(GraphicContext* ctx, uint64_t* vaddr, uint64_t* size, int vaddr_num, GpuMemoryObjectType type); void FrameDone(); - void WriteBack(GraphicContext* ctx); + + // Sync: GPU -> CPU + void WriteBack(GraphicContext* ctx); + + // Sync: CPU -> GPU + void Flush(GraphicContext* ctx); void DbgDump(); @@ -387,7 +392,17 @@ void* GpuMemory::GetObject(GraphicContext* ctx, const uint64_t* vaddr, const uin for (int vi = 0; vi < vaddr_num; vi++) { - EXIT_NOT_IMPLEMENTED(!h.free && vaddr_overlap(h.vaddr, h.size, h.overlaps_num, vaddr[vi], size[vi])); + if (!h.free && vaddr_overlap(h.vaddr, h.size, h.overlaps_num, vaddr[vi], size[vi])) + { + if (h.overlaps_num == 1 && + (h.overlaps[0].type == GpuMemoryObjectType::Label || h.overlaps[0].type == GpuMemoryObjectType::StorageBuffer)) + { + Free(ctx, h); + } else + { + KYTY_NOT_IMPLEMENTED; + } + } } } @@ -641,6 +656,25 @@ void GpuMemory::WriteBack(GraphicContext* ctx) } } +void GpuMemory::Flush(GraphicContext* ctx) +{ + Core::LockGuard lock(m_mutex); + + for (auto& h: m_objects) + { + if (!h.free) + { + for (int oi = 0; oi < h.overlaps_num; oi++) + { + auto& o = h.overlaps[oi]; + + EXIT_IF(o.update_func == nullptr); + o.update_func(ctx, o.params, o.obj, h.vaddr, h.size, h.vaddr_num); + } + } + } +} + void GpuMemory::DbgDump() { Core::LockGuard lock(m_mutex); @@ -756,11 +790,13 @@ void GpuMemoryDbgDump() g_gpu_memory->DbgDump(); } -void GpuMemoryFlush() +void GpuMemoryFlush(GraphicContext* ctx) { EXIT_IF(g_gpu_memory == nullptr); + EXIT_IF(ctx == nullptr); - // TODO(): update vulkan objects after CPU-drawing + // update vulkan objects after CPU-drawing + g_gpu_memory->Flush(ctx); } void GpuMemoryFrameDone() @@ -775,6 +811,7 @@ void GpuMemoryWriteBack(GraphicContext* ctx) EXIT_IF(g_gpu_memory == nullptr); EXIT_IF(ctx == nullptr); + // update CPU memory after GPU-drawing g_gpu_memory->WriteBack(ctx); } diff --git a/source/emulator/src/Graphics/Graphics.cpp b/source/emulator/src/Graphics/Graphics.cpp index bd41b41..2882a75 100644 --- a/source/emulator/src/Graphics/Graphics.cpp +++ b/source/emulator/src/Graphics/Graphics.cpp @@ -89,6 +89,7 @@ int KYTY_SYSV_ABI GraphicsSetPsShader350(uint32_t* cmd, uint64_t size, const uin { PRINT_NAME(); + EXIT_NOT_IMPLEMENTED(ps_regs == nullptr); EXIT_NOT_IMPLEMENTED(size < sizeof(PsStageRegisters) / 12 + 1); printf("\t cmd_buffer = %016" PRIx64 "\n", reinterpret_cast(cmd)); @@ -323,19 +324,20 @@ int KYTY_SYSV_ABI GraphicsSubmitDone() PRINT_NAME(); GraphicsRunDone(); - // GpuMemoryFrameDone(); - // GpuMemoryDbgDump(); return OK; } +int KYTY_SYSV_ABI GraphicsAreSubmitsAllowed() +{ + return GraphicsRunAreSubmitsAllowed() ? 1 : 0; +} + void KYTY_SYSV_ABI GraphicsFlushMemory() { PRINT_NAME(); - GraphicsRunDone(); - - EXIT("1"); + GpuMemoryFlush(WindowGetGraphicContext()); } int KYTY_SYSV_ABI GraphicsAddEqEvent(LibKernel::EventQueue::KernelEqueue eq, int id, void* udata) diff --git a/source/emulator/src/Graphics/GraphicsRender.cpp b/source/emulator/src/Graphics/GraphicsRender.cpp index 3d87bbc..86397a9 100644 --- a/source/emulator/src/Graphics/GraphicsRender.cpp +++ b/source/emulator/src/Graphics/GraphicsRender.cpp @@ -19,6 +19,7 @@ #include "Emulator/Graphics/Utils.h" #include "Emulator/Graphics/VertexBuffer.h" #include "Emulator/Graphics/VideoOut.h" +#include "Emulator/Graphics/VideoOutBuffer.h" #include "Emulator/Graphics/Window.h" #include "Emulator/Kernel/EventQueue.h" #include "Emulator/Kernel/Pthread.h" @@ -26,7 +27,6 @@ #include "Emulator/Profiler.h" #include -#include #include #ifdef KYTY_EMU_ENABLED @@ -38,12 +38,6 @@ constexpr int GRAPHICS_EVENT_EOP = 0x40; struct Label; struct RenderDepthInfo; -struct VulkanPipeline -{ - VkPipelineLayout pipeline_layout = nullptr; - VkPipeline pipeline = nullptr; -}; - struct VulkanDescriptor { VkDescriptorSet descriptor_set = nullptr; @@ -81,6 +75,20 @@ struct PipelineParameters }; #pragma pack(pop) +struct PipelineAdditionalParameters +{ + ShaderBindParameters vs_bind; + ShaderBindParameters ps_bind; + ShaderBindParameters cs_bind; +}; + +struct VulkanPipeline +{ + VkPipelineLayout pipeline_layout = nullptr; + VkPipeline pipeline = nullptr; + const PipelineAdditionalParameters* additional_params = nullptr; +}; + class PipelineCache { public: @@ -102,16 +110,18 @@ private: struct Pipeline { - uint64_t render_pass_id = 0; - ShaderId vs_shader_id; - ShaderId ps_shader_id; - ShaderId cs_shader_id; - PipelineParameters params; - VulkanPipeline* pipeline = nullptr; + uint64_t render_pass_id = 0; + ShaderId vs_shader_id; + ShaderId ps_shader_id; + ShaderId cs_shader_id; + VulkanPipeline* pipeline = nullptr; + PipelineParameters* params = nullptr; }; [[nodiscard]] VulkanPipeline* Find(const Pipeline& p) const; + static void DeletePipelineInternal(Pipeline& p); + Vector m_pipelines; Core::Mutex m_mutex; }; @@ -134,24 +144,27 @@ public: Compute }; - static constexpr int BUFFERS_MAX = ShaderStorageResources::BUFFERS_MAX; - static constexpr int TEXTURES_MAX = ShaderTextureResources::RES_MAX; - static constexpr int SAMPLERS_MAX = ShaderSamplerResources::RES_MAX; - static constexpr int PUSH_CONSTANTS_MAX = 16 * 4; - static constexpr int GDS_BUFFER_MAX = 1; + static constexpr int BUFFERS_MAX = ShaderStorageResources::BUFFERS_MAX; + static constexpr int TEXTURES_SAMPLED_MAX = ShaderTextureResources::RES_MAX; + static constexpr int TEXTURES_STORAGE_MAX = ShaderTextureResources::RES_MAX; + static constexpr int SAMPLERS_MAX = ShaderSamplerResources::RES_MAX; + static constexpr int PUSH_CONSTANTS_MAX = 16 * 4; + static constexpr int GDS_BUFFER_MAX = 1; DescriptorCache() { EXIT_NOT_IMPLEMENTED(!Core::Thread::IsMainThread()); } virtual ~DescriptorCache() { KYTY_NOT_IMPLEMENTED; } KYTY_CLASS_NO_COPY(DescriptorCache); - VkDescriptorSetLayout GetDescriptorSetLayout(Stage stage, int storage_buffers_num, int textures2d_num, int samplers_num, - int gds_buffers_num); + VkDescriptorSetLayout GetDescriptorSetLayout(Stage stage, int storage_buffers_num, int textures2d_sampled_num, + int textures2d_storage_num, int samplers_num, int gds_buffers_num); - VulkanDescriptorSet* Allocate(Stage stage, int storage_buffers_num, int textures2d_num, int samplers_num, int gds_buffers_num); + VulkanDescriptorSet* Allocate(Stage stage, int storage_buffers_num, int textures2d_sampled_num, int textures2d_storage_num, + int samplers_num, int gds_buffers_num); void Free(VulkanDescriptorSet* set); - VulkanDescriptorSet* GetDescriptor(Stage stage, int storage_buffers_num, VulkanBuffer** storage_buffers, int textures2d_num, - TextureVulkanImage** textures2d, int samplers_num, uint64_t* samplers, int gds_buffers_num, + VulkanDescriptorSet* GetDescriptor(Stage stage, int storage_buffers_num, VulkanBuffer** storage_buffers, int textures2d_sampled_num, + TextureVulkanImage** textures2d_sampled, int textures2d_storage_num, + TextureVulkanImage** textures2d_storage, int samplers_num, uint64_t* samplers, int gds_buffers_num, VulkanBuffer** gds_buffers); void FreeDescriptor(VulkanBuffer* buffer); void FreeDescriptor(TextureVulkanImage* image); @@ -159,16 +172,18 @@ public: private: struct Set { - VulkanDescriptorSet* set = nullptr; - Stage stage = Stage::Unknown; - int storage_buffers_num = 0; - uint64_t storage_buffers_id[BUFFERS_MAX] = {}; - int textures2d_num = 0; - uint64_t textures2d_id[TEXTURES_MAX] = {}; - int samplers_num = 0; - uint64_t samplers_id[SAMPLERS_MAX] = {}; - int gds_buffers_num = 0; - uint64_t gds_buffers_id[GDS_BUFFER_MAX] = {}; + VulkanDescriptorSet* set = nullptr; + Stage stage = Stage::Unknown; + int storage_buffers_num = 0; + uint64_t storage_buffers_id[BUFFERS_MAX] = {}; + int textures2d_sampled_num = 0; + uint64_t textures2d_sampled_id[TEXTURES_SAMPLED_MAX] = {}; + int textures2d_storage_num = 0; + uint64_t textures2d_storage_id[TEXTURES_STORAGE_MAX] = {}; + int samplers_num = 0; + uint64_t samplers_id[SAMPLERS_MAX] = {}; + int gds_buffers_num = 0; + uint64_t gds_buffers_id[GDS_BUFFER_MAX] = {}; }; void Init(); void CreatePool(); @@ -176,10 +191,13 @@ private: Core::Mutex m_mutex; Vector m_pools; Vector m_sets; - VkDescriptorSetLayout m_descriptor_set_layout_vertex[BUFFERS_MAX + 1][TEXTURES_MAX + 1][SAMPLERS_MAX + 1][GDS_BUFFER_MAX + 1] = {}; - VkDescriptorSetLayout m_descriptor_set_layout_pixel[BUFFERS_MAX + 1][TEXTURES_MAX + 1][SAMPLERS_MAX + 1][GDS_BUFFER_MAX + 1] = {}; - VkDescriptorSetLayout m_descriptor_set_layout_compute[BUFFERS_MAX + 1][TEXTURES_MAX + 1][SAMPLERS_MAX + 1][GDS_BUFFER_MAX + 1] = {}; - bool m_initialized = false; + VkDescriptorSetLayout m_descriptor_set_layout_vertex[BUFFERS_MAX + 1][TEXTURES_SAMPLED_MAX + 1][TEXTURES_STORAGE_MAX + 1] + [SAMPLERS_MAX + 1][GDS_BUFFER_MAX + 1] = {}; + VkDescriptorSetLayout m_descriptor_set_layout_pixel[BUFFERS_MAX + 1][TEXTURES_SAMPLED_MAX + 1][TEXTURES_STORAGE_MAX + 1] + [SAMPLERS_MAX + 1][GDS_BUFFER_MAX + 1] = {}; + VkDescriptorSetLayout m_descriptor_set_layout_compute[BUFFERS_MAX + 1][TEXTURES_SAMPLED_MAX + 1][TEXTURES_STORAGE_MAX + 1] + [SAMPLERS_MAX + 1][GDS_BUFFER_MAX + 1] = {}; + bool m_initialized = false; }; class SamplerCache @@ -280,8 +298,11 @@ public: SamplerCache* GetSamplerCache() { return m_sampler_cache; } GdsBuffer* GetGdsBuffer() { return m_gds_buffer; } - [[nodiscard]] LibKernel::EventQueue::KernelEqueue GetEopEq() const { return m_eop_eq; } - void SetEopEq(LibKernel::EventQueue::KernelEqueue eop_eq) { m_eop_eq = eop_eq; } + //[[nodiscard]] LibKernel::EventQueue::KernelEqueue GetEopEq() const { return m_eop_eq; } + // void SetEopEq(LibKernel::EventQueue::KernelEqueue eop_eq) { m_eop_eq = eop_eq; } + void AddEopEq(LibKernel::EventQueue::KernelEqueue eq); + void DeleteEopEq(LibKernel::EventQueue::KernelEqueue eq); + void TriggerEopEvent(); private: Core::Mutex m_mutex; @@ -292,7 +313,8 @@ private: GraphicContext* m_graphic_ctx = nullptr; GdsBuffer* m_gds_buffer = nullptr; - LibKernel::EventQueue::KernelEqueue m_eop_eq = nullptr; + Core::Mutex m_eop_mutex; + Vector m_eop_eqs; }; struct RenderDepthInfo @@ -425,8 +447,8 @@ static void rt_check(const RenderTarget& rt) if (rt.base_addr != 0) { // EXIT_NOT_IMPLEMENTED(rt.base_addr == 0); - EXIT_NOT_IMPLEMENTED(rt.pitch_div8_minus1 != 0x000000ef); - EXIT_NOT_IMPLEMENTED(rt.fmask_pitch_div8_minus1 != 0x000000ef); + // EXIT_NOT_IMPLEMENTED(rt.pitch_div8_minus1 != 0x000000ef); + // EXIT_NOT_IMPLEMENTED(rt.fmask_pitch_div8_minus1 != 0x000000ef); // EXIT_NOT_IMPLEMENTED(rt.slice_div64_minus1 != 0x000086ff); EXIT_NOT_IMPLEMENTED(rt.base_array_slice_index != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.last_array_slice_index != 0x00000000); @@ -437,12 +459,12 @@ static void rt_check(const RenderTarget& rt) EXIT_NOT_IMPLEMENTED(rt.neo_mode != Config::IsNeo()); EXIT_NOT_IMPLEMENTED(rt.cmask_tile_mode != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.cmask_tile_mode_neo != 0x00000000); - EXIT_NOT_IMPLEMENTED(rt.format != 0x0000000a); - EXIT_NOT_IMPLEMENTED(rt.channel_type != 0x00000006); - EXIT_NOT_IMPLEMENTED(rt.channel_order != 0x00000001); + // EXIT_NOT_IMPLEMENTED(rt.format != 0x0000000a); + // EXIT_NOT_IMPLEMENTED(rt.channel_type != 0x00000006); + // EXIT_NOT_IMPLEMENTED(rt.channel_order != 0x00000001); EXIT_NOT_IMPLEMENTED(rt.force_dest_alpha_to_one != false); - EXIT_NOT_IMPLEMENTED(rt.tile_mode != 0x0000000a); - EXIT_NOT_IMPLEMENTED(rt.fmask_tile_mode != 0x0000000a); + // EXIT_NOT_IMPLEMENTED(rt.tile_mode != 0x0000000a); + // EXIT_NOT_IMPLEMENTED(rt.fmask_tile_mode != 0x0000000a); EXIT_NOT_IMPLEMENTED(rt.num_samples != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.num_fragments != 0x00000000); // EXIT_NOT_IMPLEMENTED(rt.dcc_max_uncompressed_block_size != 0x00000002); @@ -458,8 +480,8 @@ static void rt_check(const RenderTarget& rt) EXIT_NOT_IMPLEMENTED(rt.clear_color_word0 != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.clear_color_word1 != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.dcc_addr != 0x0000000000000000); - EXIT_NOT_IMPLEMENTED(rt.width != 0x00000780); - EXIT_NOT_IMPLEMENTED(rt.height != 0x00000438); + // EXIT_NOT_IMPLEMENTED(rt.width != 0x00000780); + // EXIT_NOT_IMPLEMENTED(rt.height != 0x00000438); } } @@ -756,10 +778,10 @@ static void vp_check(const ScreenViewport& vp) // EXIT_NOT_IMPLEMENTED(vp.scissor_top != 0); // EXIT_NOT_IMPLEMENTED(vp.scissor_right != 1920); // EXIT_NOT_IMPLEMENTED(vp.scissor_bottom != 1080); - EXIT_NOT_IMPLEMENTED(vp.hw_offset_x != 60); - EXIT_NOT_IMPLEMENTED(vp.hw_offset_y != 32); - EXIT_NOT_IMPLEMENTED(fabsf(vp.guard_band_horz_clip - 33.133327f) > 0.001f); - EXIT_NOT_IMPLEMENTED(fabsf(vp.guard_band_vert_clip - 59.629623f) > 0.001f); + // EXIT_NOT_IMPLEMENTED(vp.hw_offset_x != 60); + // EXIT_NOT_IMPLEMENTED(vp.hw_offset_y != 32); + // EXIT_NOT_IMPLEMENTED(fabsf(vp.guard_band_horz_clip - 33.133327f) > 0.001f); + // EXIT_NOT_IMPLEMENTED(fabsf(vp.guard_band_vert_clip - 59.629623f) > 0.001f); EXIT_NOT_IMPLEMENTED(vp.guard_band_horz_discard != 1.000000); EXIT_NOT_IMPLEMENTED(vp.guard_band_vert_discard != 1.000000); } @@ -827,6 +849,40 @@ void GraphicsRenderCreateContext() g_render_ctx->SetGraphicCtx(WindowGetGraphicContext()); } +void RenderContext::AddEopEq(LibKernel::EventQueue::KernelEqueue eq) +{ + Core::LockGuard lock(m_eop_mutex); + + EXIT_NOT_IMPLEMENTED(m_eop_eqs.Contains(eq)); + + m_eop_eqs.Add(eq); +} + +void RenderContext::DeleteEopEq(LibKernel::EventQueue::KernelEqueue eq) +{ + Core::LockGuard lock(m_eop_mutex); + + auto index = m_eop_eqs.Find(eq); + EXIT_NOT_IMPLEMENTED(!m_eop_eqs.IndexValid(index)); + m_eop_eqs[index] = nullptr; +} + +void RenderContext::TriggerEopEvent() +{ + Core::LockGuard lock(m_eop_mutex); + + for (auto& eop_eq: m_eop_eqs) + { + if (eop_eq != nullptr) + { + auto result = + LibKernel::EventQueue::KernelTriggerEvent(eop_eq, GRAPHICS_EVENT_EOP, LibKernel::EventQueue::KERNEL_EVFILT_GRAPHICS, + reinterpret_cast(LibKernel::KernelReadTsc())); + EXIT_NOT_IMPLEMENTED(result != OK); + } + } +} + void GdsBuffer::Init(GraphicContext* ctx) { if (m_buffer == nullptr) @@ -1042,7 +1098,7 @@ VulkanFramebuffer* FramebufferCache::CreateFramebuffer(RenderColorInfo* color, R VideoOutVulkanImage* vulkan_buffer = (with_color ? color->vulkan_buffer - : CreateDummyBuffer(VK_FORMAT_R8G8B8A8_SRGB, depth->vulkan_buffer->extent.width, depth->vulkan_buffer->extent.height)); + : CreateDummyBuffer(VK_FORMAT_B8G8R8A8_SRGB, depth->vulkan_buffer->extent.width, depth->vulkan_buffer->extent.height)); VkAttachmentDescription attachments[2]; attachments[0].flags = 0; @@ -1461,8 +1517,8 @@ static VkBlendOp get_blend_op(uint32_t op) } static void CreateLayout(VkDescriptorSetLayout* set_layouts, uint32_t* set_layouts_num, VkPushConstantRange* push_constant_info, - uint32_t* push_constant_info_num, const ShaderResources& bind, VkShaderStageFlags vk_stage, - DescriptorCache::Stage stage) + uint32_t* push_constant_info_num, const ShaderBindResources& bind, const ShaderBindParameters& bind_params, + VkShaderStageFlags vk_stage, DescriptorCache::Stage stage) { EXIT_IF(set_layouts == nullptr); EXIT_IF(set_layouts_num == nullptr); @@ -1489,7 +1545,8 @@ static void CreateLayout(VkDescriptorSetLayout* set_layouts, uint32_t* set_layou EXIT_IF(bind.descriptor_set_slot != *set_layouts_num); set_layouts[*set_layouts_num] = g_render_ctx->GetDescriptorCache()->GetDescriptorSetLayout( - stage, bind.storage_buffers.buffers_num, bind.textures2D.textures_num, bind.samplers.samplers_num, + stage, bind.storage_buffers.buffers_num, (bind_params.textures2D_without_sampler ? 0 : bind.textures2D.textures_num), + (bind_params.textures2D_without_sampler ? bind.textures2D.textures_num : 0), bind.samplers.samplers_num, (bind.gds_pointers.pointers_num > 0 ? 1 : 0)); (*set_layouts_num)++; } @@ -1498,12 +1555,16 @@ static void CreateLayout(VkDescriptorSetLayout* set_layouts, uint32_t* set_layou // NOLINTNEXTLINE(readability-function-cognitive-complexity) static VulkanPipeline* CreatePipelineInternal(VkRenderPass render_pass, const ShaderVertexInputInfo* vs_input_info, const Vector& vs_shader, const ShaderPixelInputInfo* ps_input_info, - const Vector& ps_shader, const PipelineParameters& params) + const Vector& ps_shader, const PipelineParameters* params, + const PipelineAdditionalParameters* additional_params) { EXIT_IF(g_render_ctx == nullptr); EXIT_IF(render_pass == nullptr); + EXIT_IF(params == nullptr); + EXIT_IF(additional_params == nullptr); - auto* pipeline = new VulkanPipeline; + auto* pipeline = new VulkanPipeline; + pipeline->additional_params = additional_params; auto* gctx = g_render_ctx->GetGraphicCtx(); @@ -1589,21 +1650,21 @@ static VulkanPipeline* CreatePipelineInternal(VkRenderPass render_pass, const Sh input_assembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; input_assembly.pNext = nullptr; input_assembly.flags = 0; - input_assembly.topology = params.topology; + input_assembly.topology = params->topology; input_assembly.primitiveRestartEnable = VK_FALSE; VkViewport viewport {}; - viewport.x = params.viewport_offset[0] - params.viewport_scale[0]; - viewport.y = params.viewport_offset[1] - params.viewport_scale[1]; - viewport.width = params.viewport_scale[0] * 2.0f; - viewport.height = params.viewport_scale[1] * 2.0f; - viewport.minDepth = params.viewport_offset[2]; - viewport.maxDepth = params.viewport_scale[2] + params.viewport_offset[2]; + viewport.x = params->viewport_offset[0] - params->viewport_scale[0]; + viewport.y = params->viewport_offset[1] - params->viewport_scale[1]; + viewport.width = params->viewport_scale[0] * 2.0f; + viewport.height = params->viewport_scale[1] * 2.0f; + viewport.minDepth = params->viewport_offset[2]; + viewport.maxDepth = params->viewport_scale[2] + params->viewport_offset[2]; VkRect2D scissor {}; - scissor.offset = {params.scissor_ltrb[0], params.scissor_ltrb[1]}; - scissor.extent = {static_cast(params.scissor_ltrb[2] - params.scissor_ltrb[0]), - static_cast(params.scissor_ltrb[3] - params.scissor_ltrb[1])}; + scissor.offset = {params->scissor_ltrb[0], params->scissor_ltrb[1]}; + scissor.extent = {static_cast(params->scissor_ltrb[2] - params->scissor_ltrb[0]), + static_cast(params->scissor_ltrb[3] - params->scissor_ltrb[1])}; VkPipelineViewportStateCreateInfo viewport_state {}; viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; @@ -1615,10 +1676,10 @@ static VulkanPipeline* CreatePipelineInternal(VkRenderPass render_pass, const Sh viewport_state.pScissors = &scissor; VkCullModeFlags cull_mode = VK_CULL_MODE_NONE; - cull_mode |= (params.cull_back ? VK_CULL_MODE_BACK_BIT : 0u); - cull_mode |= (params.cull_front ? VK_CULL_MODE_FRONT_BIT : 0u); + cull_mode |= (params->cull_back ? VK_CULL_MODE_BACK_BIT : 0u); + cull_mode |= (params->cull_front ? VK_CULL_MODE_FRONT_BIT : 0u); - VkFrontFace front_face = (params.face ? VK_FRONT_FACE_CLOCKWISE : VK_FRONT_FACE_COUNTER_CLOCKWISE); + VkFrontFace front_face = (params->face ? VK_FRONT_FACE_CLOCKWISE : VK_FRONT_FACE_COUNTER_CLOCKWISE); VkPipelineRasterizationDepthClipStateCreateInfoEXT clip_ext {}; clip_ext.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT; @@ -1654,31 +1715,31 @@ static VulkanPipeline* CreatePipelineInternal(VkRenderPass render_pass, const Sh VkColorComponentFlags color_write_mask = 0; - if (params.color_mask == 0xF) + if (params->color_mask == 0xF) { color_write_mask = static_cast(VK_COLOR_COMPONENT_R_BIT) | static_cast(VK_COLOR_COMPONENT_G_BIT) | static_cast(VK_COLOR_COMPONENT_B_BIT) | static_cast(VK_COLOR_COMPONENT_A_BIT); - } else if (params.color_mask == 0x0) + } else if (params->color_mask == 0x0) { color_write_mask = 0; } else { - EXIT("unknown mask: %u\n", params.color_mask); + EXIT("unknown mask: %u\n", params->color_mask); } VkPipelineColorBlendAttachmentState color_blend_attachment {}; color_blend_attachment.colorWriteMask = color_write_mask; - color_blend_attachment.blendEnable = params.blend_enable ? VK_TRUE : VK_FALSE; - color_blend_attachment.srcColorBlendFactor = get_blend_factor(params.color_srcblend); - color_blend_attachment.dstColorBlendFactor = get_blend_factor(params.color_destblend); - color_blend_attachment.colorBlendOp = get_blend_op(params.color_comb_fcn); + color_blend_attachment.blendEnable = params->blend_enable ? VK_TRUE : VK_FALSE; + color_blend_attachment.srcColorBlendFactor = get_blend_factor(params->color_srcblend); + color_blend_attachment.dstColorBlendFactor = get_blend_factor(params->color_destblend); + color_blend_attachment.colorBlendOp = get_blend_op(params->color_comb_fcn); color_blend_attachment.srcAlphaBlendFactor = - (params.separate_alpha_blend ? get_blend_factor(params.alpha_srcblend) : color_blend_attachment.srcColorBlendFactor); + (params->separate_alpha_blend ? get_blend_factor(params->alpha_srcblend) : color_blend_attachment.srcColorBlendFactor); color_blend_attachment.dstAlphaBlendFactor = - (params.separate_alpha_blend ? get_blend_factor(params.alpha_destblend) : color_blend_attachment.dstColorBlendFactor); + (params->separate_alpha_blend ? get_blend_factor(params->alpha_destblend) : color_blend_attachment.dstColorBlendFactor); color_blend_attachment.alphaBlendOp = - (params.separate_alpha_blend ? get_blend_op(params.alpha_comb_fcn) : color_blend_attachment.colorBlendOp); + (params->separate_alpha_blend ? get_blend_op(params->alpha_comb_fcn) : color_blend_attachment.colorBlendOp); VkPipelineColorBlendStateCreateInfo color_blending {}; color_blending.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; @@ -1700,9 +1761,9 @@ static VulkanPipeline* CreatePipelineInternal(VkRenderPass render_pass, const Sh uint32_t push_constant_info_num = 0; CreateLayout(set_layouts, &set_layouts_num, push_constant_info, &push_constant_info_num, vs_input_info->bind, - VK_SHADER_STAGE_VERTEX_BIT, DescriptorCache::Stage::Vertex); + additional_params->vs_bind, VK_SHADER_STAGE_VERTEX_BIT, DescriptorCache::Stage::Vertex); CreateLayout(set_layouts, &set_layouts_num, push_constant_info, &push_constant_info_num, ps_input_info->bind, - VK_SHADER_STAGE_FRAGMENT_BIT, DescriptorCache::Stage::Pixel); + additional_params->ps_bind, VK_SHADER_STAGE_FRAGMENT_BIT, DescriptorCache::Stage::Pixel); VkPipelineLayoutCreateInfo pipeline_layout_info {}; pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; @@ -1723,15 +1784,15 @@ static VulkanPipeline* CreatePipelineInternal(VkRenderPass render_pass, const Sh depth_stencil_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; depth_stencil_info.pNext = nullptr; depth_stencil_info.flags = 0; - depth_stencil_info.depthTestEnable = (params.depth_test_enable ? VK_TRUE : VK_FALSE); - depth_stencil_info.depthWriteEnable = (params.depth_write_enable ? VK_TRUE : VK_FALSE); - depth_stencil_info.depthCompareOp = params.depth_compare_op; - depth_stencil_info.depthBoundsTestEnable = (params.depth_bounds_test_enable ? VK_TRUE : VK_FALSE); - depth_stencil_info.stencilTestEnable = (params.stencil_test_enable ? VK_TRUE : VK_FALSE); + depth_stencil_info.depthTestEnable = (params->depth_test_enable ? VK_TRUE : VK_FALSE); + depth_stencil_info.depthWriteEnable = (params->depth_write_enable ? VK_TRUE : VK_FALSE); + depth_stencil_info.depthCompareOp = params->depth_compare_op; + depth_stencil_info.depthBoundsTestEnable = (params->depth_bounds_test_enable ? VK_TRUE : VK_FALSE); + depth_stencil_info.stencilTestEnable = (params->stencil_test_enable ? VK_TRUE : VK_FALSE); depth_stencil_info.front = {}; depth_stencil_info.back = {}; - depth_stencil_info.minDepthBounds = params.depth_min_bounds; - depth_stencil_info.maxDepthBounds = params.depth_max_bounds; + depth_stencil_info.minDepthBounds = params->depth_min_bounds; + depth_stencil_info.maxDepthBounds = params->depth_max_bounds; VkGraphicsPipelineCreateInfo pipeline_info {}; pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; @@ -1745,7 +1806,7 @@ static VulkanPipeline* CreatePipelineInternal(VkRenderPass render_pass, const Sh pipeline_info.pViewportState = &viewport_state; pipeline_info.pRasterizationState = &rasterizer; pipeline_info.pMultisampleState = &multisampling; - pipeline_info.pDepthStencilState = (params.with_depth ? &depth_stencil_info : nullptr); + pipeline_info.pDepthStencilState = (params->with_depth ? &depth_stencil_info : nullptr); pipeline_info.pColorBlendState = &color_blending; pipeline_info.pDynamicState = nullptr; pipeline_info.layout = pipeline->pipeline_layout; @@ -1767,11 +1828,15 @@ static VulkanPipeline* CreatePipelineInternal(VkRenderPass render_pass, const Sh } // NOLINTNEXTLINE(readability-function-cognitive-complexity) -static VulkanPipeline* CreatePipelineInternal(const ShaderComputeInputInfo* input_info, const Vector& cs_shader) +static VulkanPipeline* CreatePipelineInternal(const ShaderComputeInputInfo* input_info, const Vector& cs_shader, + const PipelineParameters* params, const PipelineAdditionalParameters* additional_params) { EXIT_IF(g_render_ctx == nullptr); + EXIT_IF(params == nullptr); + EXIT_IF(additional_params == nullptr); - auto* pipeline = new VulkanPipeline; + auto* pipeline = new VulkanPipeline; + pipeline->additional_params = additional_params; auto* gctx = g_render_ctx->GetGraphicCtx(); @@ -1805,8 +1870,8 @@ static VulkanPipeline* CreatePipelineInternal(const ShaderComputeInputInfo* inpu VkPushConstantRange push_constant_info[1]; uint32_t push_constant_info_num = 0; - CreateLayout(set_layouts, &set_layouts_num, push_constant_info, &push_constant_info_num, input_info->bind, VK_SHADER_STAGE_COMPUTE_BIT, - DescriptorCache::Stage::Compute); + CreateLayout(set_layouts, &set_layouts_num, push_constant_info, &push_constant_info_num, input_info->bind, additional_params->cs_bind, + VK_SHADER_STAGE_COMPUTE_BIT, DescriptorCache::Stage::Compute); VkPipelineLayoutCreateInfo pipeline_layout_info {}; pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; @@ -1843,21 +1908,31 @@ static VulkanPipeline* CreatePipelineInternal(const ShaderComputeInputInfo* inpu return pipeline; } -static void DeletePipelineInternal(VulkanPipeline* pipeline) +void PipelineCache::DeletePipelineInternal(Pipeline& p) { EXIT_IF(g_render_ctx == nullptr); - EXIT_IF(pipeline == nullptr); - EXIT_IF(pipeline->pipeline == nullptr); - EXIT_IF(pipeline->pipeline_layout == nullptr); + EXIT_IF(p.pipeline == nullptr); + EXIT_IF(p.pipeline->pipeline == nullptr); + EXIT_IF(p.pipeline->pipeline_layout == nullptr); + EXIT_IF(p.params == nullptr); + EXIT_IF(p.pipeline->additional_params == nullptr); + + delete p.params; + delete p.pipeline->additional_params; + + p.params = nullptr; + p.pipeline->additional_params = nullptr; auto* gctx = g_render_ctx->GetGraphicCtx(); EXIT_IF(gctx == nullptr); - vkDestroyPipeline(gctx->device, pipeline->pipeline, nullptr); - vkDestroyPipelineLayout(gctx->device, pipeline->pipeline_layout, nullptr); + vkDestroyPipeline(gctx->device, p.pipeline->pipeline, nullptr); + vkDestroyPipelineLayout(gctx->device, p.pipeline->pipeline_layout, nullptr); - delete pipeline; + delete p.pipeline; + + p.pipeline = nullptr; } bool PipelineParameters::operator==(const PipelineParameters& other) const @@ -1870,7 +1945,7 @@ VulkanPipeline* PipelineCache::Find(const Pipeline& p) const for (const auto& pn: m_pipelines) { if (pn.pipeline != nullptr && p.render_pass_id == pn.render_pass_id && p.vs_shader_id == pn.vs_shader_id && - p.ps_shader_id == pn.ps_shader_id && p.cs_shader_id == pn.cs_shader_id && p.params == pn.params) + p.ps_shader_id == pn.ps_shader_id && p.cs_shader_id == pn.cs_shader_id && *p.params == *pn.params) { return pn.pipeline; } @@ -1901,40 +1976,42 @@ VulkanPipeline* PipelineCache::CreatePipeline(VulkanFramebuffer* framebuffer, Re auto ps_id = ShaderGetIdPS(ps_regs, ps_input_info); Pipeline p {}; - p.render_pass_id = framebuffer->render_pass_id; - p.ps_shader_id = ps_id; - p.vs_shader_id = vs_id; - p.params.viewport_scale[0] = vp.viewports[0].xscale; - p.params.viewport_scale[1] = vp.viewports[0].yscale; - p.params.viewport_scale[2] = vp.viewports[0].zscale; - p.params.viewport_offset[0] = vp.viewports[0].xoffset; - p.params.viewport_offset[1] = vp.viewports[0].yoffset; - p.params.viewport_offset[2] = vp.viewports[0].zoffset; - p.params.scissor_ltrb[0] = vp.scissor_left; - p.params.scissor_ltrb[1] = vp.scissor_top; - p.params.scissor_ltrb[2] = vp.scissor_right; - p.params.scissor_ltrb[3] = vp.scissor_bottom; - p.params.topology = topology; - p.params.with_depth = (depth->format != VK_FORMAT_UNDEFINED && depth->vulkan_buffer != nullptr); - p.params.depth_test_enable = depth->depth_test_enable; - p.params.depth_write_enable = (depth->depth_write_enable && !depth->depth_clear_enable); - p.params.depth_compare_op = depth->depth_compare_op; - p.params.depth_bounds_test_enable = depth->depth_bounds_test_enable; - p.params.depth_min_bounds = depth->depth_min_bounds; - p.params.depth_max_bounds = depth->depth_max_bounds; - p.params.stencil_test_enable = depth->stencil_test_enable; - p.params.color_mask = color_mask; - p.params.cull_back = mc.cull_back; - p.params.cull_front = mc.cull_front; - p.params.face = mc.face; - p.params.color_srcblend = bc.color_srcblend; - p.params.color_comb_fcn = bc.color_comb_fcn; - p.params.color_destblend = bc.color_destblend; - p.params.alpha_srcblend = bc.alpha_srcblend; - p.params.alpha_comb_fcn = bc.alpha_comb_fcn; - p.params.alpha_destblend = bc.alpha_destblend; - p.params.separate_alpha_blend = bc.separate_alpha_blend; - p.params.blend_enable = bc.enable; + p.render_pass_id = framebuffer->render_pass_id; + p.ps_shader_id = ps_id; + p.vs_shader_id = vs_id; + p.params = new PipelineParameters; + + p.params->viewport_scale[0] = vp.viewports[0].xscale; + p.params->viewport_scale[1] = vp.viewports[0].yscale; + p.params->viewport_scale[2] = vp.viewports[0].zscale; + p.params->viewport_offset[0] = vp.viewports[0].xoffset; + p.params->viewport_offset[1] = vp.viewports[0].yoffset; + p.params->viewport_offset[2] = vp.viewports[0].zoffset; + p.params->scissor_ltrb[0] = vp.scissor_left; + p.params->scissor_ltrb[1] = vp.scissor_top; + p.params->scissor_ltrb[2] = vp.scissor_right; + p.params->scissor_ltrb[3] = vp.scissor_bottom; + p.params->topology = topology; + p.params->with_depth = (depth->format != VK_FORMAT_UNDEFINED && depth->vulkan_buffer != nullptr); + p.params->depth_test_enable = depth->depth_test_enable; + p.params->depth_write_enable = (depth->depth_write_enable && !depth->depth_clear_enable); + p.params->depth_compare_op = depth->depth_compare_op; + p.params->depth_bounds_test_enable = depth->depth_bounds_test_enable; + p.params->depth_min_bounds = depth->depth_min_bounds; + p.params->depth_max_bounds = depth->depth_max_bounds; + p.params->stencil_test_enable = depth->stencil_test_enable; + p.params->color_mask = color_mask; + p.params->cull_back = mc.cull_back; + p.params->cull_front = mc.cull_front; + p.params->face = mc.face; + p.params->color_srcblend = bc.color_srcblend; + p.params->color_comb_fcn = bc.color_comb_fcn; + p.params->color_destblend = bc.color_destblend; + p.params->alpha_srcblend = bc.alpha_srcblend; + p.params->alpha_comb_fcn = bc.alpha_comb_fcn; + p.params->alpha_destblend = bc.alpha_destblend; + p.params->separate_alpha_blend = bc.separate_alpha_blend; + p.params->blend_enable = bc.enable; auto* found = Find(p); @@ -1943,13 +2020,21 @@ VulkanPipeline* PipelineCache::CreatePipeline(VulkanFramebuffer* framebuffer, Re return found; } - auto vs_shader = ShaderRecompileVS(vs_regs, vs_input_info); - auto ps_shader = ShaderRecompilePS(ps_regs, ps_input_info); + auto vs_code = ShaderParseVS(vs_regs); + auto ps_code = ShaderParsePS(ps_regs); + + auto* params2 = new PipelineAdditionalParameters; + + params2->vs_bind = ShaderGetBindParametersVS(vs_code, vs_input_info); + params2->ps_bind = ShaderGetBindParametersPS(ps_code, ps_input_info); + + auto vs_shader = ShaderRecompileVS(vs_code, vs_input_info); + auto ps_shader = ShaderRecompilePS(ps_code, ps_input_info); EXIT_IF(vs_shader.IsEmpty()); EXIT_IF(ps_shader.IsEmpty()); - p.pipeline = CreatePipelineInternal(framebuffer->render_pass, vs_input_info, vs_shader, ps_input_info, ps_shader, p.params); + p.pipeline = CreatePipelineInternal(framebuffer->render_pass, vs_input_info, vs_shader, ps_input_info, ps_shader, p.params, params2); EXIT_NOT_IMPLEMENTED(p.pipeline == nullptr); @@ -1969,7 +2054,7 @@ VulkanPipeline* PipelineCache::CreatePipeline(VulkanFramebuffer* framebuffer, Re if (m_pipelines.Size() >= PipelineCache::MAX_PIPELINES) { auto& pn = m_pipelines[Math::Rand::UintInclusiveRange(0, m_pipelines.Size() - 1)]; - DeletePipelineInternal(pn.pipeline); + DeletePipelineInternal(pn); pn = p; } else { @@ -1994,6 +2079,7 @@ VulkanPipeline* PipelineCache::CreatePipeline(const ShaderComputeInputInfo* inpu Pipeline p {}; p.cs_shader_id = cs_id; + p.params = new PipelineParameters; auto* found = Find(p); @@ -2002,11 +2088,16 @@ VulkanPipeline* PipelineCache::CreatePipeline(const ShaderComputeInputInfo* inpu return found; } - auto cs_shader = ShaderRecompileCS(cs_regs, input_info); + auto cs_code = ShaderParseCS(cs_regs); + auto* params2 = new PipelineAdditionalParameters; + + params2->cs_bind = ShaderGetBindParametersCS(cs_code, input_info); + + auto cs_shader = ShaderRecompileCS(cs_code, input_info); EXIT_IF(cs_shader.IsEmpty()); - p.pipeline = CreatePipelineInternal(input_info, cs_shader); + p.pipeline = CreatePipelineInternal(input_info, cs_shader, p.params, params2); EXIT_NOT_IMPLEMENTED(p.pipeline == nullptr); @@ -2026,7 +2117,7 @@ VulkanPipeline* PipelineCache::CreatePipeline(const ShaderComputeInputInfo* inpu if (m_pipelines.Size() >= PipelineCache::MAX_PIPELINES) { auto& pn = m_pipelines[Math::Rand::UintInclusiveRange(0, m_pipelines.Size() - 1)]; - DeletePipelineInternal(pn.pipeline); + DeletePipelineInternal(pn); pn = p; } else { @@ -2047,7 +2138,7 @@ void PipelineCache::DeletePipeline(VulkanPipeline* pipeline) if (m_pipelines.IndexValid(index)) { - DeletePipelineInternal(m_pipelines.At(index).pipeline); + DeletePipelineInternal(m_pipelines[index]); // m_pipelines.RemoveAt(index); m_pipelines[index].pipeline = nullptr; } @@ -2063,7 +2154,7 @@ void PipelineCache::DeletePipelines(VulkanFramebuffer* framebuffer) { if (p.pipeline != nullptr && p.render_pass_id == framebuffer->render_pass_id) { - DeletePipelineInternal(p.pipeline); + DeletePipelineInternal(p); p.pipeline = nullptr; } } @@ -2075,19 +2166,19 @@ void PipelineCache::DeleteAllPipelines() for (auto& p: m_pipelines) { - DeletePipelineInternal(p.pipeline); + DeletePipelineInternal(p); p.pipeline = nullptr; } // m_pipelines.Clear(); } -static void create_layout(GraphicContext* gctx, int storage_buffers_num, int textures2d_num, int samplers_num, int gds_buffers_num, - VkShaderStageFlags stage, VkDescriptorSetLayout* dst) +static void create_layout(GraphicContext* gctx, int storage_buffers_num, int textures2d_sampled_num, int textures2d_storage_num, + int samplers_num, int gds_buffers_num, VkShaderStageFlags stage, VkDescriptorSetLayout* dst) { uint32_t binding_num = 0; - VkDescriptorSetLayoutBinding ubo_layout_binding[4] = {}; + VkDescriptorSetLayoutBinding ubo_layout_binding[5] = {}; if (storage_buffers_num > 0) { @@ -2100,12 +2191,23 @@ static void create_layout(GraphicContext* gctx, int storage_buffers_num, int tex binding_num++; } - if (textures2d_num > 0) + if (textures2d_sampled_num > 0) { EXIT_IF(binding_num >= sizeof(ubo_layout_binding) / sizeof(ubo_layout_binding[0])); ubo_layout_binding[binding_num].binding = binding_num; ubo_layout_binding[binding_num].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - ubo_layout_binding[binding_num].descriptorCount = textures2d_num; + ubo_layout_binding[binding_num].descriptorCount = textures2d_sampled_num; + ubo_layout_binding[binding_num].stageFlags = stage; + ubo_layout_binding[binding_num].pImmutableSamplers = nullptr; + binding_num++; + } + + if (textures2d_storage_num > 0) + { + EXIT_IF(binding_num >= sizeof(ubo_layout_binding) / sizeof(ubo_layout_binding[0])); + ubo_layout_binding[binding_num].binding = binding_num; + ubo_layout_binding[binding_num].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + ubo_layout_binding[binding_num].descriptorCount = textures2d_storage_num; ubo_layout_binding[binding_num].stageFlags = stage; ubo_layout_binding[binding_num].pImmutableSamplers = nullptr; binding_num++; @@ -2163,24 +2265,30 @@ void DescriptorCache::Init() auto* gctx = g_render_ctx->GetGraphicCtx(); EXIT_IF(gctx == nullptr); - EXIT_IF(m_descriptor_set_layout_vertex[0][0][0][0] != nullptr); - EXIT_IF(m_descriptor_set_layout_pixel[0][0][0][0] != nullptr); - EXIT_IF(m_descriptor_set_layout_compute[0][0][0][0] != nullptr); + EXIT_IF(m_descriptor_set_layout_vertex[0][0][0][0][0] != nullptr); + EXIT_IF(m_descriptor_set_layout_pixel[0][0][0][0][0] != nullptr); + EXIT_IF(m_descriptor_set_layout_compute[0][0][0][0][0] != nullptr); for (int buffers_num_i = 0; buffers_num_i <= BUFFERS_MAX; buffers_num_i++) { - for (int buffers_num_j = 0; buffers_num_j <= TEXTURES_MAX; buffers_num_j++) + for (int buffers_num_j = 0; buffers_num_j <= TEXTURES_SAMPLED_MAX; buffers_num_j++) { - for (int buffers_num_k = 0; buffers_num_k <= SAMPLERS_MAX; buffers_num_k++) + for (int buffers_num_j2 = 0; buffers_num_j2 <= TEXTURES_STORAGE_MAX; buffers_num_j2++) { - for (int buffers_num_l = 0; buffers_num_l <= GDS_BUFFER_MAX; buffers_num_l++) + for (int buffers_num_k = 0; buffers_num_k <= SAMPLERS_MAX; buffers_num_k++) { - create_layout(gctx, buffers_num_i, buffers_num_j, buffers_num_k, buffers_num_l, VK_SHADER_STAGE_FRAGMENT_BIT, - &m_descriptor_set_layout_pixel[buffers_num_i][buffers_num_j][buffers_num_k][buffers_num_l]); - create_layout(gctx, buffers_num_i, buffers_num_j, buffers_num_k, buffers_num_l, VK_SHADER_STAGE_VERTEX_BIT, - &m_descriptor_set_layout_vertex[buffers_num_i][buffers_num_j][buffers_num_k][buffers_num_l]); - create_layout(gctx, buffers_num_i, buffers_num_j, buffers_num_k, buffers_num_l, VK_SHADER_STAGE_COMPUTE_BIT, - &m_descriptor_set_layout_compute[buffers_num_i][buffers_num_j][buffers_num_k][buffers_num_l]); + for (int buffers_num_l = 0; buffers_num_l <= GDS_BUFFER_MAX; buffers_num_l++) + { + create_layout( + gctx, buffers_num_i, buffers_num_j, buffers_num_j2, buffers_num_k, buffers_num_l, VK_SHADER_STAGE_FRAGMENT_BIT, + &m_descriptor_set_layout_pixel[buffers_num_i][buffers_num_j][buffers_num_j2][buffers_num_k][buffers_num_l]); + create_layout( + gctx, buffers_num_i, buffers_num_j, buffers_num_j2, buffers_num_k, buffers_num_l, VK_SHADER_STAGE_VERTEX_BIT, + &m_descriptor_set_layout_vertex[buffers_num_i][buffers_num_j][buffers_num_j2][buffers_num_k][buffers_num_l]); + create_layout( + gctx, buffers_num_i, buffers_num_j, buffers_num_j2, buffers_num_k, buffers_num_l, VK_SHADER_STAGE_COMPUTE_BIT, + &m_descriptor_set_layout_compute[buffers_num_i][buffers_num_j][buffers_num_j2][buffers_num_k][buffers_num_l]); + } } } } @@ -2194,19 +2302,21 @@ void DescriptorCache::CreatePool() auto* gctx = g_render_ctx->GetGraphicCtx(); EXIT_IF(gctx == nullptr); - VkDescriptorPoolSize pool_size[3]; + VkDescriptorPoolSize pool_size[4]; pool_size[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; pool_size[0].descriptorCount = 32; pool_size[1].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; pool_size[1].descriptorCount = 32; - pool_size[2].type = VK_DESCRIPTOR_TYPE_SAMPLER; + pool_size[2].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; pool_size[2].descriptorCount = 32; + pool_size[3].type = VK_DESCRIPTOR_TYPE_SAMPLER; + pool_size[3].descriptorCount = 32; VkDescriptorPoolCreateInfo pool_info {}; pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; pool_info.pNext = nullptr; pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - pool_info.poolSizeCount = 3; + pool_info.poolSizeCount = 4; pool_info.pPoolSizes = pool_size; pool_info.maxSets = 32; @@ -2219,11 +2329,12 @@ void DescriptorCache::CreatePool() m_pools.Add(pool); } -VulkanDescriptorSet* DescriptorCache::Allocate(Stage stage, int storage_buffers_num, int textures2d_num, int samplers_num, - int gds_buffers_num) +VulkanDescriptorSet* DescriptorCache::Allocate(Stage stage, int storage_buffers_num, int textures2d_sampled_num, int textures2d_storage_num, + int samplers_num, int gds_buffers_num) { EXIT_IF(storage_buffers_num < 0 || storage_buffers_num > BUFFERS_MAX); - EXIT_IF(textures2d_num < 0 || textures2d_num > TEXTURES_MAX); + EXIT_IF(textures2d_sampled_num < 0 || textures2d_sampled_num > TEXTURES_SAMPLED_MAX); + EXIT_IF(textures2d_storage_num < 0 || textures2d_storage_num > TEXTURES_STORAGE_MAX); EXIT_IF(samplers_num < 0 || samplers_num > SAMPLERS_MAX); EXIT_IF(gds_buffers_num < 0 || gds_buffers_num > GDS_BUFFER_MAX); @@ -2251,16 +2362,16 @@ VulkanDescriptorSet* DescriptorCache::Allocate(Stage stage, int storage_buffers_ switch (stage) { case Stage::Vertex: - alloc_info.pSetLayouts = - &m_descriptor_set_layout_vertex[storage_buffers_num][textures2d_num][samplers_num][gds_buffers_num]; + alloc_info.pSetLayouts = &m_descriptor_set_layout_vertex[storage_buffers_num][textures2d_sampled_num] + [textures2d_storage_num][samplers_num][gds_buffers_num]; break; case Stage::Pixel: - alloc_info.pSetLayouts = - &m_descriptor_set_layout_pixel[storage_buffers_num][textures2d_num][samplers_num][gds_buffers_num]; + alloc_info.pSetLayouts = &m_descriptor_set_layout_pixel[storage_buffers_num][textures2d_sampled_num] + [textures2d_storage_num][samplers_num][gds_buffers_num]; break; case Stage::Compute: - alloc_info.pSetLayouts = - &m_descriptor_set_layout_compute[storage_buffers_num][textures2d_num][samplers_num][gds_buffers_num]; + alloc_info.pSetLayouts = &m_descriptor_set_layout_compute[storage_buffers_num][textures2d_sampled_num] + [textures2d_storage_num][samplers_num][gds_buffers_num]; break; default: EXIT("unknown stage\n"); } @@ -2299,11 +2410,13 @@ void DescriptorCache::Free(VulkanDescriptorSet* set) // NOLINTNEXTLINE(readability-function-cognitive-complexity) VulkanDescriptorSet* DescriptorCache::GetDescriptor(Stage stage, int storage_buffers_num, VulkanBuffer** storage_buffers, - int textures2d_num, TextureVulkanImage** textures2d, int samplers_num, + int textures2d_sampled_num, TextureVulkanImage** textures2d_sampled, + int textures2d_storage_num, TextureVulkanImage** textures2d_storage, int samplers_num, uint64_t* samplers, int gds_buffers_num, VulkanBuffer** gds_buffers) { EXIT_IF(storage_buffers_num < 0 || storage_buffers_num > BUFFERS_MAX); - EXIT_IF(textures2d_num < 0 || textures2d_num > TEXTURES_MAX); + EXIT_IF(textures2d_sampled_num < 0 || textures2d_sampled_num > TEXTURES_SAMPLED_MAX); + EXIT_IF(textures2d_storage_num < 0 || textures2d_storage_num > TEXTURES_STORAGE_MAX); EXIT_IF(samplers_num < 0 || samplers_num > SAMPLERS_MAX); EXIT_IF(storage_buffers == nullptr); @@ -2315,7 +2428,8 @@ VulkanDescriptorSet* DescriptorCache::GetDescriptor(Stage stage, int storage_buf for (auto& set: m_sets) { if (set.set != nullptr && set.stage == stage && set.storage_buffers_num == storage_buffers_num && - set.textures2d_num == textures2d_num && set.samplers_num == samplers_num && set.gds_buffers_num == gds_buffers_num) + set.textures2d_sampled_num == textures2d_sampled_num && set.textures2d_storage_num == textures2d_storage_num && + set.samplers_num == samplers_num && set.gds_buffers_num == gds_buffers_num) { bool match = true; for (int i = 0; i < storage_buffers_num; i++) @@ -2328,9 +2442,20 @@ VulkanDescriptorSet* DescriptorCache::GetDescriptor(Stage stage, int storage_buf } if (match) { - for (int i = 0; i < textures2d_num; i++) + for (int i = 0; i < textures2d_sampled_num; i++) { - if (textures2d[i]->memory.unique_id != set.textures2d_id[i]) + if (textures2d_sampled[i]->memory.unique_id != set.textures2d_sampled_id[i]) + { + match = false; + break; + } + } + } + if (match) + { + for (int i = 0; i < textures2d_storage_num; i++) + { + if (textures2d_storage[i]->memory.unique_id != set.textures2d_storage_id[i]) { match = false; break; @@ -2366,7 +2491,7 @@ VulkanDescriptorSet* DescriptorCache::GetDescriptor(Stage stage, int storage_buf } } - auto* new_set = Allocate(stage, storage_buffers_num, textures2d_num, samplers_num, gds_buffers_num); + auto* new_set = Allocate(stage, storage_buffers_num, textures2d_sampled_num, textures2d_storage_num, samplers_num, gds_buffers_num); EXIT_NOT_IMPLEMENTED(new_set == nullptr); VkDescriptorBufferInfo buffer_info[BUFFERS_MAX] {}; @@ -2377,12 +2502,20 @@ VulkanDescriptorSet* DescriptorCache::GetDescriptor(Stage stage, int storage_buf buffer_info[i].range = VK_WHOLE_SIZE; } - VkDescriptorImageInfo texture2d_info[TEXTURES_MAX] {}; - for (int i = 0; i < textures2d_num; i++) + VkDescriptorImageInfo texture2d_sampled_info[TEXTURES_SAMPLED_MAX] {}; + for (int i = 0; i < textures2d_sampled_num; i++) { - texture2d_info[i].sampler = nullptr; - texture2d_info[i].imageView = textures2d[i]->image_view; - texture2d_info[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + texture2d_sampled_info[i].sampler = nullptr; + texture2d_sampled_info[i].imageView = textures2d_sampled[i]->image_view; + texture2d_sampled_info[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + + VkDescriptorImageInfo texture2d_storage_info[TEXTURES_STORAGE_MAX] {}; + for (int i = 0; i < textures2d_storage_num; i++) + { + texture2d_storage_info[i].sampler = nullptr; + texture2d_storage_info[i].imageView = textures2d_storage[i]->image_view; + texture2d_storage_info[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; } VkDescriptorImageInfo sampler_info[SAMPLERS_MAX] {}; @@ -2403,7 +2536,7 @@ VulkanDescriptorSet* DescriptorCache::GetDescriptor(Stage stage, int storage_buf int binding_num = 0; - VkWriteDescriptorSet descriptor_write[4] = {}; + VkWriteDescriptorSet descriptor_write[5] = {}; if (storage_buffers_num > 0) { @@ -2421,7 +2554,7 @@ VulkanDescriptorSet* DescriptorCache::GetDescriptor(Stage stage, int storage_buf binding_num++; } - if (textures2d_num > 0) + if (textures2d_sampled_num > 0) { EXIT_IF(binding_num >= static_cast(sizeof(descriptor_write) / sizeof(descriptor_write[0]))); descriptor_write[binding_num].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -2430,9 +2563,25 @@ VulkanDescriptorSet* DescriptorCache::GetDescriptor(Stage stage, int storage_buf descriptor_write[binding_num].dstBinding = binding_num; descriptor_write[binding_num].dstArrayElement = 0; descriptor_write[binding_num].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descriptor_write[binding_num].descriptorCount = textures2d_num; + descriptor_write[binding_num].descriptorCount = textures2d_sampled_num; descriptor_write[binding_num].pBufferInfo = nullptr; - descriptor_write[binding_num].pImageInfo = texture2d_info; + descriptor_write[binding_num].pImageInfo = texture2d_sampled_info; + descriptor_write[binding_num].pTexelBufferView = nullptr; + binding_num++; + } + + if (textures2d_storage_num > 0) + { + EXIT_IF(binding_num >= static_cast(sizeof(descriptor_write) / sizeof(descriptor_write[0]))); + descriptor_write[binding_num].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_write[binding_num].pNext = nullptr; + descriptor_write[binding_num].dstSet = new_set->set; + descriptor_write[binding_num].dstBinding = binding_num; + descriptor_write[binding_num].dstArrayElement = 0; + descriptor_write[binding_num].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + descriptor_write[binding_num].descriptorCount = textures2d_storage_num; + descriptor_write[binding_num].pBufferInfo = nullptr; + descriptor_write[binding_num].pImageInfo = texture2d_storage_info; descriptor_write[binding_num].pTexelBufferView = nullptr; binding_num++; } @@ -2472,19 +2621,24 @@ VulkanDescriptorSet* DescriptorCache::GetDescriptor(Stage stage, int storage_buf vkUpdateDescriptorSets(gctx->device, binding_num, descriptor_write, 0, nullptr); Set nset; - nset.set = new_set; - nset.storage_buffers_num = storage_buffers_num; - nset.textures2d_num = textures2d_num; - nset.samplers_num = samplers_num; - nset.gds_buffers_num = gds_buffers_num; - nset.stage = stage; + nset.set = new_set; + nset.storage_buffers_num = storage_buffers_num; + nset.textures2d_sampled_num = textures2d_sampled_num; + nset.textures2d_storage_num = textures2d_storage_num; + nset.samplers_num = samplers_num; + nset.gds_buffers_num = gds_buffers_num; + nset.stage = stage; for (int i = 0; i < storage_buffers_num; i++) { nset.storage_buffers_id[i] = storage_buffers[i]->memory.unique_id; } - for (int i = 0; i < textures2d_num; i++) + for (int i = 0; i < textures2d_sampled_num; i++) { - nset.textures2d_id[i] = textures2d[i]->memory.unique_id; + nset.textures2d_sampled_id[i] = textures2d_sampled[i]->memory.unique_id; + } + for (int i = 0; i < textures2d_storage_num; i++) + { + nset.textures2d_storage_id[i] = textures2d_storage[i]->memory.unique_id; } for (int i = 0; i < samplers_num; i++) { @@ -2542,9 +2696,9 @@ void DescriptorCache::FreeDescriptor(TextureVulkanImage* image) { if (set.set != nullptr) { - for (int i = 0; i < set.textures2d_num; i++) + for (int i = 0; i < set.textures2d_sampled_num; i++) { - if (set.textures2d_id[i] == image->memory.unique_id) + if (set.textures2d_sampled_id[i] == image->memory.unique_id) { Free(set.set); set.set = nullptr; @@ -2555,23 +2709,30 @@ void DescriptorCache::FreeDescriptor(TextureVulkanImage* image) } } -VkDescriptorSetLayout DescriptorCache::GetDescriptorSetLayout(Stage stage, int storage_buffers_num, int textures2d_num, int samplers_num, - int gds_buffers_num) +VkDescriptorSetLayout DescriptorCache::GetDescriptorSetLayout(Stage stage, int storage_buffers_num, int textures2d_sampled_num, + int textures2d_storage_num, int samplers_num, int gds_buffers_num) { EXIT_IF(storage_buffers_num < 0 || storage_buffers_num > BUFFERS_MAX); - EXIT_IF(textures2d_num < 0 || textures2d_num > TEXTURES_MAX); + EXIT_IF(textures2d_sampled_num < 0 || textures2d_sampled_num > TEXTURES_SAMPLED_MAX); + EXIT_IF(textures2d_storage_num < 0 || textures2d_storage_num > TEXTURES_STORAGE_MAX); EXIT_IF(samplers_num < 0 || samplers_num > SAMPLERS_MAX); EXIT_IF(gds_buffers_num < 0 || gds_buffers_num > GDS_BUFFER_MAX); - EXIT_NOT_IMPLEMENTED(stage != Stage::Pixel && (textures2d_num > 0 || samplers_num > 0)); + EXIT_NOT_IMPLEMENTED(stage != Stage::Pixel && (textures2d_sampled_num > 0 || textures2d_storage_num > 0 || samplers_num > 0)); Core::LockGuard lock(m_mutex); Init(); switch (stage) { - case Stage::Vertex: return m_descriptor_set_layout_vertex[storage_buffers_num][textures2d_num][samplers_num][gds_buffers_num]; - case Stage::Pixel: return m_descriptor_set_layout_pixel[storage_buffers_num][textures2d_num][samplers_num][gds_buffers_num]; - case Stage::Compute: return m_descriptor_set_layout_compute[storage_buffers_num][textures2d_num][samplers_num][gds_buffers_num]; + case Stage::Vertex: + return m_descriptor_set_layout_vertex[storage_buffers_num][textures2d_sampled_num][textures2d_storage_num][samplers_num] + [gds_buffers_num]; + case Stage::Pixel: + return m_descriptor_set_layout_pixel[storage_buffers_num][textures2d_sampled_num][textures2d_storage_num][samplers_num] + [gds_buffers_num]; + case Stage::Compute: + return m_descriptor_set_layout_compute[storage_buffers_num][textures2d_sampled_num][textures2d_storage_num][samplers_num] + [gds_buffers_num]; default: EXIT("unknown stage\n"); } return nullptr; @@ -2708,13 +2869,55 @@ static void FindRenderColorInfo(const HardwareContext& hw, RenderColorInfo* r) if (rt.base_addr != 0) { + auto width = rt.width; + auto height = rt.height; + auto pitch = (rt.pitch_div8_minus1 + 1) * 8; + auto size = (rt.slice_div64_minus1 + 1) * 64 * 4; + bool tile = false; + uint32_t format = 0; + + if (rt.tile_mode == 0x8) + { + tile = false; + } else if (rt.tile_mode == 0xa) + { + tile = true; + } else + { + EXIT("unknown tile mode: %u\n", rt.tile_mode); + } + + if (rt.format == 0xa && rt.channel_type == 0x6 && rt.channel_order == 0x1) + { + format = 0x80000000; + } else + { + EXIT("unknown format"); + } + auto video_image = VideoOut::VideoOutGetImage(rt.base_addr); - EXIT_NOT_IMPLEMENTED(video_image.image == nullptr); - r->base_addr = rt.base_addr; - r->vulkan_buffer = video_image.image; - r->buffer_size = video_image.buffer_size; + if (video_image.image == nullptr) + { + // Offscreen buffer + VideoOutBufferObject vulkan_buffer_info(format, width, height, tile, Config::IsNeo(), pitch); + auto* buffer_vulkan = static_cast( + Graphics::GpuMemoryGetObject(g_render_ctx->GetGraphicCtx(), rt.base_addr, size, vulkan_buffer_info)); + EXIT_NOT_IMPLEMENTED(buffer_vulkan == nullptr); + r->base_addr = rt.base_addr; + r->vulkan_buffer = buffer_vulkan; + r->buffer_size = size; + } else + { + // Display buffer + EXIT_NOT_IMPLEMENTED(video_image.buffer_size != size); + EXIT_NOT_IMPLEMENTED(video_image.buffer_pitch != pitch); + r->base_addr = rt.base_addr; + r->vulkan_buffer = video_image.image; + r->buffer_size = video_image.buffer_size; + } } else { + // No color output r->base_addr = 0; r->vulkan_buffer = nullptr; r->buffer_size = 0; @@ -2782,7 +2985,7 @@ static void PrepareStorageBuffers(const ShaderStorageResources& storage_buffers, } // NOLINTNEXTLINE(readability-function-cognitive-complexity) -static void PrepareTextures(const ShaderTextureResources& textures, TextureVulkanImage** images, uint32_t** sgprs) +static void PrepareTextures(const ShaderTextureResources& textures, TextureVulkanImage** images, uint32_t** sgprs, bool with_sampler) { EXIT_IF(images == nullptr); EXIT_IF(sgprs == nullptr); @@ -2796,8 +2999,8 @@ static void PrepareTextures(const ShaderTextureResources& textures, TextureVulka EXIT_NOT_IMPLEMENTED(r.MinLod() != 0); EXIT_NOT_IMPLEMENTED(r.Dfmt() != 10 && r.Dfmt() != 37); EXIT_NOT_IMPLEMENTED(r.Nfmt() != 9); - EXIT_NOT_IMPLEMENTED(r.Width() != 511); - EXIT_NOT_IMPLEMENTED(r.Height() != 511); + // EXIT_NOT_IMPLEMENTED(r.Width() != 511); + // EXIT_NOT_IMPLEMENTED(r.Height() != 511); EXIT_NOT_IMPLEMENTED(r.PerfMod() != 7 && r.PerfMod() != 0); EXIT_NOT_IMPLEMENTED(r.Interlaced() != false); // EXIT_NOT_IMPLEMENTED(r.DstSelX() != 4); @@ -2810,7 +3013,7 @@ static void PrepareTextures(const ShaderTextureResources& textures, TextureVulka EXIT_NOT_IMPLEMENTED(!(r.LastLevel() == 0 && r.Pow2Pad() == false) && !(r.LastLevel() == 9 && r.Pow2Pad() == true)); EXIT_NOT_IMPLEMENTED(r.Type() != 9); EXIT_NOT_IMPLEMENTED(r.Depth() != 0); - EXIT_NOT_IMPLEMENTED(r.Pitch() != 511); + // EXIT_NOT_IMPLEMENTED(r.Pitch() != 511); EXIT_NOT_IMPLEMENTED(r.BaseArray() != 0); EXIT_NOT_IMPLEMENTED(r.LastArray() != 0); EXIT_NOT_IMPLEMENTED(r.MinLodWarn() != 0); @@ -2823,15 +3026,17 @@ static void PrepareTextures(const ShaderTextureResources& textures, TextureVulka bool neo = Config::IsNeo(); auto width = r.Width() + 1; auto height = r.Height() + 1; + auto pitch = r.Pitch() + 1; auto levels = r.LastLevel() - r.BaseLevel() + 1; bool tile = (r.TilingIdx() != 8); uint32_t swizzle = static_cast(r.DstSelX()) | (static_cast(r.DstSelY()) << 8u) | (static_cast(r.DstSelZ()) << 16u) | (static_cast(r.DstSelW()) << 24u); - TileGetTextureSize(r.Dfmt(), r.Nfmt(), width, height, levels, tile, neo, &size, nullptr, nullptr, nullptr); + TileGetTextureSize(r.Dfmt(), r.Nfmt(), width, height, pitch, levels, tile, neo, &size, nullptr, nullptr, nullptr); EXIT_NOT_IMPLEMENTED(size == 0); - TextureObject vulkan_texture_info(r.Dfmt(), r.Nfmt(), width, height, levels, tile, neo, swizzle); + TextureObject vulkan_texture_info(r.Dfmt(), r.Nfmt(), width, height, pitch, levels, tile, neo, swizzle, + (with_sampler ? TextureObject::TEXTURE_USAGE_SAMPLED : TextureObject::TEXTURE_USAGE_STORAGE)); auto* tex = static_cast(GpuMemoryGetObject(g_render_ctx->GetGraphicCtx(), addr, size, vulkan_texture_info)); @@ -2927,17 +3132,21 @@ static void PrepareGdsPointers(const ShaderGdsResources& gds_pointers, uint32_t* } static void BindDescriptors(VkCommandBuffer vk_buffer, VkPipelineBindPoint pipeline_bind_point, VkPipelineLayout layout, - const ShaderResources& bind, VkShaderStageFlags vk_stage, DescriptorCache::Stage stage) + const ShaderBindResources& bind, const ShaderBindParameters& bind_params, VkShaderStageFlags vk_stage, + DescriptorCache::Stage stage) { if (bind.push_constant_size > 0) { EXIT_NOT_IMPLEMENTED(bind.push_constant_size > DescriptorCache::PUSH_CONSTANTS_MAX * 4); EXIT_NOT_IMPLEMENTED(bind.storage_buffers.buffers_num > DescriptorCache::BUFFERS_MAX); - EXIT_NOT_IMPLEMENTED(bind.textures2D.textures_num > DescriptorCache::TEXTURES_MAX); + EXIT_NOT_IMPLEMENTED( + (bind_params.textures2D_without_sampler && bind.textures2D.textures_num > DescriptorCache::TEXTURES_STORAGE_MAX) || + (!bind_params.textures2D_without_sampler && bind.textures2D.textures_num > DescriptorCache::TEXTURES_SAMPLED_MAX)); EXIT_NOT_IMPLEMENTED(bind.samplers.samplers_num > DescriptorCache::SAMPLERS_MAX); VulkanBuffer* storage_buffers[DescriptorCache::BUFFERS_MAX]; - TextureVulkanImage* textures2d[DescriptorCache::TEXTURES_MAX]; + TextureVulkanImage* textures2d_sampled[DescriptorCache::TEXTURES_SAMPLED_MAX]; + TextureVulkanImage* textures2d_storage[DescriptorCache::TEXTURES_STORAGE_MAX]; uint64_t samplers[DescriptorCache::SAMPLERS_MAX]; uint32_t sgprs[DescriptorCache::PUSH_CONSTANTS_MAX]; @@ -2951,7 +3160,13 @@ static void BindDescriptors(VkCommandBuffer vk_buffer, VkPipelineBindPoint pipel } if (bind.textures2D.textures_num > 0) { - PrepareTextures(bind.textures2D, textures2d, &sgprs_ptr); + if (bind_params.textures2D_without_sampler) + { + PrepareTextures(bind.textures2D, textures2d_storage, &sgprs_ptr, false); + } else + { + PrepareTextures(bind.textures2D, textures2d_sampled, &sgprs_ptr, true); + } } if (bind.samplers.samplers_num > 0) { @@ -2963,12 +3178,12 @@ static void BindDescriptors(VkCommandBuffer vk_buffer, VkPipelineBindPoint pipel gds_buffer = g_render_ctx->GetGdsBuffer()->GetBuffer(g_render_ctx->GetGraphicCtx()); } - // EXIT_NOT_IMPLEMENTED(bind.gds_pointers.pointers_num > 0); - EXIT_IF(bind.push_constant_size != (sgprs_ptr - sgprs) * 4); auto* descriptor_set = g_render_ctx->GetDescriptorCache()->GetDescriptor( - stage, bind.storage_buffers.buffers_num, storage_buffers, bind.textures2D.textures_num, textures2d, bind.samplers.samplers_num, + stage, bind.storage_buffers.buffers_num, storage_buffers, + (bind_params.textures2D_without_sampler ? 0 : bind.textures2D.textures_num), textures2d_sampled, + (bind_params.textures2D_without_sampler ? bind.textures2D.textures_num : 0), textures2d_storage, bind.samplers.samplers_num, samplers, (gds_buffer != nullptr ? 1 : 0), &gds_buffer); EXIT_IF(descriptor_set == nullptr); @@ -3073,11 +3288,11 @@ void GraphicsRenderDrawIndex(CommandBuffer* buffer, HardwareContext* ctx, UserCo vkCmdBindVertexBuffers(vk_buffer, i, 1, &vertices->buffer, &offset); } - BindDescriptors(vk_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline_layout, vs_input_info.bind, VK_SHADER_STAGE_VERTEX_BIT, - DescriptorCache::Stage::Vertex); + BindDescriptors(vk_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline_layout, vs_input_info.bind, + pipeline->additional_params->vs_bind, VK_SHADER_STAGE_VERTEX_BIT, DescriptorCache::Stage::Vertex); - BindDescriptors(vk_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline_layout, ps_input_info.bind, VK_SHADER_STAGE_FRAGMENT_BIT, - DescriptorCache::Stage::Pixel); + BindDescriptors(vk_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline_layout, ps_input_info.bind, + pipeline->additional_params->ps_bind, VK_SHADER_STAGE_FRAGMENT_BIT, DescriptorCache::Stage::Pixel); VulkanBuffer* indices = static_cast( GpuMemoryGetObject(g_render_ctx->GetGraphicCtx(), reinterpret_cast(index_addr), index_size, IndexBufferGpuObject())); @@ -3178,11 +3393,11 @@ void GraphicsRenderDrawIndexAuto(CommandBuffer* buffer, HardwareContext* ctx, Us vkCmdBindVertexBuffers(vk_buffer, i, 1, &vertices->buffer, &offset); } - BindDescriptors(vk_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline_layout, vs_input_info.bind, VK_SHADER_STAGE_VERTEX_BIT, - DescriptorCache::Stage::Vertex); + BindDescriptors(vk_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline_layout, vs_input_info.bind, + pipeline->additional_params->vs_bind, VK_SHADER_STAGE_VERTEX_BIT, DescriptorCache::Stage::Vertex); - BindDescriptors(vk_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline_layout, ps_input_info.bind, VK_SHADER_STAGE_FRAGMENT_BIT, - DescriptorCache::Stage::Pixel); + BindDescriptors(vk_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline_layout, ps_input_info.bind, + pipeline->additional_params->ps_bind, VK_SHADER_STAGE_FRAGMENT_BIT, DescriptorCache::Stage::Pixel); switch (ucfg->GetPrimType()) { @@ -3220,9 +3435,6 @@ void GraphicsRenderDispatchDirect(CommandBuffer* buffer, HardwareContext* ctx, u } EXIT_NOT_IMPLEMENTED(mode != 0); - // EXIT_NOT_IMPLEMENTED(thread_group_x != 2); - // EXIT_NOT_IMPLEMENTED(thread_group_y != 1); - // EXIT_NOT_IMPLEMENTED(thread_group_z != 1); const auto& cs_regs = ctx->GetCs(); @@ -3235,12 +3447,10 @@ void GraphicsRenderDispatchDirect(CommandBuffer* buffer, HardwareContext* ctx, u vkCmdBindPipeline(vk_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline); - BindDescriptors(vk_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline_layout, input_info.bind, VK_SHADER_STAGE_COMPUTE_BIT, - DescriptorCache::Stage::Compute); + BindDescriptors(vk_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline_layout, input_info.bind, + pipeline->additional_params->cs_bind, VK_SHADER_STAGE_COMPUTE_BIT, DescriptorCache::Stage::Compute); vkCmdDispatch(vk_buffer, thread_group_x, thread_group_y, thread_group_z); - - // KYTY_NOT_IMPLEMENTED; } void GraphicsRenderMemoryBarrier(CommandBuffer* buffer) @@ -3381,6 +3591,36 @@ void GraphicsRenderWriteAtEndOfPipeWithWriteBack(CommandBuffer* buffer, uint64_t LabelSet(buffer, label); } +void GraphicsRenderWriteAtEndOfPipeWithInterruptWriteBack(CommandBuffer* buffer, uint64_t* dst_gpu_addr, uint64_t value) +{ + EXIT_IF(g_render_ctx == nullptr); + EXIT_IF(dst_gpu_addr == nullptr); + EXIT_IF(buffer == nullptr); + EXIT_IF(buffer->IsInvalid()); + + Core::LockGuard lock(g_render_ctx->GetMutex()); + + Graphics::LabelGpuObject label_info( + value, + [](const uint64_t* /*args*/) + { + EXIT_IF(g_render_ctx == nullptr); + GpuMemoryWriteBack(g_render_ctx->GetGraphicCtx()); + return true; + }, + [](const uint64_t* /*args*/) + { + EXIT_IF(g_render_ctx == nullptr); + g_render_ctx->TriggerEopEvent(); + return true; + }); + + auto* label = + static_cast(GpuMemoryGetObject(g_render_ctx->GetGraphicCtx(), reinterpret_cast(dst_gpu_addr), 8, label_info)); + + LabelSet(buffer, label); +} + void GraphicsRenderWriteAtEndOfPipeWithInterrupt(CommandBuffer* buffer, uint64_t* dst_gpu_addr, uint64_t value) { EXIT_IF(g_render_ctx == nullptr); @@ -3394,13 +3634,7 @@ void GraphicsRenderWriteAtEndOfPipeWithInterrupt(CommandBuffer* buffer, uint64_t [](const uint64_t* /*args*/) { EXIT_IF(g_render_ctx == nullptr); - EXIT_IF(g_render_ctx->GetEopEq() == nullptr); - - auto result = LibKernel::EventQueue::KernelTriggerEvent( - g_render_ctx->GetEopEq(), GRAPHICS_EVENT_EOP, LibKernel::EventQueue::KERNEL_EVFILT_GRAPHICS, - reinterpret_cast(LibKernel::KernelReadTsc())); - - EXIT_NOT_IMPLEMENTED(result != OK); + g_render_ctx->TriggerEopEvent(); return true; }); @@ -3441,13 +3675,7 @@ void GraphicsRenderWriteAtEndOfPipeWithInterruptWriteBackFlip(CommandBuffer* buf [](const uint64_t* /*args*/) { EXIT_IF(g_render_ctx == nullptr); - EXIT_IF(g_render_ctx->GetEopEq() == nullptr); - - auto result = LibKernel::EventQueue::KernelTriggerEvent(g_render_ctx->GetEopEq(), GRAPHICS_EVENT_EOP, - LibKernel::EventQueue::KERNEL_EVFILT_GRAPHICS, - reinterpret_cast(LibKernel::KernelReadTsc())); - - EXIT_NOT_IMPLEMENTED(result != OK); + g_render_ctx->TriggerEopEvent(); return true; }, args); @@ -3458,6 +3686,39 @@ void GraphicsRenderWriteAtEndOfPipeWithInterruptWriteBackFlip(CommandBuffer* buf LabelSet(buffer, label); } +void GraphicsRenderWriteAtEndOfPipeWithFlip(CommandBuffer* buffer, uint32_t* dst_gpu_addr, uint32_t value, int handle, int index, + int flip_mode, int64_t flip_arg) +{ + EXIT_IF(g_render_ctx == nullptr); + EXIT_IF(dst_gpu_addr == nullptr); + EXIT_IF(buffer == nullptr); + EXIT_IF(buffer->IsInvalid()); + + Core::LockGuard lock(g_render_ctx->GetMutex()); + + uint64_t args[] = {static_cast(handle), static_cast(index), static_cast(flip_mode), + static_cast(flip_arg)}; + + Graphics::LabelGpuObject label_info( + value, + [](const uint64_t* args) + { + int handle = static_cast(args[0]); + int index = static_cast(args[1]); + int flip_mode = static_cast(args[2]); + int64_t flip_arg = static_cast(args[3]); + + VideoOut::VideoOutSubmitFlip(handle, index, flip_mode, flip_arg); + return true; + }, + nullptr, args); + + auto* label = + static_cast(GpuMemoryGetObject(g_render_ctx->GetGraphicCtx(), reinterpret_cast(dst_gpu_addr), 4, label_info)); + + LabelSet(buffer, label); +} + void GraphicsRenderWriteBack() { Core::LockGuard lock(g_render_ctx->GetMutex()); @@ -3475,12 +3736,13 @@ static void eop_event_reset_func(LibKernel::EventQueue::KernelEqueueEvent* event event->event.data = 0; } -static void eop_event_delete_func(LibKernel::EventQueue::KernelEqueueEvent* event) +static void eop_event_delete_func(LibKernel::EventQueue::KernelEqueue eq, LibKernel::EventQueue::KernelEqueueEvent* event) { EXIT_IF(event == nullptr); EXIT_IF(g_render_ctx == nullptr); - EXIT_IF(g_render_ctx->GetEopEq() == nullptr); - g_render_ctx->SetEopEq(nullptr); + EXIT_NOT_IMPLEMENTED(event->event.ident != GRAPHICS_EVENT_EOP); + EXIT_NOT_IMPLEMENTED(event->event.filter != LibKernel::EventQueue::KERNEL_EVFILT_GRAPHICS); + g_render_ctx->DeleteEopEq(eq); } static void eop_event_trigger_func(LibKernel::EventQueue::KernelEqueueEvent* event, void* trigger_data) @@ -3496,23 +3758,22 @@ int GraphicsRenderAddEqEvent(LibKernel::EventQueue::KernelEqueue eq, int id, voi EXIT_IF(g_render_ctx == nullptr); EXIT_NOT_IMPLEMENTED(id != GRAPHICS_EVENT_EOP); - EXIT_NOT_IMPLEMENTED(g_render_ctx->GetEopEq() != nullptr); LibKernel::EventQueue::KernelEqueueEvent event; - event.triggered = false; - event.event.ident = GRAPHICS_EVENT_EOP; - event.event.filter = LibKernel::EventQueue::KERNEL_EVFILT_GRAPHICS; - event.event.udata = udata; - event.event.fflags = 0; - event.event.data = 0; - event.filter.delete_func = eop_event_delete_func; - event.filter.reset_func = eop_event_reset_func; - event.filter.trigger_func = eop_event_trigger_func; - event.filter.data = nullptr; + event.triggered = false; + event.event.ident = GRAPHICS_EVENT_EOP; + event.event.filter = LibKernel::EventQueue::KERNEL_EVFILT_GRAPHICS; + event.event.udata = udata; + event.event.fflags = 0; + event.event.data = 0; + event.filter.delete_event_func = eop_event_delete_func; + event.filter.reset_func = eop_event_reset_func; + event.filter.trigger_func = eop_event_trigger_func; + event.filter.data = nullptr; int result = LibKernel::EventQueue::KernelAddEvent(eq, event); - g_render_ctx->SetEopEq(eq); + g_render_ctx->AddEopEq(eq); return result; } @@ -3522,11 +3783,10 @@ int GraphicsRenderDeleteEqEvent(LibKernel::EventQueue::KernelEqueue eq, int id) EXIT_IF(g_render_ctx == nullptr); EXIT_NOT_IMPLEMENTED(id != GRAPHICS_EVENT_EOP); - EXIT_NOT_IMPLEMENTED(g_render_ctx->GetEopEq() == nullptr); int result = LibKernel::EventQueue::KernelDeleteEvent(eq, GRAPHICS_EVENT_EOP, LibKernel::EventQueue::KERNEL_EVFILT_GRAPHICS); - g_render_ctx->SetEopEq(nullptr); + g_render_ctx->DeleteEopEq(eq); return result; } diff --git a/source/emulator/src/Graphics/GraphicsRun.cpp b/source/emulator/src/Graphics/GraphicsRun.cpp index 373cdfe..a57f3bb 100644 --- a/source/emulator/src/Graphics/GraphicsRun.cpp +++ b/source/emulator/src/Graphics/GraphicsRun.cpp @@ -60,9 +60,12 @@ public: void DrawIndex(uint32_t index_count, const void* index_addr, uint32_t flags, uint32_t type); void DrawIndexAuto(uint32_t index_count, uint32_t flags); void WriteAtEndOfPipe32(uint32_t cache_policy, uint32_t event_write_dest, uint32_t eop_event_type, uint32_t cache_action, - uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint32_t value); + uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint32_t value, + uint32_t interrupt_selector); void WriteAtEndOfPipe64(uint32_t cache_policy, uint32_t event_write_dest, uint32_t eop_event_type, uint32_t cache_action, - uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint64_t value); + uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint64_t value, + uint32_t interrupt_selector); + void Flip(void* dst_gpu_addr, uint32_t value); void FlipWithInterrupt(uint32_t eop_event_type, uint32_t cache_action, void* dst_gpu_addr, uint32_t value); void WriteBack(); void MemoryBarrier(); @@ -140,6 +143,7 @@ public: int flip_mode, int64_t flip_arg); void Done(); void WaitForIdle(); + bool IsIdle(); void SetCp(CommandProcessor* cp) { @@ -197,6 +201,7 @@ public: void DingDong(uint32_t offset_dw); void Done(); void WaitForIdle(); + bool IsIdle(); void SetCp(CommandProcessor* cp) { @@ -257,6 +262,7 @@ public: void Done(); void Wait(); int GetFrameNum(); + bool AreSubmitsAllowed(); private: void Init(); @@ -493,6 +499,27 @@ void Gpu::Done() m_done_num++; } +bool Gpu::AreSubmitsAllowed() +{ + Core::LockGuard lock(m_mutex); + + if (m_gfx_ring->IsIdle()) + { + for (auto& cr: m_compute_ring) + { + if (cr != nullptr) + { + if (!cr->IsIdle()) + { + return false; + } + } + } + return true; + } + return false; +} + int Gpu::GetFrameNum() { Core::LockGuard lock(m_mutex); @@ -583,6 +610,7 @@ void CommandProcessor::BufferInit() EXIT_IF(buf != nullptr); buf = new CommandBuffer; + buf->SetParent(this); buf->SetQueue(m_queue); } @@ -777,6 +805,12 @@ void GraphicsRing::WaitForIdle() } } +bool GraphicsRing::IsIdle() +{ + Core::LockGuard lock(m_mutex); + return m_idle; +} + GraphicsRing::CmdBatch GraphicsRing::GetCmdBatch() { Core::LockGuard lock(m_mutex); @@ -956,6 +990,12 @@ void ComputeRing::WaitForIdle() } } +bool ComputeRing::IsIdle() +{ + Core::LockGuard lock(m_mutex); + return m_idle; +} + void ComputeRing::SetActive(bool flag) { Core::LockGuard lock(m_mutex); @@ -1078,7 +1118,8 @@ void CommandProcessor::WaitFlipDone(uint32_t video_out_handle, uint32_t display_ } void CommandProcessor::WriteAtEndOfPipe32(uint32_t cache_policy, uint32_t event_write_dest, uint32_t eop_event_type, uint32_t cache_action, - uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint32_t value) + uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint32_t value, + uint32_t interrupt_selector) { Core::LockGuard lock(m_mutex); @@ -1091,12 +1132,13 @@ void CommandProcessor::WriteAtEndOfPipe32(uint32_t cache_policy, uint32_t event_ printf("\t cache_action = 0x%08" PRIx32 "\n", cache_action); printf("\t event_index = 0x%08" PRIx32 "\n", event_index); printf("\t event_write_source = 0x%08" PRIx32 "\n", event_write_source); + printf("\t interrupt_selector = 0x%08" PRIx32 "\n", interrupt_selector); printf("\t dst_gpu_addr = 0x%016" PRIx64 "\n", reinterpret_cast(dst_gpu_addr)); printf("\t value = 0x%08" PRIx32 "\n", value); EXIT_NOT_IMPLEMENTED(cache_policy != 0x00000000); EXIT_NOT_IMPLEMENTED(event_write_dest != 0x00000000); - // EXIT_NOT_IMPLEMENTED(event_write_source != 0x00000002); + EXIT_NOT_IMPLEMENTED(interrupt_selector != 0x0); if (event_write_source == 0x00000002 && eop_event_type == 0x0000002f && cache_action == 0x00000000 && event_index == 0x00000006) { @@ -1111,7 +1153,8 @@ void CommandProcessor::WriteAtEndOfPipe32(uint32_t cache_policy, uint32_t event_ } void CommandProcessor::WriteAtEndOfPipe64(uint32_t cache_policy, uint32_t event_write_dest, uint32_t eop_event_type, uint32_t cache_action, - uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint64_t value) + uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint64_t value, + uint32_t interrupt_selector) { Core::LockGuard lock(m_mutex); @@ -1124,28 +1167,38 @@ void CommandProcessor::WriteAtEndOfPipe64(uint32_t cache_policy, uint32_t event_ printf("\t cache_action = 0x%08" PRIx32 "\n", cache_action); printf("\t event_index = 0x%08" PRIx32 "\n", event_index); printf("\t event_write_source = 0x%08" PRIx32 "\n", event_write_source); + printf("\t interrupt_selector = 0x%08" PRIx32 "\n", interrupt_selector); printf("\t dst_gpu_addr = 0x%016" PRIx64 "\n", reinterpret_cast(dst_gpu_addr)); printf("\t value = 0x%016" PRIx64 "\n", value); EXIT_NOT_IMPLEMENTED(cache_policy != 0x00000000); EXIT_NOT_IMPLEMENTED(event_write_dest != 0x00000000); - // EXIT_NOT_IMPLEMENTED(event_write_source != 0x00000002); - if (eop_event_type == 0x00000004 && cache_action == 0x00000000 && event_index == 0x00000005 && event_write_source == 0x00000002) + if (eop_event_type == 0x04 && cache_action == 0x00 && event_index == 0x05 && event_write_source == 0x02 && + (interrupt_selector == 0x00 || interrupt_selector == 0x03)) { GraphicsRenderWriteAtEndOfPipe(m_buffer[m_current_buffer], static_cast(dst_gpu_addr), value); - } else if (eop_event_type == 0x00000004 && cache_action == 0x00000000 && event_index == 0x00000005 && event_write_source == 0x00000001) + } else if (eop_event_type == 0x04 && cache_action == 0x00 && event_index == 0x05 && event_write_source == 0x01 && + (interrupt_selector == 0x00 || interrupt_selector == 0x03)) { GraphicsRenderWriteAtEndOfPipe(m_buffer[m_current_buffer], static_cast(dst_gpu_addr), value); - } else if (((eop_event_type == 0x00000004 && event_index == 0x00000005) || - (eop_event_type == 0x00000028 && event_index == 0x00000005) || - (eop_event_type == 0x0000002f && event_index == 0x00000006)) && - cache_action == 0x00000038 && event_write_source == 0x00000002) + } else if (((eop_event_type == 0x04 && event_index == 0x05) || (eop_event_type == 0x28 && event_index == 0x05) || + (eop_event_type == 0x2f && event_index == 0x06)) && + cache_action == 0x38 && event_write_source == 0x02 && (interrupt_selector == 0x00 || interrupt_selector == 0x03)) { GraphicsRenderWriteAtEndOfPipeWithWriteBack(m_buffer[m_current_buffer], static_cast(dst_gpu_addr), value); - } else if (eop_event_type == 0x00000004 && cache_action == 0x00000000 && event_index == 0x00000005 && event_write_source == 0x00000004) + } else if (eop_event_type == 0x04 && cache_action == 0x00 && event_index == 0x05 && event_write_source == 0x04 && + (interrupt_selector == 0x00 || interrupt_selector == 0x03)) { GraphicsRenderWriteAtEndOfPipeClockCounter(m_buffer[m_current_buffer], static_cast(dst_gpu_addr)); + } else if ((eop_event_type == 0x04 && event_index == 0x05) && cache_action == 0x00 && event_write_source == 0x02 && + interrupt_selector == 0x02) + { + GraphicsRenderWriteAtEndOfPipeWithInterrupt(m_buffer[m_current_buffer], static_cast(dst_gpu_addr), value); + } else if ((eop_event_type == 0x04 && event_index == 0x05) && cache_action == 0x3b && event_write_source == 0x02 && + interrupt_selector == 0x02) + { + GraphicsRenderWriteAtEndOfPipeWithInterruptWriteBack(m_buffer[m_current_buffer], static_cast(dst_gpu_addr), value); } else { EXIT("unknown event type\n"); @@ -1178,6 +1231,20 @@ void CommandProcessor::TriggerEvent(uint32_t event_type, uint32_t event_index) } } +void CommandProcessor::Flip(void* dst_gpu_addr, uint32_t value) +{ + Core::LockGuard lock(m_mutex); + + EXIT_IF(m_current_buffer < 0 || m_current_buffer >= VK_BUFFERS_NUM); + + printf("CommandProcessor::Flip()\n"); + printf("\t dst_gpu_addr = 0x%016" PRIx64 "\n", reinterpret_cast(dst_gpu_addr)); + printf("\t value = 0x%08" PRIx32 "\n", value); + + GraphicsRenderWriteAtEndOfPipeWithFlip(m_buffer[m_current_buffer], static_cast(dst_gpu_addr), value, m_flip.handle, + m_flip.index, m_flip.flip_mode, m_flip.flip_arg); +} + void CommandProcessor::FlipWithInterrupt(uint32_t eop_event_type, uint32_t cache_action, void* dst_gpu_addr, uint32_t value) { Core::LockGuard lock(m_mutex); @@ -1207,6 +1274,13 @@ void CommandProcessor::WriteBack() GraphicsRenderWriteBack(); } +void CommandBuffer::CommandProcessorWait() +{ + EXIT_IF(m_parent == nullptr); + + m_parent->BufferWait(); +} + void GraphicsRunSubmit(uint32_t* cmd_draw_buffer, uint32_t num_draw_dw, uint32_t* cmd_const_buffer, uint32_t num_const_dw) { EXIT_IF(cmd_draw_buffer == nullptr); @@ -1262,6 +1336,13 @@ void GraphicsRunDone() g_gpu->Done(); } +bool GraphicsRunAreSubmitsAllowed() +{ + EXIT_IF(g_gpu == nullptr); + + return g_gpu->AreSubmitsAllowed(); +} + int GraphicsRunGetFrameNum() { EXIT_IF(g_gpu == nullptr); @@ -1456,11 +1537,12 @@ KYTY_CP_OP_PARSER(cp_op_event_write_eop) uint32_t cache_action = (buffer[0] >> 12u) & 0x3fu; uint32_t event_index = (buffer[0] >> 8u) & 0x7u; uint32_t event_write_source = ((buffer[2] >> 29u) & 0x7u); + uint32_t interrupt_selector = (buffer[2] >> 24u) & 0x7u; auto* dst_gpu_addr = reinterpret_cast(buffer[1] | (static_cast(buffer[2] & 0xffffu) << 32u)); uint64_t value = (buffer[3] | (static_cast(buffer[4]) << 32u)); cp->WriteAtEndOfPipe64(cache_policy, event_write_dest, eop_event_type, cache_action, event_index, event_write_source, dst_gpu_addr, - value); + value, interrupt_selector); return 5; } @@ -1477,12 +1559,13 @@ KYTY_CP_OP_PARSER(cp_op_event_write_eos) uint32_t cache_action = (buffer[0] >> 12u) & 0x3fu; uint32_t event_index = (buffer[0] >> 8u) & 0x7u; uint32_t event_write_source = ((buffer[2] >> 29u) & 0x7u); + uint32_t interrupt_selector = (buffer[2] >> 24u) & 0x7u; auto* dst_gpu_addr = reinterpret_cast(buffer[1] | (static_cast(buffer[2] & 0xffffu) << 32u)); uint32_t value = buffer[3]; cp->WriteAtEndOfPipe32(cache_policy, event_write_dest, eop_event_type, cache_action, event_index, event_write_source, dst_gpu_addr, - value); + value, interrupt_selector); return 4; } @@ -1503,10 +1586,8 @@ KYTY_CP_OP_PARSER(cp_op_release_mem) auto* dst_gpu_addr = reinterpret_cast(buffer[2] | (static_cast(buffer[3]) << 32u)); uint64_t value = (buffer[4] | (static_cast(buffer[5]) << 32u)); - EXIT_NOT_IMPLEMENTED(interrupt_selector != 0x3); - cp->WriteAtEndOfPipe64(cache_policy, event_write_dest, eop_event_type, cache_action, event_index, event_write_source, dst_gpu_addr, - value); + value, interrupt_selector); return 6; } @@ -1764,6 +1845,13 @@ KYTY_CP_OP_PARSER(cp_op_marker) case 0x0: cp->SetEmbeddedDataMarker(buffer + 1, len_dw, align); break; case 0x4: cp->SetUserDataMarker(UserSgprType::Vsharp); break; case 0xd: cp->SetUserDataMarker(UserSgprType::Region); break; + case 0x778: + { + auto* addr = reinterpret_cast(buffer[1] | (static_cast(buffer[2]) << 32u)); + uint32_t value = buffer[3]; + cp->Flip(addr, value); + break; + } case 0x781: { auto* addr = reinterpret_cast(buffer[1] | (static_cast(buffer[2]) << 32u)); diff --git a/source/emulator/src/Graphics/Label.cpp b/source/emulator/src/Graphics/Label.cpp index 4990cf1..d012dd8 100644 --- a/source/emulator/src/Graphics/Label.cpp +++ b/source/emulator/src/Graphics/Label.cpp @@ -14,11 +14,19 @@ namespace Kyty::Libs::Graphics { +enum LabelStatus +{ + New, + Active, + ActiveDeleted, + NotActive, +}; + struct Label { VkDevice device = nullptr; VkEvent event = nullptr; - bool active = false; + LabelStatus status = LabelStatus::New; uint64_t* dst_gpu_addr64 = nullptr; uint64_t value64 = 0; uint32_t* dst_gpu_addr32 = nullptr; @@ -26,6 +34,7 @@ struct Label LabelGpuObject::callback_t callback_1 = nullptr; LabelGpuObject::callback_t callback_2 = nullptr; uint64_t args[4] = {}; + CommandBuffer* buffer = nullptr; }; class LabelManager @@ -67,15 +76,22 @@ void LabelManager::ThreadRun(void* data) int active_count = 0; + Vector deleted_labels; + for (auto& label: manager->m_labels) { - if (label->active) + if (label->status == LabelStatus::Active || label->status == LabelStatus::ActiveDeleted) { active_count++; if (vkGetEventStatus(label->device, label->event) == VK_EVENT_SET) { - label->active = false; + if (label->status == LabelStatus::ActiveDeleted) + { + deleted_labels.Add(label); + } + + label->status = LabelStatus::NotActive; bool write = true; @@ -108,6 +124,11 @@ void LabelManager::ThreadRun(void* data) } } + for (auto& label: deleted_labels) + { + manager->Delete(label); + } + if (active_count == 0) { manager->m_cond_var.Wait(&manager->m_mutex); @@ -129,7 +150,7 @@ Label* LabelManager::Create(GraphicContext* ctx, uint64_t* dst_gpu_addr, uint64_ auto* label = new Label; - label->active = false; + label->status = LabelStatus::New; label->dst_gpu_addr64 = dst_gpu_addr; label->value64 = value; label->dst_gpu_addr32 = nullptr; @@ -142,6 +163,7 @@ Label* LabelManager::Create(GraphicContext* ctx, uint64_t* dst_gpu_addr, uint64_ label->args[1] = args[1]; label->args[2] = args[2]; label->args[3] = args[3]; + label->buffer = nullptr; VkEventCreateInfo create_info {}; create_info.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; @@ -168,7 +190,7 @@ Label* LabelManager::Create(GraphicContext* ctx, uint32_t* dst_gpu_addr, uint32_ auto* label = new Label; - label->active = false; + label->status = LabelStatus::New; label->dst_gpu_addr32 = dst_gpu_addr; label->value32 = value; label->dst_gpu_addr64 = nullptr; @@ -208,13 +230,24 @@ void LabelManager::Delete(Label* label) EXIT_NOT_IMPLEMENTED(!m_labels.IndexValid(index)); - m_labels.RemoveAt(index); + EXIT_NOT_IMPLEMENTED(label->status != LabelStatus::NotActive && label->status != LabelStatus::Active); - EXIT_NOT_IMPLEMENTED(label->active); + if (label->status == LabelStatus::Active) + { + label->status = LabelStatus::ActiveDeleted; + } else + { + m_labels.RemoveAt(index); - vkDestroyEvent(label->device, label->event, nullptr); + EXIT_IF(label->buffer == nullptr); - delete label; + // All submitted commands that refer to event must have completed execution + label->buffer->CommandProcessorWait(); + + vkDestroyEvent(label->device, label->event, nullptr); + + delete label; + } } void LabelManager::Set(CommandBuffer* buffer, Label* label) @@ -231,12 +264,14 @@ void LabelManager::Set(CommandBuffer* buffer, Label* label) EXIT_NOT_IMPLEMENTED(!m_labels.IndexValid(index)); - EXIT_NOT_IMPLEMENTED(label->active); + EXIT_NOT_IMPLEMENTED(label->status != LabelStatus::New && label->status != LabelStatus::NotActive); - label->active = true; + label->status = LabelStatus::Active; EXIT_IF(label->event == nullptr); + label->buffer = buffer; + auto* vk_buffer = buffer->GetPool()->buffers[buffer->GetIndex()]; EXIT_NOT_IMPLEMENTED(vk_buffer == nullptr); diff --git a/source/emulator/src/Graphics/Shader.cpp b/source/emulator/src/Graphics/Shader.cpp index 4366ae0..1dd8777 100644 --- a/source/emulator/src/Graphics/Shader.cpp +++ b/source/emulator/src/Graphics/Shader.cpp @@ -35,6 +35,8 @@ [[maybe_unused]] uint32_t dw, [[maybe_unused]] uint32_t num_dw #define KYTY_CP_OP_PARSER(f) static uint32_t f(KYTY_CP_OP_PARSER_ARGS) +KYTY_ENUM_RANGE(Kyty::Libs::Graphics::ShaderInstructionType, 0, static_cast(Kyty::Libs::Graphics::ShaderInstructionType::ZMax)); + namespace Kyty::Libs::Graphics { struct ShaderBinaryInfo @@ -212,6 +214,7 @@ static String dbg_fmt_to_str(const ShaderInstruction& inst) case ShaderInstructionFormat::Vdata4Vaddr2SvSoffsOffenIdxenFloat4: return U"Vdata4Vaddr2SvSoffsOffenIdxenFloat4"; break; case ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7: return U"Vdata4Vaddr3StSsDmask7"; break; case ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF: return U"Vdata4Vaddr3StSsDmaskF"; break; + case ShaderInstructionFormat::Vdata4Vaddr3StDmaskF: return U"Vdata4Vaddr3StDmaskF"; break; case ShaderInstructionFormat::SVdstSVsrc0SVsrc1: return U"SVdstSVsrc0SVsrc1"; break; case ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2: return U"VdstVsrc0Vsrc1Smask2"; break; case ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2: return U"VdstVsrc0Vsrc1Vsrc2"; break; @@ -474,7 +477,32 @@ KYTY_SHADER_PARSER(shader_parse_sopc) KYTY_SHADER_PARSER(shader_parse_sopk) { - KYTY_NOT_IMPLEMENTED; + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + uint32_t opcode = (buffer[0] >> 23u) & 0x1fu; + auto imm = static_cast(buffer[0] >> 0u & 0xffffu); + uint32_t sdst = (buffer[0] >> 16u) & 0x7fu; + + ShaderInstruction inst; + inst.pc = pc; + inst.dst = operand_parse(sdst); + + switch (opcode) // NOLINT + { + case 0x00: + inst.type = ShaderInstructionType::SMovkI32; + inst.format = ShaderInstructionFormat::SVdstSVsrc0; + inst.src[0].type = ShaderOperandType::IntegerInlineConstant; + inst.src[0].constant.i = imm; + inst.src_num = 1; + break; + default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown sopk opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc); + } + + dst->GetInstructions().Add(inst); + return 1; } @@ -659,6 +687,13 @@ KYTY_SHADER_PARSER(shader_parse_sop2) inst.src[1].size = 2; break; case 0x0e: inst.type = ShaderInstructionType::SAndB32; break; + case 0x0f: + inst.type = ShaderInstructionType::SAndB64; + inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; + inst.dst.size = 2; + inst.src[0].size = 2; + inst.src[1].size = 2; + break; case 0x11: inst.type = ShaderInstructionType::SOrB64; inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; @@ -666,6 +701,13 @@ KYTY_SHADER_PARSER(shader_parse_sop2) inst.src[0].size = 2; inst.src[1].size = 2; break; + case 0x13: + inst.type = ShaderInstructionType::SXorB64; + inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; + inst.dst.size = 2; + inst.src[0].size = 2; + inst.src[1].size = 2; + break; case 0x15: inst.type = ShaderInstructionType::SAndn2B64; inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; @@ -673,6 +715,20 @@ KYTY_SHADER_PARSER(shader_parse_sop2) inst.src[0].size = 2; inst.src[1].size = 2; break; + case 0x17: + inst.type = ShaderInstructionType::SOrn2B64; + inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; + inst.dst.size = 2; + inst.src[0].size = 2; + inst.src[1].size = 2; + break; + case 0x19: + inst.type = ShaderInstructionType::SNandB64; + inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; + inst.dst.size = 2; + inst.src[0].size = 2; + inst.src[1].size = 2; + break; case 0x1b: inst.type = ShaderInstructionType::SNorB64; inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; @@ -680,6 +736,13 @@ KYTY_SHADER_PARSER(shader_parse_sop2) inst.src[0].size = 2; inst.src[1].size = 2; break; + case 0x1d: + inst.type = ShaderInstructionType::SXnorB64; + inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; + inst.dst.size = 2; + inst.src[0].size = 2; + inst.src[1].size = 2; + break; case 0x1e: inst.type = ShaderInstructionType::SLshlB32; break; case 0x20: inst.type = ShaderInstructionType::SLshrB32; break; case 0x24: inst.type = ShaderInstructionType::SBfmB32; break; @@ -757,6 +820,7 @@ KYTY_SHADER_PARSER(shader_parse_vopc) case 0xd2: inst.type = ShaderInstructionType::VCmpxEqU32; break; case 0xd4: inst.type = ShaderInstructionType::VCmpxGtU32; break; case 0xd5: inst.type = ShaderInstructionType::VCmpxNeU32; break; + case 0xd6: inst.type = ShaderInstructionType::VCmpxGeU32; break; default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown vopc opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc); } @@ -1047,7 +1111,10 @@ KYTY_SHADER_PARSER(shader_parse_vop3) case 0xc5: inst.type = ShaderInstructionType::VCmpNeU32; break; case 0xc6: inst.type = ShaderInstructionType::VCmpGeU32; break; case 0xc7: inst.type = ShaderInstructionType::VCmpTU32; break; + case 0xd2: inst.type = ShaderInstructionType::VCmpxEqU32; break; case 0xd4: inst.type = ShaderInstructionType::VCmpxGtU32; break; + case 0xd5: inst.type = ShaderInstructionType::VCmpxNeU32; break; + case 0xd6: inst.type = ShaderInstructionType::VCmpxGeU32; break; case 0x100: inst.type = ShaderInstructionType::VCndmaskB32; inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2; @@ -1441,7 +1508,7 @@ KYTY_SHADER_PARSER(shader_parse_mimg) EXIT_IF(buffer == nullptr || buffer < src); uint32_t slc = (buffer[0] >> 25u) & 0x1u; - uint32_t opcode = (buffer[0] >> 18u) & 0x1fu; + uint32_t opcode = (buffer[0] >> 18u) & 0x7fu; uint32_t lwe = (buffer[0] >> 17u) & 0x1u; uint32_t tff = (buffer[0] >> 16u) & 0x1u; uint32_t r128 = (buffer[0] >> 15u) & 0x1u; @@ -1474,23 +1541,33 @@ KYTY_SHADER_PARSER(shader_parse_mimg) inst.src[1] = operand_parse(srsrc * 4); inst.src[2] = operand_parse(ssamp * 4); - if (dmask == 0x7) - { - inst.format = ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7; - inst.dst.size = 3; - } else if (dmask == 0xf) - { - inst.format = ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF; - inst.dst.size = 4; - } - - switch (opcode) // NOLINT + switch (opcode) { case 0x00: + inst.type = ShaderInstructionType::ImageLoad; + inst.src[0].size = 3; + inst.src[1].size = 8; + inst.src_num = 2; + if (dmask == 0xf) + { + inst.format = ShaderInstructionFormat::Vdata4Vaddr3StDmaskF; + inst.dst.size = 4; + } + break; + case 0x20: inst.type = ShaderInstructionType::ImageSample; inst.src[0].size = 3; inst.src[1].size = 8; inst.src[2].size = 4; + if (dmask == 0x7) + { + inst.format = ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7; + inst.dst.size = 3; + } else if (dmask == 0xf) + { + inst.format = ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF; + inst.dst.size = 4; + } break; default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown mimg opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc); } @@ -2222,7 +2299,7 @@ static void ShaderGetGdsPointer(ShaderGdsResources* info, int start_index, int s info->pointers_num++; } -static void ShaderCalcBindingIndices(ShaderResources* bind) +static void ShaderCalcBindingIndices(ShaderBindResources* bind) { int binding_index = 0; @@ -2473,7 +2550,7 @@ void ShaderGetInputInfoCS(const ComputeShaderInfo* regs, ShaderComputeInputInfo* ShaderCalcBindingIndices(&info->bind); } -static void ShaderDbgDumpResources(const ShaderResources& bind) +static void ShaderDbgDumpResources(const ShaderBindResources& bind) { printf("\t descriptor_set_slot = %u\n", bind.descriptor_set_slot); printf("\t push_constant_offset = %u\n", bind.push_constant_offset); @@ -2835,45 +2912,34 @@ private: String m_file_name; }; -Vector ShaderRecompileVS(const VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info) +ShaderCode ShaderParseVS(const VertexShaderInfo* regs) { KYTY_PROFILER_FUNCTION(profiler::colors::Amber300); - String source; - Vector ret; - ShaderLogHelper log("vs"); + ShaderCode code; + code.SetType(ShaderType::Vertex); if (regs->vs_embedded) { - source = SpirvGetEmbeddedVs(regs->vs_embedded_id); + code.SetEmbedded(true); + code.SetEmbeddedId(regs->vs_embedded_id); } else { const auto* src = reinterpret_cast(regs->vs_regs.GetGpuAddress()); EXIT_NOT_IMPLEMENTED(src == nullptr); - vs_print("ShaderRecompileVS()", regs->vs_regs); + vs_print("ShaderParseVS()", regs->vs_regs); vs_check(regs->vs_regs); - for (int i = 0; i < input_info->bind.storage_buffers.buffers_num; i++) - { - const auto& r = input_info->bind.storage_buffers.buffers[i]; - EXIT_NOT_IMPLEMENTED(((r.Stride() * r.NumRecords()) & 0x3u) != 0); - } - const auto* header = GetBinaryInfo(src); EXIT_NOT_IMPLEMENTED(header == nullptr); - bi_print("ShaderRecompileVS():ShaderBinaryInfo", *header); - - ShaderCode code; - code.SetType(ShaderType::Vertex); + bi_print("ShaderParseVS():ShaderBinaryInfo", *header); shader_parse(0, src, nullptr, &code); - log.DumpOriginalShader(code); - if (g_debug_printfs != nullptr) { auto id = (static_cast(header->hash0) << 32u) | header->crc32; @@ -2882,6 +2948,31 @@ Vector ShaderRecompileVS(const VertexShaderInfo* regs, const ShaderVer code.GetDebugPrintfs() = g_debug_printfs->At(index).cmds; } } + } + + return code; +} + +Vector ShaderRecompileVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info) +{ + KYTY_PROFILER_FUNCTION(profiler::colors::Amber300); + + String source; + Vector ret; + ShaderLogHelper log("vs"); + + if (code.IsEmbedded()) + { + source = SpirvGetEmbeddedVs(code.GetEmbeddedId()); + } else + { + for (int i = 0; i < input_info->bind.storage_buffers.buffers_num; i++) + { + const auto& r = input_info->bind.storage_buffers.buffers[i]; + EXIT_NOT_IMPLEMENTED(((r.Stride() * r.NumRecords()) & 0x3u) != 0); + } + + log.DumpOriginalShader(code); source = SpirvGenerateSource(code, input_info, nullptr, nullptr); } @@ -2898,21 +2989,48 @@ Vector ShaderRecompileVS(const VertexShaderInfo* regs, const ShaderVer return ret; } -Vector ShaderRecompilePS(const PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info) +ShaderCode ShaderParsePS(const PixelShaderInfo* regs) { KYTY_PROFILER_FUNCTION(profiler::colors::Blue300); - ShaderLogHelper log("ps"); - const auto* src = reinterpret_cast(regs->ps_regs.data_addr); EXIT_NOT_IMPLEMENTED(src == nullptr); - ps_print("ShaderRecompilePS()", regs->ps_regs); + ps_print("ShaderParsePS()", regs->ps_regs); ps_check(regs->ps_regs); EXIT_NOT_IMPLEMENTED(regs->ps_regs.user_sgpr != regs->ps_user_sgpr.count); + const auto* header = GetBinaryInfo(src); + + EXIT_NOT_IMPLEMENTED(header == nullptr); + + bi_print("ShaderParsePS():ShaderBinaryInfo", *header); + + ShaderCode code; + code.SetType(ShaderType::Pixel); + + shader_parse(0, src, nullptr, &code); + + if (g_debug_printfs != nullptr) + { + auto id = (static_cast(header->hash0) << 32u) | header->crc32; + if (auto index = g_debug_printfs->Find(id, [](auto cmd, auto id) { return cmd.id == id; }); g_debug_printfs->IndexValid(index)) + { + code.GetDebugPrintfs() = g_debug_printfs->At(index).cmds; + } + } + + return code; +} + +Vector ShaderRecompilePS(const ShaderCode& code, const ShaderPixelInputInfo* input_info) +{ + KYTY_PROFILER_FUNCTION(profiler::colors::Blue300); + + ShaderLogHelper log("ps"); + for (uint32_t i = 0; i < input_info->input_num; i++) { EXIT_NOT_IMPLEMENTED(input_info->interpolator_settings[i] != i); @@ -2924,30 +3042,10 @@ Vector ShaderRecompilePS(const PixelShaderInfo* regs, const ShaderPixe EXIT_NOT_IMPLEMENTED(((r.Stride() * r.NumRecords()) & 0x3u) != 0); } - const auto* header = GetBinaryInfo(src); - - EXIT_NOT_IMPLEMENTED(header == nullptr); - - bi_print("ShaderRecompilePS():ShaderBinaryInfo", *header); - - ShaderCode code; - code.SetType(ShaderType::Pixel); - - shader_parse(0, src, nullptr, &code); - Vector ret; log.DumpOriginalShader(code); - if (g_debug_printfs != nullptr) - { - auto id = (static_cast(header->hash0) << 32u) | header->crc32; - if (auto index = g_debug_printfs->Find(id, [](auto cmd, auto id) { return cmd.id == id; }); g_debug_printfs->IndexValid(index)) - { - code.GetDebugPrintfs() = g_debug_printfs->At(index).cmds; - } - } - auto source = SpirvGenerateSource(code, nullptr, input_info, nullptr); log.DumpRecompiledShader(source); @@ -2962,42 +3060,30 @@ Vector ShaderRecompilePS(const PixelShaderInfo* regs, const ShaderPixe return ret; } -Vector ShaderRecompileCS(const ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info) +ShaderCode ShaderParseCS(const ComputeShaderInfo* regs) { KYTY_PROFILER_FUNCTION(profiler::colors::CyanA700); - ShaderLogHelper log("cs"); - const auto* src = reinterpret_cast(regs->cs_regs.data_addr); EXIT_NOT_IMPLEMENTED(src == nullptr); - cs_print("ShaderRecompileCS()", regs->cs_regs); + cs_print("ShaderParseCS()", regs->cs_regs); cs_check(regs->cs_regs); EXIT_NOT_IMPLEMENTED(regs->cs_regs.user_sgpr > regs->cs_user_sgpr.count); - for (int i = 0; i < input_info->bind.storage_buffers.buffers_num; i++) - { - const auto& r = input_info->bind.storage_buffers.buffers[i]; - EXIT_NOT_IMPLEMENTED(((r.Stride() * r.NumRecords()) & 0x3u) != 0); - } - const auto* header = GetBinaryInfo(src); EXIT_NOT_IMPLEMENTED(header == nullptr); - bi_print("ShaderRecompileCS():ShaderBinaryInfo", *header); + bi_print("ShaderParseCS():ShaderBinaryInfo", *header); ShaderCode code; code.SetType(ShaderType::Compute); shader_parse(0, src, nullptr, &code); - Vector ret; - - log.DumpOriginalShader(code); - if (g_debug_printfs != nullptr) { auto id = (static_cast(header->hash0) << 32u) | header->crc32; @@ -3007,6 +3093,25 @@ Vector ShaderRecompileCS(const ComputeShaderInfo* regs, const ShaderCo } } + return code; +} + +Vector ShaderRecompileCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info) +{ + KYTY_PROFILER_FUNCTION(profiler::colors::CyanA700); + + ShaderLogHelper log("cs"); + + for (int i = 0; i < input_info->bind.storage_buffers.buffers_num; i++) + { + const auto& r = input_info->bind.storage_buffers.buffers[i]; + EXIT_NOT_IMPLEMENTED(((r.Stride() * r.NumRecords()) & 0x3u) != 0); + } + + Vector ret; + + log.DumpOriginalShader(code); + auto source = SpirvGenerateSource(code, nullptr, nullptr, input_info); log.DumpRecompiledShader(source); @@ -3023,7 +3128,37 @@ Vector ShaderRecompileCS(const ComputeShaderInfo* regs, const ShaderCo return ret; } -static void ShaderGetBindIds(ShaderId* ret, const ShaderResources& bind) +static ShaderBindParameters ShaderUpdateBindInfo(const ShaderCode& code, const ShaderBindResources* bind) +{ + ShaderBindParameters p {}; + if (bind->textures2D.textures_num > 0) + { + bool image_sample = code.HasAnyOf({ShaderInstructionType::ImageSample}); + bool image_load = code.HasAnyOf({ShaderInstructionType::ImageLoad}); + + EXIT_NOT_IMPLEMENTED(image_sample && image_load); + + p.textures2D_without_sampler = image_load; + } + return p; +} + +ShaderBindParameters ShaderGetBindParametersVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info) +{ + return ShaderUpdateBindInfo(code, &input_info->bind); +} + +ShaderBindParameters ShaderGetBindParametersPS(const ShaderCode& code, const ShaderPixelInputInfo* input_info) +{ + return ShaderUpdateBindInfo(code, &input_info->bind); +} + +ShaderBindParameters ShaderGetBindParametersCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info) +{ + return ShaderUpdateBindInfo(code, &input_info->bind); +} + +static void ShaderGetBindIds(ShaderId* ret, const ShaderBindResources& bind) { ret->ids.Add(bind.storage_buffers.buffers_num); diff --git a/source/emulator/src/Graphics/ShaderSpirv.cpp b/source/emulator/src/Graphics/ShaderSpirv.cpp index ec097c6..3288bef 100644 --- a/source/emulator/src/Graphics/ShaderSpirv.cpp +++ b/source/emulator/src/Graphics/ShaderSpirv.cpp @@ -1140,7 +1140,7 @@ public: void SetPsInputInfo(const ShaderPixelInputInfo* input_info) { m_ps_input_info = input_info; } [[nodiscard]] const ShaderPixelInputInfo* GetPsInputInfo() const { return m_ps_input_info; } - [[nodiscard]] const ShaderResources* GetBindInfo() const { return m_bind; } + [[nodiscard]] const ShaderBindResources* GetBindInfo() const { return m_bind; } void AddConstantUint(uint32_t u); void AddConstantInt(int i); @@ -1199,7 +1199,8 @@ private: const ShaderVertexInputInfo* m_vs_input_info = nullptr; const ShaderComputeInputInfo* m_cs_input_info = nullptr; const ShaderPixelInputInfo* m_ps_input_info = nullptr; - const ShaderResources* m_bind = nullptr; + const ShaderBindResources* m_bind = nullptr; + ShaderBindParameters m_bind_params; Core::Array2 m_extended_mapping {}; }; @@ -2126,12 +2127,12 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata3Vaddr3StSsDmask7) static const char32_t* text = UR"( %t24_ = OpLoad %uint % - %t26_ = OpAccessChain %_ptr_UniformConstant_SampledImage %textures2D %t24_ - %t27_ = OpLoad %SampledImage %t26_ + %t26_ = OpAccessChain %_ptr_UniformConstant_Image %textures2D %t24_ + %t27_ = OpLoad %Image %t26_ %t33_ = OpLoad %uint % %t35_ = OpAccessChain %_ptr_UniformConstant_Sampler %samplers %t33_ %t36_ = OpLoad %Sampler %t35_ - %t38_ = OpSampledImage %_SampledImage %t27_ %t36_ + %t38_ = OpSampledImage %SampledImage %t27_ %t36_ %t39_ = OpLoad %float % %t40_ = OpLoad %float % %t42_ = OpCompositeConstruct %v2float %t39_ %t40_ @@ -2191,12 +2192,12 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata4Vaddr3StSsDmaskF) static const char32_t* text = UR"( %t24_ = OpLoad %uint % - %t26_ = OpAccessChain %_ptr_UniformConstant_SampledImage %textures2D %t24_ - %t27_ = OpLoad %SampledImage %t26_ + %t26_ = OpAccessChain %_ptr_UniformConstant_Image %textures2D %t24_ + %t27_ = OpLoad %Image %t26_ %t33_ = OpLoad %uint % %t35_ = OpAccessChain %_ptr_UniformConstant_Sampler %samplers %t33_ %t36_ = OpLoad %Sampler %t35_ - %t38_ = OpSampledImage %_SampledImage %t27_ %t36_ + %t38_ = OpSampledImage %SampledImage %t27_ %t36_ %t39_ = OpLoad %float % %t40_ = OpLoad %float % %t42_ = OpCompositeConstruct %v2float %t39_ %t40_ @@ -2233,6 +2234,72 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata4Vaddr3StSsDmaskF) return false; } +KYTY_RECOMPILER_FUNC(Recompile_ImageLoad_Vdata4Vaddr3StDmaskF) +{ + const auto& inst = code.GetInstructions().At(index); + const auto* bind_info = spirv->GetBindInfo(); + + if (bind_info != nullptr && bind_info->textures2D.textures_num > 0) + { + auto dst_value0 = operand_variable_to_str(inst.dst, 0); + auto dst_value1 = operand_variable_to_str(inst.dst, 1); + auto dst_value2 = operand_variable_to_str(inst.dst, 2); + auto dst_value3 = operand_variable_to_str(inst.dst, 3); + auto src0_value0 = operand_variable_to_str(inst.src[0], 0); + auto src0_value1 = operand_variable_to_str(inst.src[0], 1); + auto src0_value2 = operand_variable_to_str(inst.src[0], 2); + auto src1_value0 = operand_variable_to_str(inst.src[1], 0); + + EXIT_NOT_IMPLEMENTED(dst_value0.type != SpirvType::Float); + EXIT_NOT_IMPLEMENTED(src0_value0.type != SpirvType::Float); + EXIT_NOT_IMPLEMENTED(src1_value0.type != SpirvType::Uint); + + // TODO() check VSKIP + // TODO() check LOD_CLAMPED + // TODO() swizzle channels + // TODO() convert SRGB -> LINEAR if SRGB format was replaced with UNORM + + static const char32_t* text = UR"( + %t24_ = OpLoad %uint % + %t26_ = OpAccessChain %_ptr_UniformConstant_Image %textures2D %t24_ + %t27_ = OpLoad %Image %t26_ + %t67_ = OpLoad %float % + %t69_ = OpBitcast %uint %t67_ + %t70_ = OpLoad %float % + %t71_ = OpBitcast %uint %t70_ + %t73_ = OpCompositeConstruct %v2uint %t69_ %t71_ + %t74_ = OpImageRead %v4float %t27_ %t73_ + OpStore %temp_v4float %t74_ + %t46_ = OpAccessChain %_ptr_Function_float %temp_v4float %uint_0 + %t47_ = OpLoad %float %t46_ + OpStore % %t47_ + %t50_ = OpAccessChain %_ptr_Function_float %temp_v4float %uint_1 + %t51_ = OpLoad %float %t50_ + OpStore % %t51_ + %t54_ = OpAccessChain %_ptr_Function_float %temp_v4float %uint_2 + %t55_ = OpLoad %float %t54_ + OpStore % %t55_ + %t57_ = OpAccessChain %_ptr_Function_float %temp_v4float %uint_3 + %t58_ = OpLoad %float %t57_ + OpStore % %t58_ +)"; + *dst_source += String(text) + .ReplaceStr(U"", String::FromPrintf("%u", index)) + .ReplaceStr(U"", src0_value0.value) + .ReplaceStr(U"", src0_value1.value) + .ReplaceStr(U"", src0_value2.value) + .ReplaceStr(U"", src1_value0.value) + .ReplaceStr(U"", dst_value0.value) + .ReplaceStr(U"", dst_value1.value) + .ReplaceStr(U"", dst_value2.value) + .ReplaceStr(U"", dst_value3.value); + + return true; + } + + return false; +} + /* XXX: Andn2, Or, Nor, Cselect */ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12) { @@ -3529,7 +3596,7 @@ KYTY_RECOMPILER_FUNC(Recompile_VCmpx_XXX_I32_SmaskVsrc0Vsrc1) return true; } -/* XXX: Gt */ +/* XXX: Gt, Ge */ KYTY_RECOMPILER_FUNC(Recompile_VCmpx_XXX_U32_SmaskVsrc0Vsrc1) { const auto& inst = code.GetInstructions().At(index); @@ -4463,6 +4530,7 @@ static RecompilerFunc g_recomp_func[] = { {Recompile_Exp_Param_XXX_Vsrc0Vsrc1Vsrc2Vsrc3, ShaderInstructionType::Exp, ShaderInstructionFormat::Param3Vsrc0Vsrc1Vsrc2Vsrc3, {U"param3"}}, {Recompile_Exp_Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done, ShaderInstructionType::Exp, ShaderInstructionFormat::Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done, {U""}}, + {Recompile_ImageLoad_Vdata4Vaddr3StDmaskF, ShaderInstructionType::ImageLoad, ShaderInstructionFormat::Vdata4Vaddr3StDmaskF, {U""}}, {Recompile_ImageSample_Vdata3Vaddr3StSsDmask7, ShaderInstructionType::ImageSample, ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7, {U""}}, {Recompile_ImageSample_Vdata4Vaddr3StSsDmaskF, ShaderInstructionType::ImageSample, ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF, {U""}}, @@ -4484,12 +4552,28 @@ static RecompilerFunc g_recomp_func[] = { U"%tb_ = OpBitwiseAnd %uint %t0_ %ta_", U"%tc_ = OpNot %uint %t3_", U"%td_ = OpBitwiseAnd %uint %t1_ %tc_"}, SccCheck::NonZero}, + {Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SOrn2B64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ta_ = OpNot %uint %t2_", + U"%tb_ = OpBitwiseOr %uint %t0_ %ta_", + U"%tc_ = OpNot %uint %t3_", + U"%td_ = OpBitwiseOr %uint %t1_ %tc_"}, SccCheck::NonZero}, + {Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SAndB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%tb_ = OpBitwiseAnd %uint %t0_ %t2_", + U"%td_ = OpBitwiseAnd %uint %t1_ %t3_"}, SccCheck::NonZero}, {Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SNorB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ta_ = OpBitwiseOr %uint %t0_ %t2_", U"%tb_ = OpNot %uint %ta_", U"%tc_ = OpBitwiseOr %uint %t1_ %t3_", U"%td_ = OpNot %uint %tc_"}, SccCheck::NonZero}, + {Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SNandB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ta_ = OpBitwiseAnd %uint %t0_ %t2_", + U"%tb_ = OpNot %uint %ta_", + U"%tc_ = OpBitwiseAnd %uint %t1_ %t3_", + U"%td_ = OpNot %uint %tc_"}, SccCheck::NonZero}, + {Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SXnorB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ta_ = OpBitwiseXor %uint %t0_ %t2_", + U"%tb_ = OpNot %uint %ta_", + U"%tc_ = OpBitwiseXor %uint %t1_ %t3_", + U"%td_ = OpNot %uint %tc_"}, SccCheck::NonZero}, {Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SOrB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%tb_ = OpBitwiseOr %uint %t0_ %t2_", U"%td_ = OpBitwiseOr %uint %t1_ %t3_"}, SccCheck::NonZero}, + {Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SXorB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%tb_ = OpBitwiseXor %uint %t0_ %t2_", + U"%td_ = OpBitwiseXor %uint %t1_ %t3_"}, SccCheck::NonZero}, {Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SCselectB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ts_ = OpLoad %uint %scc", U"%tsb_ = OpINotEqual %bool %ts_ %uint_0", U"%tb_ = OpSelect %uint %tsb_ %t0_ %t2_", @@ -4531,6 +4615,7 @@ static RecompilerFunc g_recomp_func[] = { {Recompile_SMovB32_SVdstSVsrc0, ShaderInstructionType::SMovB32, ShaderInstructionFormat::SVdstSVsrc0, {U""}}, + {Recompile_SMovB32_SVdstSVsrc0, ShaderInstructionType::SMovkI32, ShaderInstructionFormat::SVdstSVsrc0, {U""}}, {Recompile_V_XXX_B32_SVdstSVsrc0, ShaderInstructionType::VBfrevB32, ShaderInstructionFormat::SVdstSVsrc0, {U"%t_ = OpBitReverse %uint %t0_"}}, {Recompile_V_XXX_B32_SVdstSVsrc0, ShaderInstructionType::VNotB32, ShaderInstructionFormat::SVdstSVsrc0, {U"%t_ = OpNot %uint %t0_"}}, {Recompile_V_XXX_F32_SVdstSVsrc0, ShaderInstructionType::VRcpF32, ShaderInstructionFormat::SVdstSVsrc0, {U"%t_ = OpFDiv %float %float_1_000000 %t0_"}}, @@ -4602,6 +4687,7 @@ static RecompilerFunc g_recomp_func[] = { {Recompile_VCmpx_XXX_I32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpxEqU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpIEqual"}}, {Recompile_VCmpx_XXX_I32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpxNeU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpINotEqual"}}, {Recompile_VCmpx_XXX_U32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpxGtU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpUGreaterThan"}}, + {Recompile_VCmpx_XXX_U32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpxGeU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpUGreaterThanEqual"}}, {Recompile_SCmp_XXX_I32_Ssrc0Ssrc1, ShaderInstructionType::SCmpEqI32, ShaderInstructionFormat::Ssrc0Ssrc1, {U"OpIEqual"}}, {Recompile_SCmp_XXX_I32_Ssrc0Ssrc1, ShaderInstructionType::SCmpGeI32, ShaderInstructionFormat::Ssrc0Ssrc1, {U"OpSGreaterThanEqual"}}, @@ -4828,10 +4914,22 @@ void Spirv::GenerateSource() switch (m_code.GetType()) { - case ShaderType::Pixel: m_bind = (m_ps_input_info != nullptr ? &m_ps_input_info->bind : nullptr); break; - case ShaderType::Vertex: m_bind = (m_vs_input_info != nullptr ? &m_vs_input_info->bind : nullptr); break; - case ShaderType::Compute: m_bind = (m_cs_input_info != nullptr ? &m_cs_input_info->bind : nullptr); break; - default: m_bind = nullptr; break; + case ShaderType::Pixel: + m_bind = (m_ps_input_info != nullptr ? &m_ps_input_info->bind : nullptr); + m_bind_params = (m_ps_input_info != nullptr ? ShaderGetBindParametersPS(m_code, m_ps_input_info) : ShaderBindParameters()); + break; + case ShaderType::Vertex: + m_bind = (m_vs_input_info != nullptr ? &m_vs_input_info->bind : nullptr); + m_bind_params = (m_vs_input_info != nullptr ? ShaderGetBindParametersVS(m_code, m_vs_input_info) : ShaderBindParameters()); + break; + case ShaderType::Compute: + m_bind = (m_cs_input_info != nullptr ? &m_cs_input_info->bind : nullptr); + m_bind_params = (m_cs_input_info != nullptr ? ShaderGetBindParametersCS(m_code, m_cs_input_info) : ShaderBindParameters()); + break; + default: + m_bind = nullptr; + m_bind_params = ShaderBindParameters(); + break; } WriteHeader(); @@ -5180,28 +5278,36 @@ void Spirv::WriteTypes() } static const char32_t* storage_buffers_types = UR"( - %buffers_runtimearr_float = OpTypeRuntimeArray %float - %BufferObject = OpTypeStruct %buffers_runtimearr_float - %buffers_num_uint_ = OpConstant %uint - %_arr_BufferObject_uint_ = OpTypeArray %BufferObject %buffers_num_uint_ + %buffers_runtimearr_float = OpTypeRuntimeArray %float + %BufferObject = OpTypeStruct %buffers_runtimearr_float + %buffers_num_uint_ = OpConstant %uint + %_arr_BufferObject_uint_ = OpTypeArray %BufferObject %buffers_num_uint_ %_ptr_StorageBuffer__arr_BufferObject_uint_ = OpTypePointer StorageBuffer %_arr_BufferObject_uint_ )"; - static const char32_t* textures_types = UR"( - %SampledImage = OpTypeImage %float 2D 0 0 0 1 Unknown - %textures2D_uint_ = OpConstant %uint - %_arr_SampledImage_uint_ = OpTypeArray %SampledImage %textures2D_uint_ -%_ptr_UniformConstant__arr_SampledImage_uint_ = OpTypePointer UniformConstant %_arr_SampledImage_uint_ - %_ptr_UniformConstant_SampledImage = OpTypePointer UniformConstant %SampledImage - %_SampledImage = OpTypeSampledImage %SampledImage + static const char32_t* textures_sampled_types = UR"( + %Image = OpTypeImage %float 2D 0 0 0 1 Unknown + %textures2D_uint_ = OpConstant %uint + %_arr_Image_uint_ = OpTypeArray %Image %textures2D_uint_ +%_ptr_UniformConstant__arr_Image_uint_ = OpTypePointer UniformConstant %_arr_Image_uint_ + %_ptr_UniformConstant_Image = OpTypePointer UniformConstant %Image + %SampledImage = OpTypeSampledImage %Image +)"; + + static const char32_t* textures_loaded_types = UR"( + %Image = OpTypeImage %float 2D 0 0 0 2 Rgba8 + %textures2D_uint_ = OpConstant %uint + %_arr_Image_uint_ = OpTypeArray %Image %textures2D_uint_ +%_ptr_UniformConstant__arr_Image_uint_ = OpTypePointer UniformConstant %_arr_Image_uint_ + %_ptr_UniformConstant_Image = OpTypePointer UniformConstant %Image )"; static const char32_t* samplers_types = UR"( - %Sampler = OpTypeSampler - %samplers_uint_ = OpConstant %uint - %_arr_Sampler_uint_ = OpTypeArray %Sampler %samplers_uint_ + %Sampler = OpTypeSampler + %samplers_uint_ = OpConstant %uint + %_arr_Sampler_uint_ = OpTypeArray %Sampler %samplers_uint_ %_ptr_UniformConstant__arr_Sampler_uint_ = OpTypePointer UniformConstant %_arr_Sampler_uint_ - %_ptr_UniformConstant_Sampler = OpTypePointer UniformConstant %Sampler + %_ptr_UniformConstant_Sampler = OpTypePointer UniformConstant %Sampler )"; static const char32_t* gds_types = UR"( @@ -5211,13 +5317,13 @@ void Spirv::WriteTypes() )"; static const char32_t* vsharp_types = UR"( - %vsharp_buffers_num_uint_ = OpConstant %uint - %vsharp_num_uint_4 = OpConstant %uint 4 - %vsharp_arr_uint_uint_4 = OpTypeArray %uint %vsharp_num_uint_4 + %vsharp_buffers_num_uint_ = OpConstant %uint + %vsharp_num_uint_4 = OpConstant %uint 4 + %vsharp_arr_uint_uint_4 = OpTypeArray %uint %vsharp_num_uint_4 %vsharp_arr__arr_uint_uint_4_uint_ = OpTypeArray %vsharp_arr_uint_uint_4 %vsharp_buffers_num_uint_ - %BufferResource = OpTypeStruct %vsharp_arr__arr_uint_uint_4_uint_ - %_ptr_PushConstant_BufferResource = OpTypePointer PushConstant %BufferResource - %_ptr_PushConstant_uint = OpTypePointer PushConstant %uint + %BufferResource = OpTypeStruct %vsharp_arr__arr_uint_uint_4_uint_ + %_ptr_PushConstant_BufferResource = OpTypePointer PushConstant %BufferResource + %_ptr_PushConstant_uint = OpTypePointer PushConstant %uint )"; if (m_bind != nullptr) @@ -5229,7 +5335,8 @@ void Spirv::WriteTypes() } if (m_bind->textures2D.textures_num > 0) { - m_source += String(textures_types).ReplaceStr(U"", String::FromPrintf("%d", m_bind->textures2D.textures_num)); + m_source += String(m_bind_params.textures2D_without_sampler ? textures_loaded_types : textures_sampled_types) + .ReplaceStr(U"", String::FromPrintf("%d", m_bind->textures2D.textures_num)); } if (m_bind->samplers.samplers_num > 0) { @@ -5297,7 +5404,7 @@ void Spirv::WriteGlobalVariables() } if (m_bind->textures2D.textures_num > 0) { - vars.Add(String::FromPrintf("%%textures2D = OpVariable %%_ptr_UniformConstant__arr_SampledImage_uint_%d UniformConstant", + vars.Add(String::FromPrintf("%%textures2D = OpVariable %%_ptr_UniformConstant__arr_Image_uint_%d UniformConstant", m_bind->textures2D.textures_num)); } if (m_bind->samplers.samplers_num > 0) diff --git a/source/emulator/src/Graphics/Texture.cpp b/source/emulator/src/Graphics/Texture.cpp index d5d2012..9e60ac4 100644 --- a/source/emulator/src/Graphics/Texture.cpp +++ b/source/emulator/src/Graphics/Texture.cpp @@ -47,6 +47,63 @@ static VkComponentSwizzle get_swizzle(uint8_t s) return VK_COMPONENT_SWIZZLE_IDENTITY; } +static bool CheckFormat(GraphicContext* ctx, VkImageCreateInfo* image_info) +{ + VkImageFormatProperties props {}; + if (vkGetPhysicalDeviceImageFormatProperties(ctx->physical_device, image_info->format, image_info->imageType, image_info->tiling, + image_info->usage, image_info->flags, &props) == VK_ERROR_FORMAT_NOT_SUPPORTED) + { + if (image_info->format == VK_FORMAT_R8G8B8A8_SRGB) + { + // TODO() convert SRGB -> LINEAR in shader + image_info->format = VK_FORMAT_R8G8B8A8_UNORM; + bool result = CheckFormat(ctx, image_info); + printf("replace VK_FORMAT_R8G8B8A8_SRGB => VK_FORMAT_R8G8B8A8_UNORM [%s]\n", (!result ? "FAIL" : "SUCCESS")); + return result; + } + if (image_info->format == VK_FORMAT_B8G8R8A8_SRGB) + { + // TODO() convert SRGB -> LINEAR in shader + image_info->format = VK_FORMAT_B8G8R8A8_UNORM; + bool result = CheckFormat(ctx, image_info); + printf("replace VK_FORMAT_B8G8R8A8_SRGB => VK_FORMAT_B8G8R8A8_UNORM [%s]\n", (!result ? "FAIL" : "SUCCESS")); + return result; + } + return false; + } + return true; +} + +static bool CheckSwizzle(GraphicContext* /*ctx*/, VkImageCreateInfo* image_info, VkComponentMapping* components) +{ + if ((image_info->usage & VK_IMAGE_USAGE_STORAGE_BIT) != 0) + { + if (components->r == VK_COMPONENT_SWIZZLE_R && components->g == VK_COMPONENT_SWIZZLE_G && components->b == VK_COMPONENT_SWIZZLE_B && + components->a == VK_COMPONENT_SWIZZLE_A) + { + return true; + } + + if (components->r == VK_COMPONENT_SWIZZLE_B && components->g == VK_COMPONENT_SWIZZLE_G && components->b == VK_COMPONENT_SWIZZLE_R && + components->a == VK_COMPONENT_SWIZZLE_A && image_info->format == VK_FORMAT_R8G8B8A8_SRGB) + { + printf("replace VK_FORMAT_R8G8B8A8_SRGB => VK_FORMAT_B8G8R8A8_SRGB\n"); + + components->r = VK_COMPONENT_SWIZZLE_R; + components->g = VK_COMPONENT_SWIZZLE_G; + components->b = VK_COMPONENT_SWIZZLE_B; + components->a = VK_COMPONENT_SWIZZLE_A; + image_info->format = VK_FORMAT_B8G8R8A8_SRGB; + return true; + } + + // TODO() swizzle channels in shader + + return false; + } + return true; +} + void* TextureObject::Create(GraphicContext* ctx, const uint64_t* vaddr, const uint64_t* size, int vaddr_num, VulkanMemory* mem) const { KYTY_PROFILER_BLOCK("TextureObject::Create"); @@ -55,12 +112,29 @@ void* TextureObject::Create(GraphicContext* ctx, const uint64_t* vaddr, const ui EXIT_IF(mem == nullptr); EXIT_IF(ctx == nullptr); - auto dfmt = params[PARAM_DFMT]; - auto nfmt = params[PARAM_NFMT]; - auto width = params[PARAM_WIDTH]; - auto height = params[PARAM_HEIGHT]; + auto dfmt = params[PARAM_DFMT_NFMT] >> 32u; + auto nfmt = params[PARAM_DFMT_NFMT] & 0xffffffffu; + auto width = params[PARAM_WIDTH_HEIGHT] >> 32u; + auto height = params[PARAM_WIDTH_HEIGHT] & 0xffffffffu; auto levels = params[PARAM_LEVELS]; auto swizzle = params[PARAM_SWIZZLE]; + auto usage = params[PARAM_USAGE]; + + VkImageUsageFlags vk_usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT; + + switch (usage) + { + case TEXTURE_USAGE_SAMPLED: vk_usage |= VK_IMAGE_USAGE_SAMPLED_BIT; break; + case TEXTURE_USAGE_STORAGE: vk_usage |= VK_IMAGE_USAGE_STORAGE_BIT; break; + default: EXIT("unknown usage: %u\n", static_cast(usage)); + } + + VkComponentMapping components {}; + + components.r = get_swizzle(swizzle & 0xffu); + components.g = get_swizzle((swizzle >> 8u) & 0xffu); + components.b = get_swizzle((swizzle >> 16u) & 0xffu); + components.a = get_swizzle((swizzle >> 24u) & 0xffu); auto pixel_format = get_texture_format(dfmt, nfmt); @@ -70,29 +144,39 @@ void* TextureObject::Create(GraphicContext* ctx, const uint64_t* vaddr, const ui auto* vk_obj = new TextureVulkanImage; - vk_obj->extent.width = width; - vk_obj->extent.height = height; - vk_obj->format = pixel_format; - vk_obj->image = nullptr; - vk_obj->image_view = nullptr; - VkImageCreateInfo image_info {}; image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; image_info.pNext = nullptr; image_info.flags = 0; image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.extent.width = vk_obj->extent.width; - image_info.extent.height = vk_obj->extent.height; + image_info.extent.width = width; + image_info.extent.height = height; image_info.extent.depth = 1; image_info.mipLevels = levels; image_info.arrayLayers = 1; - image_info.format = vk_obj->format; + image_info.format = pixel_format; image_info.tiling = VK_IMAGE_TILING_OPTIMAL; image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + image_info.usage = vk_usage; image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.samples = VK_SAMPLE_COUNT_1_BIT; + if (!CheckSwizzle(ctx, &image_info, &components)) + { + EXIT("swizzle is not supported"); + } + + if (!CheckFormat(ctx, &image_info)) + { + EXIT("format is not supported"); + } + + vk_obj->extent.width = width; + vk_obj->extent.height = height; + vk_obj->format = image_info.format; + vk_obj->image = nullptr; + vk_obj->image_view = nullptr; + vkCreateImage(ctx->device, &image_info, nullptr, &vk_obj->image); EXIT_NOT_IMPLEMENTED(vk_obj->image == nullptr); @@ -109,8 +193,6 @@ void* TextureObject::Create(GraphicContext* ctx, const uint64_t* vaddr, const ui vk_obj->memory = *mem; - // EXIT_NOT_IMPLEMENTED(mem->requirements.size > *size); - GetUpdateFunc()(ctx, params, vk_obj, vaddr, size, vaddr_num); VkImageViewCreateInfo create_info {}; @@ -120,10 +202,7 @@ void* TextureObject::Create(GraphicContext* ctx, const uint64_t* vaddr, const ui create_info.image = vk_obj->image; create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; create_info.format = vk_obj->format; - create_info.components.r = get_swizzle(swizzle & 0xffu); - create_info.components.g = get_swizzle((swizzle >> 8u) & 0xffu); - create_info.components.b = get_swizzle((swizzle >> 16u) & 0xffu); - create_info.components.a = get_swizzle((swizzle >> 24u) & 0xffu); + create_info.components = components; create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; create_info.subresourceRange.baseArrayLayer = 0; create_info.subresourceRange.baseMipLevel = 0; @@ -149,24 +228,36 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj, auto* vk_obj = static_cast(obj); bool tile = (params[TextureObject::PARAM_TILE] != 0); - auto dfmt = params[TextureObject::PARAM_DFMT]; - auto nfmt = params[TextureObject::PARAM_NFMT]; - auto width = params[TextureObject::PARAM_WIDTH]; - auto height = params[TextureObject::PARAM_HEIGHT]; + auto dfmt = params[TextureObject::PARAM_DFMT_NFMT] >> 32u; + auto nfmt = params[TextureObject::PARAM_DFMT_NFMT] & 0xffffffffu; + auto width = params[TextureObject::PARAM_WIDTH_HEIGHT] >> 32u; + auto height = params[TextureObject::PARAM_WIDTH_HEIGHT] & 0xffffffffu; auto levels = params[TextureObject::PARAM_LEVELS]; + auto pitch = params[TextureObject::PARAM_PITCH]; + auto usage = params[TextureObject::PARAM_USAGE]; bool neo = Config::IsNeo(); + VkImageLayout vk_layout = VK_IMAGE_LAYOUT_UNDEFINED; + + switch (usage) + { + case TextureObject::TEXTURE_USAGE_SAMPLED: vk_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; break; + case TextureObject::TEXTURE_USAGE_STORAGE: vk_layout = VK_IMAGE_LAYOUT_GENERAL; break; + default: EXIT("unknown usage: %u\n", static_cast(usage)); + } + EXIT_NOT_IMPLEMENTED(levels >= 16); uint32_t level_sizes[16]; - TileGetTextureSize(dfmt, nfmt, width, height, levels, tile, neo, nullptr, level_sizes, nullptr, nullptr); + TileGetTextureSize(dfmt, nfmt, width, height, pitch, levels, tile, neo, nullptr, level_sizes, nullptr, nullptr); // dbg_test_mipmaps(ctx, VK_FORMAT_BC3_SRGB_BLOCK, 512, 512); uint32_t offset = 0; uint32_t mip_width = width; uint32_t mip_height = height; + uint32_t mip_pitch = pitch; Vector regions(levels); for (uint32_t i = 0; i < levels; i++) @@ -176,6 +267,7 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj, regions[i].offset = offset; regions[i].width = mip_width; regions[i].height = mip_height; + regions[i].pitch = mip_pitch; offset += level_sizes[i]; @@ -187,24 +279,29 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj, { mip_height /= 2; } + if (mip_pitch > 1) + { + mip_pitch /= 2; + } } if (tile) { + EXIT_NOT_IMPLEMENTED(pitch != width); auto* temp_buf = new uint8_t[*size]; TileConvertTiledToLinear(temp_buf, reinterpret_cast(*vaddr), TileMode::TextureTiled, dfmt, nfmt, width, height, levels, neo); - UtilFillImage(ctx, vk_obj, temp_buf, *size, regions); + UtilFillImage(ctx, vk_obj, temp_buf, *size, regions, static_cast(vk_layout)); delete[] temp_buf; } else { - UtilFillImage(ctx, vk_obj, reinterpret_cast(*vaddr), *size, regions); + UtilFillImage(ctx, vk_obj, reinterpret_cast(*vaddr), *size, regions, static_cast(vk_layout)); } } bool TextureObject::Equal(const uint64_t* other) const { - return (params[PARAM_DFMT] == other[PARAM_DFMT] && params[PARAM_NFMT] == other[PARAM_NFMT] && - params[PARAM_WIDTH] == other[PARAM_WIDTH] && params[PARAM_HEIGHT] == other[PARAM_HEIGHT] && + return (params[PARAM_DFMT_NFMT] == other[PARAM_DFMT_NFMT] && params[PARAM_PITCH] == other[PARAM_PITCH] && + params[PARAM_WIDTH_HEIGHT] == other[PARAM_WIDTH_HEIGHT] && params[PARAM_USAGE] == other[PARAM_USAGE] && params[PARAM_LEVELS] == other[PARAM_LEVELS] && params[PARAM_TILE] == other[PARAM_TILE] && params[PARAM_NEO] == other[PARAM_NEO] && params[PARAM_SWIZZLE] == other[PARAM_SWIZZLE]); } diff --git a/source/emulator/src/Graphics/Tile.cpp b/source/emulator/src/Graphics/Tile.cpp index 1c7ee72..0e07dba 100644 --- a/source/emulator/src/Graphics/Tile.cpp +++ b/source/emulator/src/Graphics/Tile.cpp @@ -423,7 +423,7 @@ void TileConvertTiledToLinear(void* dst, const void* src, TileMode mode, uint32_ uint32_t padded_height[16] = {0}; uint32_t level_sizes[16] = {0}; - TileGetTextureSize(dfmt, nfmt, width, height, levels, true, neo, nullptr, level_sizes, padded_width, padded_height); + TileGetTextureSize(dfmt, nfmt, width, height, width, levels, true, neo, nullptr, level_sizes, padded_width, padded_height); uint32_t mip_width = width; uint32_t mip_height = height; @@ -558,45 +558,60 @@ void TileGetDepthSize(uint32_t width, uint32_t height, uint32_t z_format, uint32 *stencil_size = 0; } -void TileGetVideoOutSize(uint32_t width, uint32_t height, bool tile, bool neo, uint32_t* size) +void TileGetVideoOutSize(uint32_t width, uint32_t height, bool tile, bool neo, uint32_t* size, uint32_t* pitch) { EXIT_IF(size == nullptr); + EXIT_IF(pitch == nullptr); + + uint32_t ret_size = 0; + uint32_t ret_pitch = 0; if (width == 1920 && height == 1080 && tile && !neo) { - *size = 8355840; + ret_size = 8355840; + ret_pitch = 1920; } if (width == 1920 && height == 1080 && tile && neo) { - *size = 8847360; + ret_size = 8847360; + ret_pitch = 1920; } if (width == 1920 && height == 1080 && !tile && !neo) { - *size = 8294400; + ret_size = 8294400; + ret_pitch = 1920; } if (width == 1920 && height == 1080 && !tile && neo) { - *size = 8294400; + ret_size = 8294400; + ret_pitch = 1920; } if (width == 1280 && height == 720 && tile && !neo) { - *size = 3932160; + ret_size = 3932160; + ret_pitch = 1280; } if (width == 1280 && height == 720 && tile && neo) { - *size = 3932160; + ret_size = 3932160; + ret_pitch = 1280; } if (width == 1280 && height == 720 && !tile && !neo) { - *size = 3686400; + ret_size = 3686400; + ret_pitch = 1280; } if (width == 1280 && height == 720 && !tile && neo) { - *size = 3686400; + ret_size = 3686400; + ret_pitch = 1280; } + + *size = ret_size; + *pitch = ret_pitch; } -void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t levels, bool tile, bool neo, +void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t pitch, uint32_t levels, bool tile, bool neo, uint32_t* total_size, uint32_t* level_sizes, uint32_t* padded_width, uint32_t* padded_height) { struct Padded @@ -643,8 +658,8 @@ void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t h for (const auto& i: infos) { - if (i.dfmt == dfmt && i.nfmt == nfmt && i.width == width && i.height == height && i.levels >= levels && i.tile == tile && - i.neo == neo) + if (i.dfmt == dfmt && i.nfmt == nfmt && i.width == width && i.width == pitch && i.height == height && i.levels >= levels && + i.tile == tile && i.neo == neo) { for (uint32_t l = 0; l < levels; l++) { @@ -665,6 +680,20 @@ void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t h padded_height[l] = i.padded[l].height; } } + return; + } + } + + if (!tile && levels == 1 && dfmt == 10 && nfmt == 9) + { + uint32_t size = pitch * height * 4; + if (total_size != nullptr) + { + *total_size = size; + } + if (level_sizes != nullptr) + { + level_sizes[0] = size; } } } diff --git a/source/emulator/src/Graphics/Utils.cpp b/source/emulator/src/Graphics/Utils.cpp index 6317fba..bd66e6a 100644 --- a/source/emulator/src/Graphics/Utils.cpp +++ b/source/emulator/src/Graphics/Utils.cpp @@ -82,7 +82,7 @@ static void set_image_layout(VkCommandBuffer buffer, VkImage image, uint32_t lev vkCmdPipelineBarrier(buffer, src_stages, dest_stages, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } -void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, VideoOutVulkanImage* dst_image) +void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, uint32_t src_pitch, VideoOutVulkanImage* dst_image) { EXIT_IF(src_buffer == nullptr); EXIT_IF(src_buffer->buffer == nullptr); @@ -96,7 +96,7 @@ void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, VideoOut VkBufferImageCopy region {}; region.bufferOffset = 0; - region.bufferRowLength = 0; + region.bufferRowLength = (src_pitch != dst_image->extent.width ? src_pitch : 0); region.bufferImageHeight = 0; region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; @@ -114,7 +114,7 @@ void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, VideoOut } void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, TextureVulkanImage* dst_image, - const Vector& regions) + const Vector& regions, uint64_t dst_layout) { EXIT_IF(src_buffer == nullptr); EXIT_IF(src_buffer->buffer == nullptr); @@ -131,7 +131,7 @@ void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, TextureV for (const auto& r: regions) { region[index].bufferOffset = r.offset; - region[index].bufferRowLength = 0; + region[index].bufferRowLength = (r.width != r.pitch ? r.pitch : 0); region[index].bufferImageHeight = 0; region[index].imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; region[index].imageSubresource.mipLevel = index; @@ -148,7 +148,7 @@ void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, TextureV vkCmdCopyBufferToImage(vk_buffer, src_buffer->buffer, dst_image->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, index, region); set_image_layout(vk_buffer, dst_image->image, index, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + /*VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL*/ static_cast(dst_layout)); } void UtilBlitImage(CommandBuffer* buffer, VideoOutVulkanImage* src_image, VulkanSwapchain* dst_swapchain) @@ -231,7 +231,7 @@ void VulkanDeleteBuffer(GraphicContext* gctx, VulkanBuffer* buffer) buffer->buffer = nullptr; } -void UtilFillImage(GraphicContext* ctx, VideoOutVulkanImage* image, const void* src_data, uint64_t size) +void UtilFillImage(GraphicContext* ctx, VideoOutVulkanImage* image, const void* src_data, uint64_t size, uint32_t src_pitch) { KYTY_PROFILER_FUNCTION(); @@ -244,10 +244,8 @@ void UtilFillImage(GraphicContext* ctx, VideoOutVulkanImage* image, const void* VulkanCreateBuffer(ctx, size, &staging_buffer); void* data = nullptr; - // vkMapMemory(ctx->device, staging_buffer.memory.memory, staging_buffer.memory.offset, size, 0, &data); VulkanMapMemory(ctx, &staging_buffer.memory, &data); std::memcpy(data, src_data, size); - // vkUnmapMemory(ctx->device, staging_buffer.memory.memory); VulkanUnmapMemory(ctx, &staging_buffer.memory); CommandBuffer buffer; @@ -256,7 +254,7 @@ void UtilFillImage(GraphicContext* ctx, VideoOutVulkanImage* image, const void* EXIT_NOT_IMPLEMENTED(buffer.IsInvalid()); buffer.Begin(); - UtilBufferToImage(&buffer, &staging_buffer, image); + UtilBufferToImage(&buffer, &staging_buffer, src_pitch, image); buffer.End(); buffer.Execute(); buffer.WaitForFence(); @@ -310,7 +308,7 @@ void UtilSetImageLayoutOptimal(VideoOutVulkanImage* image) } void UtilFillImage(GraphicContext* ctx, TextureVulkanImage* image, const void* src_data, uint64_t size, - const Vector& regions) + const Vector& regions, uint64_t dst_layout) { EXIT_IF(ctx == nullptr); EXIT_IF(image == nullptr); @@ -331,7 +329,7 @@ void UtilFillImage(GraphicContext* ctx, TextureVulkanImage* image, const void* s EXIT_NOT_IMPLEMENTED(buffer.IsInvalid()); buffer.Begin(); - UtilBufferToImage(&buffer, &staging_buffer, image, regions); + UtilBufferToImage(&buffer, &staging_buffer, image, regions, dst_layout); buffer.End(); buffer.Execute(); buffer.WaitForFence(); diff --git a/source/emulator/src/Graphics/VideoOut.cpp b/source/emulator/src/Graphics/VideoOut.cpp index dda4bb4..4be9c14 100644 --- a/source/emulator/src/Graphics/VideoOut.cpp +++ b/source/emulator/src/Graphics/VideoOut.cpp @@ -5,6 +5,7 @@ #include "Kyty/Core/LinkList.h" #include "Kyty/Core/String.h" #include "Kyty/Core/Threads.h" +#include "Kyty/Core/Vector.h" #include "Emulator/Common.h" #include "Emulator/Config.h" @@ -30,7 +31,9 @@ LIB_NAME("VideoOut", "VideoOut"); namespace EventQueue = LibKernel::EventQueue; -constexpr int VIDEO_OUT_EVENT_FLIP = 0; +constexpr int VIDEO_OUT_EVENT_FLIP = 0; +constexpr int VIDEO_OUT_EVENT_VBLANK = 1; +constexpr int VIDEO_OUT_EVENT_PRE_VBLANK_START = 2; struct VideoOutResolutionStatus { @@ -72,6 +75,16 @@ struct VideoOutFlipStatus uint32_t reserved1 = 0; }; +struct VideoOutVblankStatus +{ + uint64_t count = 0; + uint64_t processTime = 0; + uint64_t tsc = 0; + uint64_t reserved[1] = {0}; + uint8_t flags = 0; + uint8_t pad1[7] = {}; +}; + struct VideoOutBufferSet { VideoOutBufferAttribute attr = {}; @@ -84,19 +97,25 @@ struct VideoOutBufferInfo void* buffer = nullptr; Graphics::VideoOutVulkanImage* buffer_vulkan = nullptr; uint64_t buffer_size = 0; + uint64_t buffer_pitch = 0; int set_id = 0; }; struct VideoOutConfig { - VideoOutResolutionStatus resolution; - bool opened = false; - int flip_rate = 0; - EventQueue::KernelEqueue flip_eq = nullptr; - VideoOutFlipStatus flip_status; - VideoOutBufferInfo buffers[16]; - VideoOutBufferSet buffers_sets[16]; - int buffers_sets_num = 0; + Core::Mutex mutex; + VideoOutResolutionStatus resolution; + bool opened = false; + int flip_rate = 0; + Vector flip_eqs; + Vector pre_vblank_eqs; + Vector vblank_eqs; + VideoOutFlipStatus flip_status; + VideoOutVblankStatus pre_vblank_status; + VideoOutVblankStatus vblank_status; + VideoOutBufferInfo buffers[16]; + VideoOutBufferSet buffers_sets[16]; + int buffers_sets_num = 0; }; class FlipQueue @@ -158,6 +177,9 @@ public: FlipQueue& GetFlipQueue() { return m_flip_queue; } + void VblankBegin(); + void VblankEnd(); + private: Core::Mutex m_mutex; VideoOutConfig m_video_out_ctx[VIDEO_OUT_NUM_MAX]; @@ -167,8 +189,11 @@ private: static VideoOutContext* g_video_out_context = nullptr; -static uint64_t calc_buffer_size(const VideoOutBufferAttribute* attribute) +static void calc_buffer_size(const VideoOutBufferAttribute* attribute, uint64_t* size, uint64_t* pitch) { + EXIT_IF(size == nullptr); + EXIT_IF(pitch == nullptr); + bool tile = attribute->tilingMode == 0; bool neo = Config::IsNeo(); uint32_t width = attribute->width; @@ -179,10 +204,12 @@ static uint64_t calc_buffer_size(const VideoOutBufferAttribute* attribute) EXIT_NOT_IMPLEMENTED(attribute->aspectRatio != 0); EXIT_NOT_IMPLEMENTED(attribute->pixelFormat != 0x80000000); - uint32_t size = 0; - Graphics::TileGetVideoOutSize(width, height, tile, neo, &size); + uint32_t size32 = 0; + uint32_t pitch32 = 0; + Graphics::TileGetVideoOutSize(width, height, tile, neo, &size32, &pitch32); - return size; + *size = size32; + *pitch = pitch32; } void VideoOutInit(uint32_t width, uint32_t height) @@ -220,7 +247,9 @@ int VideoOutContext::Open() } } - EXIT_IF(m_video_out_ctx[handle].flip_eq != nullptr); + EXIT_IF(!m_video_out_ctx[handle].flip_eqs.IsEmpty()); + EXIT_IF(!m_video_out_ctx[handle].pre_vblank_eqs.IsEmpty()); + EXIT_IF(!m_video_out_ctx[handle].vblank_eqs.IsEmpty()); EXIT_IF(m_video_out_ctx[handle].flip_rate != 0); m_video_out_ctx[handle].opened = true; @@ -228,6 +257,8 @@ int VideoOutContext::Open() m_video_out_ctx[handle].flip_status.flipArg = -1; m_video_out_ctx[handle].flip_status.currentBuffer = -1; m_video_out_ctx[handle].flip_status.count = 0; + m_video_out_ctx[handle].pre_vblank_status = VideoOutVblankStatus(); + m_video_out_ctx[handle].vblank_status = VideoOutVblankStatus(); return handle; } @@ -241,11 +272,35 @@ void VideoOutContext::Close(int handle) m_video_out_ctx[handle].opened = false; - if (m_video_out_ctx[handle].flip_eq != nullptr) + m_video_out_ctx[handle].mutex.Lock(); + for (auto& flip_eq: m_video_out_ctx[handle].flip_eqs) { - EventQueue::KernelDeleteEvent(m_video_out_ctx[handle].flip_eq, VIDEO_OUT_EVENT_FLIP, EventQueue::KERNEL_EVFILT_VIDEO_OUT); - EXIT_IF(m_video_out_ctx[handle].flip_eq != nullptr); + if (flip_eq != nullptr) + { + auto result = EventQueue::KernelDeleteEvent(flip_eq, VIDEO_OUT_EVENT_FLIP, EventQueue::KERNEL_EVFILT_VIDEO_OUT); + EXIT_NOT_IMPLEMENTED(result != OK); + } } + m_video_out_ctx[handle].flip_eqs.Clear(); + for (auto& vblank_eq: m_video_out_ctx[handle].pre_vblank_eqs) + { + if (vblank_eq != nullptr) + { + auto result = EventQueue::KernelDeleteEvent(vblank_eq, VIDEO_OUT_EVENT_VBLANK, EventQueue::KERNEL_EVFILT_VIDEO_OUT); + EXIT_NOT_IMPLEMENTED(result != OK); + } + } + m_video_out_ctx[handle].pre_vblank_eqs.Clear(); + for (auto& vblank_eq: m_video_out_ctx[handle].vblank_eqs) + { + if (vblank_eq != nullptr) + { + auto result = EventQueue::KernelDeleteEvent(vblank_eq, VIDEO_OUT_EVENT_PRE_VBLANK_START, EventQueue::KERNEL_EVFILT_VIDEO_OUT); + EXIT_NOT_IMPLEMENTED(result != OK); + } + } + m_video_out_ctx[handle].vblank_eqs.Clear(); + m_video_out_ctx[handle].mutex.Unlock(); m_video_out_ctx[handle].flip_rate = 0; @@ -270,6 +325,62 @@ VideoOutConfig* VideoOutContext::Get(int handle) return m_video_out_ctx + handle; } +void VideoOutContext::VblankBegin() +{ + Core::LockGuard lock(m_mutex); + + for (int i = 1; i < VIDEO_OUT_NUM_MAX; i++) + { + auto& ctx = m_video_out_ctx[i]; + if (ctx.opened) + { + ctx.mutex.Lock(); + ctx.pre_vblank_status.count++; + ctx.pre_vblank_status.processTime = LibKernel::KernelGetProcessTime(); + ctx.pre_vblank_status.tsc = LibKernel::KernelReadTsc(); + + for (auto& vblank_eq: ctx.pre_vblank_eqs) + { + if (vblank_eq != nullptr) + { + auto result = EventQueue::KernelTriggerEvent(vblank_eq, VIDEO_OUT_EVENT_VBLANK, EventQueue::KERNEL_EVFILT_VIDEO_OUT, + reinterpret_cast(ctx.pre_vblank_status.count)); + EXIT_NOT_IMPLEMENTED(result != OK); + } + } + ctx.mutex.Unlock(); + } + } +} + +void VideoOutContext::VblankEnd() +{ + Core::LockGuard lock(m_mutex); + + for (int i = 1; i < VIDEO_OUT_NUM_MAX; i++) + { + auto& ctx = m_video_out_ctx[i]; + if (ctx.opened) + { + ctx.mutex.Lock(); + ctx.vblank_status.count++; + ctx.vblank_status.processTime = LibKernel::KernelGetProcessTime(); + ctx.vblank_status.tsc = LibKernel::KernelReadTsc(); + + for (auto& vblank_eq: ctx.vblank_eqs) + { + if (vblank_eq != nullptr) + { + auto result = EventQueue::KernelTriggerEvent(vblank_eq, VIDEO_OUT_EVENT_VBLANK, EventQueue::KERNEL_EVFILT_VIDEO_OUT, + reinterpret_cast(ctx.vblank_status.count)); + EXIT_NOT_IMPLEMENTED(result != OK); + } + } + ctx.mutex.Unlock(); + } + } +} + VideoOutBufferImageInfo VideoOutContext::FindImage(void* buffer) { VideoOutBufferImageInfo ret; @@ -286,9 +397,10 @@ VideoOutBufferImageInfo VideoOutContext::FindImage(void* buffer) { if (ctx.buffers[j].buffer == buffer) { - ret.image = ctx.buffers[j].buffer_vulkan; - ret.buffer_size = ctx.buffers[j].buffer_size; - ret.index = j - ctx.buffers_sets[i].start_index; + ret.image = ctx.buffers[j].buffer_vulkan; + ret.buffer_size = ctx.buffers[j].buffer_size; + ret.buffer_pitch = ctx.buffers[j].buffer_pitch; + ret.index = j - ctx.buffers_sets[i].start_index; goto END; } } @@ -356,33 +468,24 @@ bool FlipQueue::Flip(uint32_t micros) auto* buffer = r.cfg->buffers[r.index].buffer_vulkan; - // if (buffer->framebuffer == nullptr) - // { - // // TODO(): Flush via GpuMemoryFlush() - // const auto& attribute = r.cfg->buffers_sets[r.cfg->buffers[r.index].set_id].attr; - // auto buffer_size = calc_buffer_size(&attribute); - // EXIT_NOT_IMPLEMENTED(buffer_size == 0); - // Graphics::VideoOutBufferObject vulkan_buffer_info(attribute.pixelFormat, attribute.width, attribute.height, - // (attribute.tilingMode == 0), Config::IsNeo()); - // r.cfg->buffers[r.index].buffer_vulkan = static_cast( - // Graphics::GpuMemoryGetObject(g_video_out_context->GetGraphicCtx(), - // reinterpret_cast(r.cfg->buffers[r.index].buffer), buffer_size, vulkan_buffer_info)); - // EXIT_NOT_IMPLEMENTED(r.cfg->buffers[r.index].buffer_vulkan != buffer); - // } - Graphics::WindowDrawBuffer(buffer); - if (r.cfg->flip_eq != nullptr) + m_mutex.Lock(); + + r.cfg->mutex.Lock(); + for (auto& flip_eq: r.cfg->flip_eqs) { - auto result = EventQueue::KernelTriggerEvent(r.cfg->flip_eq, VIDEO_OUT_EVENT_FLIP, EventQueue::KERNEL_EVFILT_VIDEO_OUT, - reinterpret_cast(r.flip_arg)); - EXIT_NOT_IMPLEMENTED(result != OK); + if (flip_eq != nullptr) + { + auto result = EventQueue::KernelTriggerEvent(flip_eq, VIDEO_OUT_EVENT_FLIP, EventQueue::KERNEL_EVFILT_VIDEO_OUT, + reinterpret_cast(r.flip_arg)); + EXIT_NOT_IMPLEMENTED(result != OK); + } } + r.cfg->mutex.Unlock(); printf("Flip done: %d\n", r.index); - m_mutex.Lock(); - m_requests.Remove(first); m_done_cond_var.Signal(); @@ -412,13 +515,27 @@ void FlipQueue::GetFlipStatus(VideoOutConfig* cfg, VideoOutFlipStatus* out) *out = cfg->flip_status; } -bool FlipWindow(uint32_t micros) +bool VideoOutFlipWindow(uint32_t micros) { EXIT_IF(g_video_out_context == nullptr); return g_video_out_context->GetFlipQueue().Flip(micros); } +void VideoOutBeginVblank() +{ + EXIT_IF(g_video_out_context == nullptr); + + g_video_out_context->VblankBegin(); +} + +void VideoOutEndVblank() +{ + EXIT_IF(g_video_out_context == nullptr); + + g_video_out_context->VblankEnd(); +} + KYTY_SYSV_ABI int VideoOutOpen(int user_id, int bus_type, int index, const void* param) { PRINT_NAME(); @@ -511,15 +628,23 @@ static void flip_event_reset_func(LibKernel::EventQueue::KernelEqueueEvent* even event->event.data = 0; } -static void flip_event_delete_func(LibKernel::EventQueue::KernelEqueueEvent* event) +static void flip_event_delete_func(EventQueue::KernelEqueue eq, LibKernel::EventQueue::KernelEqueueEvent* event) { EXIT_IF(event == nullptr); EXIT_IF(event->filter.data == nullptr); + + EXIT_NOT_IMPLEMENTED(event->event.ident != VIDEO_OUT_EVENT_FLIP); + EXIT_NOT_IMPLEMENTED(event->event.filter != EventQueue::KERNEL_EVFILT_VIDEO_OUT); + if (event->filter.data != nullptr) { auto* video_out = static_cast(event->filter.data); - EXIT_IF(video_out->flip_eq == nullptr); - video_out->flip_eq = nullptr; + video_out->mutex.Lock(); + EXIT_IF(video_out->flip_eqs.IsEmpty()); + auto index = video_out->flip_eqs.Find(eq); + EXIT_NOT_IMPLEMENTED(!video_out->flip_eqs.IndexValid(index)); + video_out->flip_eqs[index] = nullptr; + video_out->mutex.Unlock(); } } @@ -531,6 +656,42 @@ static void flip_event_trigger_func(LibKernel::EventQueue::KernelEqueueEvent* ev event->event.data = reinterpret_cast(trigger_data); } +static void vblank_event_reset_func(LibKernel::EventQueue::KernelEqueueEvent* event) +{ + EXIT_IF(event == nullptr); + event->triggered = false; + event->event.fflags = 0; + event->event.data = 0; +} + +static void vblank_event_delete_func(EventQueue::KernelEqueue eq, LibKernel::EventQueue::KernelEqueueEvent* event) +{ + EXIT_IF(event == nullptr); + EXIT_IF(event->filter.data == nullptr); + + EXIT_NOT_IMPLEMENTED(event->event.ident != VIDEO_OUT_EVENT_VBLANK); + EXIT_NOT_IMPLEMENTED(event->event.filter != EventQueue::KERNEL_EVFILT_VIDEO_OUT); + + if (event->filter.data != nullptr) + { + auto* video_out = static_cast(event->filter.data); + video_out->mutex.Lock(); + EXIT_IF(video_out->vblank_eqs.IsEmpty()); + auto index = video_out->vblank_eqs.Find(eq); + EXIT_NOT_IMPLEMENTED(!video_out->vblank_eqs.IndexValid(index)); + video_out->vblank_eqs[index] = nullptr; + video_out->mutex.Unlock(); + } +} + +static void vblank_event_trigger_func(LibKernel::EventQueue::KernelEqueueEvent* event, void* trigger_data) +{ + EXIT_IF(event == nullptr); + event->triggered = true; + event->event.fflags++; + event->event.data = reinterpret_cast(trigger_data); +} + KYTY_SYSV_ABI int VideoOutAddFlipEvent(EventQueue::KernelEqueue eq, int handle, void* udata) { PRINT_NAME(); @@ -539,7 +700,9 @@ KYTY_SYSV_ABI int VideoOutAddFlipEvent(EventQueue::KernelEqueue eq, int handle, auto* ctx = g_video_out_context->Get(handle); - EXIT_NOT_IMPLEMENTED(ctx->flip_eq != nullptr); + ctx->mutex.Lock(); + + EXIT_NOT_IMPLEMENTED(ctx->flip_eqs.Contains(eq)); if (eq == nullptr) { @@ -547,20 +710,60 @@ KYTY_SYSV_ABI int VideoOutAddFlipEvent(EventQueue::KernelEqueue eq, int handle, } EventQueue::KernelEqueueEvent event; - event.triggered = false; - event.event.ident = VIDEO_OUT_EVENT_FLIP; - event.event.filter = EventQueue::KERNEL_EVFILT_VIDEO_OUT; - event.event.udata = udata; - event.event.fflags = 0; - event.event.data = 0; - event.filter.delete_func = flip_event_delete_func; - event.filter.reset_func = flip_event_reset_func; - event.filter.trigger_func = flip_event_trigger_func; - event.filter.data = ctx; + event.triggered = false; + event.event.ident = VIDEO_OUT_EVENT_FLIP; + event.event.filter = EventQueue::KERNEL_EVFILT_VIDEO_OUT; + event.event.udata = udata; + event.event.fflags = 0; + event.event.data = 0; + event.filter.delete_event_func = flip_event_delete_func; + event.filter.reset_func = flip_event_reset_func; + event.filter.trigger_func = flip_event_trigger_func; + event.filter.data = ctx; int result = EventQueue::KernelAddEvent(eq, event); - ctx->flip_eq = eq; + ctx->flip_eqs.Add(eq); + + ctx->mutex.Unlock(); + + return result; +} + +KYTY_SYSV_ABI int VideoOutAddVblankEvent(LibKernel::EventQueue::KernelEqueue eq, int handle, void* udata) +{ + PRINT_NAME(); + + EXIT_IF(g_video_out_context == nullptr); + + auto* ctx = g_video_out_context->Get(handle); + + ctx->mutex.Lock(); + + EXIT_NOT_IMPLEMENTED(ctx->vblank_eqs.Contains(eq)); + + if (eq == nullptr) + { + return VIDEO_OUT_ERROR_INVALID_EVENT_QUEUE; + } + + EventQueue::KernelEqueueEvent event; + event.triggered = false; + event.event.ident = VIDEO_OUT_EVENT_VBLANK; + event.event.filter = EventQueue::KERNEL_EVFILT_VIDEO_OUT; + event.event.udata = udata; + event.event.fflags = 0; + event.event.data = 0; + event.filter.delete_event_func = vblank_event_delete_func; + event.filter.reset_func = vblank_event_reset_func; + event.filter.trigger_func = vblank_event_trigger_func; + event.filter.data = ctx; + + int result = EventQueue::KernelAddEvent(eq, event); + + ctx->vblank_eqs.Add(eq); + + ctx->mutex.Unlock(); return result; } @@ -615,16 +818,19 @@ KYTY_SYSV_ABI int VideoOutRegisterBuffers(int handle, int start_index, void* con EXIT_NOT_IMPLEMENTED(attribute->pitchInPixel != attribute->width); EXIT_NOT_IMPLEMENTED(attribute->option != 0); - auto buffer_size = calc_buffer_size(attribute); + uint64_t buffer_size = 0; + uint64_t buffer_pitch = 0; + calc_buffer_size(attribute, &buffer_size, &buffer_pitch); EXIT_NOT_IMPLEMENTED(buffer_size == 0); + EXIT_NOT_IMPLEMENTED(buffer_pitch == 0); ctx->buffers_sets[set_index].start_index = start_index; ctx->buffers_sets[set_index].num = buffer_num; ctx->buffers_sets[set_index].attr = *attribute; Graphics::VideoOutBufferObject vulkan_buffer_info(attribute->pixelFormat, attribute->width, attribute->height, - (attribute->tilingMode == 0), Config::IsNeo()); + (attribute->tilingMode == 0), Config::IsNeo(), buffer_pitch); for (int i = 0; i < buffer_num; i++) { @@ -636,6 +842,7 @@ KYTY_SYSV_ABI int VideoOutRegisterBuffers(int handle, int start_index, void* con ctx->buffers[i + start_index].set_id = set_index; ctx->buffers[i + start_index].buffer = addresses[i]; ctx->buffers[i + start_index].buffer_size = buffer_size; + ctx->buffers[i + start_index].buffer_pitch = buffer_pitch; ctx->buffers[i + start_index].buffer_vulkan = static_cast(Graphics::GpuMemoryGetObject( g_video_out_context->GetGraphicCtx(), reinterpret_cast(addresses[i]), buffer_size, vulkan_buffer_info)); @@ -717,6 +924,44 @@ KYTY_SYSV_ABI int VideoOutGetFlipStatus(int handle, VideoOutFlipStatus* status) return OK; } +KYTY_SYSV_ABI int VideoOutGetVblankStatus(int handle, VideoOutVblankStatus* status) +{ + PRINT_NAME(); + + EXIT_IF(g_video_out_context == nullptr); + + if (status == nullptr) + { + return VIDEO_OUT_ERROR_INVALID_ADDRESS; + } + + auto* ctx = g_video_out_context->Get(handle); + + ctx->mutex.Lock(); + *status = ctx->vblank_status; + ctx->mutex.Unlock(); + + printf("\t count = %" PRIu64 "\n", status->count); + printf("\t processTime = %" PRIu64 "\n", status->processTime); + printf("\t tsc = %" PRIu64 "\n", status->tsc); + + return OK; +} + +KYTY_SYSV_ABI int VideoOutSetWindowModeMargins(int handle, int top, int bottom) +{ + PRINT_NAME(); + + EXIT_IF(g_video_out_context == nullptr); + + [[maybe_unused]] auto* ctx = g_video_out_context->Get(handle); + + printf("\t top = %d\n", top); + printf("\t bottom = %d\n", bottom); + + return OK; +} + } // namespace Kyty::Libs::VideoOut #endif // KYTY_EMU_ENABLED diff --git a/source/emulator/src/Graphics/VideoOutBuffer.cpp b/source/emulator/src/Graphics/VideoOutBuffer.cpp index 48849d3..8665463 100644 --- a/source/emulator/src/Graphics/VideoOutBuffer.cpp +++ b/source/emulator/src/Graphics/VideoOutBuffer.cpp @@ -34,7 +34,7 @@ void* VideoOutBufferObject::Create(GraphicContext* ctx, const uint64_t* vaddr, c vk_obj->extent.width = width; vk_obj->extent.height = height; - vk_obj->format = VK_FORMAT_B8G8R8A8_SRGB; // VK_FORMAT_R8G8B8A8_SRGB; + vk_obj->format = VK_FORMAT_B8G8R8A8_SRGB; vk_obj->image = nullptr; vk_obj->image_view = nullptr; @@ -68,12 +68,17 @@ void* VideoOutBufferObject::Create(GraphicContext* ctx, const uint64_t* vaddr, c EXIT_NOT_IMPLEMENTED(!allocated); - // vkBindImageMemory(ctx->device, vk_obj->image, mem->memory, mem->offset); VulkanBindImageMemory(ctx, vk_obj, mem); vk_obj->memory = *mem; - EXIT_NOT_IMPLEMENTED(mem->requirements.size > *size); + printf("VideoOutBufferObject::Create()\n"); + printf("\t mem->requirements.size = %" PRIu64 "\n", mem->requirements.size); + printf("\t width = %" PRIu64 "\n", width); + printf("\t height = %" PRIu64 "\n", height); + printf("\t size = %" PRIu64 "\n", *size); + + // EXIT_NOT_IMPLEMENTED(mem->requirements.size > *size); GetUpdateFunc()(ctx, params, vk_obj, vaddr, size, vaddr_num); @@ -130,26 +135,30 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj, auto* vk_obj = static_cast(obj); - bool tiled = (params[VideoOutBufferObject::PARAM_TILED] != 0); - bool neo = (params[VideoOutBufferObject::PARAM_NEO] != 0); + bool tiled = (params[VideoOutBufferObject::PARAM_TILED] != 0); + bool neo = (params[VideoOutBufferObject::PARAM_NEO] != 0); + auto pitch = params[VideoOutBufferObject::PARAM_PITCH]; + auto width = params[VideoOutBufferObject::PARAM_WIDTH]; + auto height = params[VideoOutBufferObject::PARAM_HEIGHT]; if (tiled && buffer_is_tiled(*vaddr, *size)) { + EXIT_NOT_IMPLEMENTED(width != pitch); auto* temp_buf = new uint8_t[*size]; - TileConvertTiledToLinear(temp_buf, reinterpret_cast(*vaddr), TileMode::VideoOutTiled, - params[VideoOutBufferObject::PARAM_WIDTH], params[VideoOutBufferObject::PARAM_HEIGHT], neo); - UtilFillImage(ctx, vk_obj, temp_buf, *size); + TileConvertTiledToLinear(temp_buf, reinterpret_cast(*vaddr), TileMode::VideoOutTiled, width, height, neo); + UtilFillImage(ctx, vk_obj, temp_buf, *size, pitch); delete[] temp_buf; } else { - UtilFillImage(ctx, vk_obj, reinterpret_cast(*vaddr), *size); + UtilFillImage(ctx, vk_obj, reinterpret_cast(*vaddr), *size, pitch); } } bool VideoOutBufferObject::Equal(const uint64_t* other) const { return (params[PARAM_FORMAT] == other[PARAM_FORMAT] && params[PARAM_WIDTH] == other[PARAM_WIDTH] && - params[PARAM_HEIGHT] == other[PARAM_HEIGHT] && params[PARAM_TILED] == other[PARAM_TILED]); + params[PARAM_HEIGHT] == other[PARAM_HEIGHT] && params[PARAM_TILED] == other[PARAM_TILED] && + params[PARAM_PITCH] == other[PARAM_PITCH]); } static void delete_func(GraphicContext* ctx, void* obj, VulkanMemory* mem) diff --git a/source/emulator/src/Graphics/Window.cpp b/source/emulator/src/Graphics/Window.cpp index 656b5da..7f1e4dc 100644 --- a/source/emulator/src/Graphics/Window.cpp +++ b/source/emulator/src/Graphics/Window.cpp @@ -387,10 +387,12 @@ void game_show_window(GameApi* game, const Core::Timer& timer) printf("skip frame %d\n", p->skip_frames); } else { - if (VideoOut::FlipWindow(100000)) + VideoOut::VideoOutBeginVblank(); + if (VideoOut::VideoOutFlipWindow(100000)) { CalcFrameTime(game, timer.GetTimeS()); } + VideoOut::VideoOutEndVblank(); } } p->mutex.Unlock(); @@ -1403,6 +1405,39 @@ static VkPhysicalDevice VulkanFindPhysicalDevice(VkInstance instance, VkSurfaceK skip_device = true; } + if (!skip_device && + !CheckFormat(device, VK_FORMAT_R8G8B8A8_SRGB, true, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) + { + printf("Format VK_FORMAT_R8G8B8A8_SRGB cannot be used as texture"); + skip_device = true; + } + + if (!skip_device && + !CheckFormat(device, VK_FORMAT_R8G8B8A8_SRGB, true, VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) + { + printf("Format VK_FORMAT_R8G8B8A8_SRGB cannot be used as texture"); + + if (!skip_device && !CheckFormat(device, VK_FORMAT_R8G8B8A8_UNORM, true, + VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) + { + printf("Format VK_FORMAT_R8G8B8A8_UNORM cannot be used as texture"); + skip_device = true; + } + } + + if (!skip_device && + !CheckFormat(device, VK_FORMAT_B8G8R8A8_SRGB, true, VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) + { + printf("Format VK_FORMAT_B8G8R8A8_SRGB cannot be used as texture"); + + if (!skip_device && !CheckFormat(device, VK_FORMAT_B8G8R8A8_UNORM, true, + VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) + { + printf("Format VK_FORMAT_B8G8R8A8_UNORM cannot be used as texture"); + skip_device = true; + } + } + /*if (!skip_device && !CheckFormat(device, VK_FORMAT_S8_UINT, true, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { printf("Format VK_FORMAT_S8_UINT cannot be used as depth buffer"); @@ -1483,7 +1518,7 @@ static VkDevice VulkanCreateDevice(VkPhysicalDevice physical_device, VkSurfaceKH queue_create_info.queueCount = queue_count; queue_create_info.pQueuePriorities = queue_priority.GetDataConst(); - VkPhysicalDeviceFeatures device_features {}; + // VkPhysicalDeviceFeatures device_features {}; VkDeviceCreateInfo create_info {}; create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; @@ -1495,7 +1530,7 @@ static VkDevice VulkanCreateDevice(VkPhysicalDevice physical_device, VkSurfaceKH create_info.ppEnabledLayerNames = (r->enable_validation_layers ? r->required_layers.GetDataConst() : nullptr); create_info.enabledExtensionCount = device_extensions.Size(); create_info.ppEnabledExtensionNames = device_extensions.GetDataConst(); - create_info.pEnabledFeatures = &device_features; + create_info.pEnabledFeatures = nullptr; //&device_features; VkDevice device = nullptr; diff --git a/source/emulator/src/Kernel/EventQueue.cpp b/source/emulator/src/Kernel/EventQueue.cpp index cacee9f..09421e7 100644 --- a/source/emulator/src/Kernel/EventQueue.cpp +++ b/source/emulator/src/Kernel/EventQueue.cpp @@ -49,9 +49,9 @@ KernelEqueuePrivate::~KernelEqueuePrivate() { auto& event = m_events[index]; - if (event.filter.delete_func != nullptr) + if (event.filter.delete_event_func != nullptr) { - event.filter.delete_func(&event); + event.filter.delete_event_func(this, &event); } } } @@ -176,9 +176,9 @@ bool KernelEqueuePrivate::DeleteEvent(uintptr_t ident, int16_t filter) { auto& event = m_events[index]; - if (event.filter.delete_func != nullptr) + if (event.filter.delete_event_func != nullptr) { - event.filter.delete_func(&event); + event.filter.delete_event_func(this, &event); } m_events.Remove(index); @@ -267,7 +267,7 @@ int KYTY_SYSV_ABI KernelDeleteEqueue(KernelEqueue eq) int KYTY_SYSV_ABI KernelWaitEqueue(KernelEqueue eq, KernelEvent* ev, int num, int* out, const KernelUseconds* timo) { - PRINT_NAME(); + // PRINT_NAME(); if (eq == nullptr) { @@ -286,7 +286,7 @@ int KYTY_SYSV_ABI KernelWaitEqueue(KernelEqueue eq, KernelEvent* ev, int num, in EXIT_NOT_IMPLEMENTED(out == nullptr); - printf("\tEqueue wait: %s\n", eq->GetName().C_Str()); + // printf("\tEqueue wait: %s\n", eq->GetName().C_Str()); if (timo == nullptr) { @@ -306,11 +306,11 @@ int KYTY_SYSV_ABI KernelWaitEqueue(KernelEqueue eq, KernelEvent* ev, int num, in if (*out == 0) { - printf("\ttimedout\n"); + // printf("\ttimedout\n"); return KERNEL_ERROR_ETIMEDOUT; } - printf("\treceived %u events\n", *out); + // printf("\treceived %u events\n", *out); return OK; } diff --git a/source/emulator/src/Libs/LibGraphicsDriver.cpp b/source/emulator/src/Libs/LibGraphicsDriver.cpp index 4d0c89a..c31c25f 100644 --- a/source/emulator/src/Libs/LibGraphicsDriver.cpp +++ b/source/emulator/src/Libs/LibGraphicsDriver.cpp @@ -21,6 +21,7 @@ LIB_DEFINE(InitGraphicsDriver_1) LIB_FUNC("zwY0YV91TTI", Graphics::GraphicsSubmitCommandBuffers); LIB_FUNC("xbxNatawohc", Graphics::GraphicsSubmitAndFlipCommandBuffers); LIB_FUNC("yvZ73uQUqrk", Graphics::GraphicsSubmitDone); + LIB_FUNC("b08AgtPlHPg", Graphics::GraphicsAreSubmitsAllowed); LIB_FUNC("iBt3Oe00Kvc", Graphics::GraphicsFlushMemory); LIB_FUNC("b0xyllnVY-I", Graphics::GraphicsAddEqEvent); LIB_FUNC("PVT+fuoS9gU", Graphics::GraphicsDeleteEqEvent); diff --git a/source/emulator/src/Libs/LibKernel.cpp b/source/emulator/src/Libs/LibKernel.cpp index a75dfe2..0dca969 100644 --- a/source/emulator/src/Libs/LibKernel.cpp +++ b/source/emulator/src/Libs/LibKernel.cpp @@ -246,11 +246,20 @@ static int KYTY_SYSV_ABI write(int d, const char* str, int64_t size) int size_int = static_cast(size); - printf(FG_BRIGHT_MAGENTA "%.*s" DEFAULT, size_int, str); + emu_printf(FG_BRIGHT_MAGENTA "%.*s" DEFAULT, size_int, str); return size_int; } +static int64_t KYTY_SYSV_ABI read(int d, void* buf, uint64_t nbytes) +{ + // PRINT_NAME(); + + EXIT_NOT_IMPLEMENTED(d != 0); + + return static_cast(strlen(std::fgets(static_cast(buf), static_cast(nbytes), stdin))); +} + static int KYTY_SYSV_ABI KernelGetModuleInfoFromAddr(uint64_t addr, int n, ModuleInfo* r) { PRINT_NAME(); @@ -457,6 +466,7 @@ LIB_DEFINE(InitLibKernel_1_Equeue) LIB_FUNC("D0OdFMjp46I", EventQueue::KernelCreateEqueue); LIB_FUNC("jpFjmgAC5AE", EventQueue::KernelDeleteEqueue); LIB_FUNC("fzyMKs9kim0", EventQueue::KernelWaitEqueue); + LIB_FUNC("vz+pg2zdopI", EventQueue::KernelGetEventUserData); } LIB_DEFINE(InitLibKernel_1_EventFlag) @@ -481,6 +491,7 @@ LIB_DEFINE(InitLibKernel_1_Pthread) LIB_FUNC("6UgtwV+0zb4", LibKernel::PthreadCreate); LIB_FUNC("3PtV6p3QNX4", LibKernel::PthreadEqual); LIB_FUNC("onNY9Byn-W8", LibKernel::PthreadJoin); + LIB_FUNC("4qGrR6eoP9Y", LibKernel::PthreadDetach); LIB_FUNC("How7B8Oet6k", LibKernel::PthreadGetname); LIB_FUNC("62KCwEMmzcM", LibKernel::PthreadAttrDestroy); @@ -509,6 +520,7 @@ LIB_DEFINE(InitLibKernel_1_Pthread) LIB_FUNC("QBi7HCK03hw", LibKernel::KernelClockGettime); LIB_FUNC("ejekcaNQNq0", LibKernel::KernelGettimeofday); LIB_FUNC("1j3S3n-tTW4", LibKernel::KernelGetTscFrequency); + LIB_FUNC("4J2sUJmuHZQ", LibKernel::KernelGetProcessTime); LIB_FUNC("7H0iTOciTLo", LibKernel::pthread_mutex_lock_s); LIB_FUNC("2Z+PpY6CaJg", LibKernel::pthread_mutex_unlock_s); @@ -535,6 +547,7 @@ LIB_DEFINE(InitLibKernel_1) LIB_FUNC("959qrazPIrg", LibKernel::KernelGetProcParam); LIB_FUNC("p5EcQeEeJAE", LibKernel::KernelRtldSetApplicationHeapAPI); LIB_FUNC("FxVZqBAA7ks", LibKernel::write); + LIB_FUNC("DRuBt2pvICk", LibKernel::read); LIB_FUNC("f7KBOafysXo", LibKernel::KernelGetModuleInfoFromAddr); LIB_FUNC("zE-wXIZjLoM", LibKernel::KernelDebugRaiseExceptionOnReleaseMode); LIB_FUNC("OMDRKKAZ8I4", LibKernel::KernelDebugRaiseException); diff --git a/source/emulator/src/Libs/LibVideoOut.cpp b/source/emulator/src/Libs/LibVideoOut.cpp index b01bdfc..d1a953d 100644 --- a/source/emulator/src/Libs/LibVideoOut.cpp +++ b/source/emulator/src/Libs/LibVideoOut.cpp @@ -19,9 +19,12 @@ LIB_DEFINE(InitVideoOut_1) LIB_FUNC("i6-sR91Wt-4", VideoOut::VideoOutSetBufferAttribute); LIB_FUNC("CBiu4mCE1DA", VideoOut::VideoOutSetFlipRate); LIB_FUNC("HXzjK9yI30k", VideoOut::VideoOutAddFlipEvent); + LIB_FUNC("Xru92wHJRmg", VideoOut::VideoOutAddVblankEvent); LIB_FUNC("w3BY+tAEiQY", VideoOut::VideoOutRegisterBuffers); LIB_FUNC("U46NwOiJpys", VideoOut::VideoOutSubmitFlip); LIB_FUNC("SbU3dwp80lQ", VideoOut::VideoOutGetFlipStatus); + LIB_FUNC("1FZBKy8HeNU", VideoOut::VideoOutGetVblankStatus); + LIB_FUNC("MTxxrOCeSig", VideoOut::VideoOutSetWindowModeMargins); } } // namespace Kyty::Libs diff --git a/source/emulator/src/Log.cpp b/source/emulator/src/Log.cpp index 2e6cba6..14a1440 100644 --- a/source/emulator/src/Log.cpp +++ b/source/emulator/src/Log.cpp @@ -167,6 +167,35 @@ void SetOutputFile(const String& file_name, Core::File::Encoding enc) } // namespace Log +void emu_printf(const char* format, ...) +{ + EXIT_IF(!Log::g_log_initialized); + + EXIT_IF(Log::g_mutex == nullptr); + + Log::g_mutex->Lock(); + { + va_list args {}; + va_start(args, format); + String s; + s.Printf(format, args); + va_end(args); + + if (!Log::g_colored_printf) + { + s = Log::RemoveColors(s); + } + + ::printf("%s", s.C_Str()); + + if (Log::g_dir == Log::Direction::File && Log::g_file != nullptr) + { + Log::g_file->Write(s); + } + } + Log::g_mutex->Unlock(); +} + void printf(const char* format, ...) { EXIT_IF(!Log::g_log_initialized);