Minor changes

This commit is contained in:
InoriRus 2021-12-29 18:09:27 +10:00
parent 037431589f
commit 435a26c591
31 changed files with 1817 additions and 645 deletions

View file

@ -79,7 +79,7 @@ if (KYTY_LINKER STREQUAL LD)
set(KYTY_LD_OPTIONS "-Wl,--image-base=0x100000000000")
endif()
project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.0.4)
project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.0.5)
include(src_script.cmake)

View file

@ -72,7 +72,7 @@ void* GpuMemoryGetObject(GraphicContext* ctx, uint64_t vaddr, uint64_t size, con
void* GpuMemoryGetObject(GraphicContext* ctx, const uint64_t* vaddr, const uint64_t* size, int vaddr_num, const GpuObject& info);
void GpuMemoryResetHash(GraphicContext* ctx, uint64_t vaddr, uint64_t size, GpuMemoryObjectType type);
void GpuMemoryDbgDump();
void GpuMemoryFlush();
void GpuMemoryFlush(GraphicContext* ctx);
void GpuMemoryFrameDone();
void GpuMemoryWriteBack(GraphicContext* ctx);

View file

@ -28,6 +28,7 @@ int KYTY_SYSV_ABI GraphicsSubmitAndFlipCommandBuffers(uint32_t count, void*
void* ccb_gpu_addrs[], const uint32_t* ccb_sizes_in_bytes, int handle, int index,
int flip_mode, int64_t flip_arg);
int KYTY_SYSV_ABI GraphicsSubmitDone();
int KYTY_SYSV_ABI GraphicsAreSubmitsAllowed();
void KYTY_SYSV_ABI GraphicsFlushMemory();
int KYTY_SYSV_ABI GraphicsAddEqEvent(LibKernel::EventQueue::KernelEqueue eq, int id, void* udata);
int KYTY_SYSV_ABI GraphicsDeleteEqEvent(LibKernel::EventQueue::KernelEqueue eq, int id);

View file

@ -12,6 +12,7 @@ namespace Kyty::Libs::Graphics {
class HardwareContext;
class UserConfig;
class CommandProcessor;
struct VideoOutVulkanImage;
struct DepthStencilVulkanImage;
struct TextureVulkanImage;
@ -27,6 +28,8 @@ public:
CommandBuffer() { Allocate(); }
virtual ~CommandBuffer() { Free(); }
void SetParent(CommandProcessor* parent) { m_parent = parent; }
KYTY_CLASS_NO_COPY(CommandBuffer);
[[nodiscard]] bool IsInvalid() const;
@ -48,11 +51,14 @@ public:
void SetQueue(int queue) { m_queue = queue; }
void CommandProcessorWait();
private:
VulkanCommandPool* m_pool = nullptr;
uint32_t m_index = static_cast<uint32_t>(-1);
int m_queue = -1;
bool m_execute = false;
CommandProcessor* m_parent = nullptr;
};
void GraphicsRenderInit();
@ -67,7 +73,10 @@ void GraphicsRenderWriteAtEndOfPipe(CommandBuffer* buffer, uint32_t* dst_gpu_add
void GraphicsRenderWriteAtEndOfPipeGds(CommandBuffer* buffer, uint32_t* dst_gpu_addr, uint32_t dw_offset, uint32_t dw_num);
void GraphicsRenderWriteAtEndOfPipeWithInterruptWriteBackFlip(CommandBuffer* buffer, uint32_t* dst_gpu_addr, uint32_t value, int handle,
int index, int flip_mode, int64_t flip_arg);
void GraphicsRenderWriteAtEndOfPipeWithFlip(CommandBuffer* buffer, uint32_t* dst_gpu_addr, uint32_t value, int handle, int index,
int flip_mode, int64_t flip_arg);
void GraphicsRenderWriteAtEndOfPipeWithWriteBack(CommandBuffer* buffer, uint64_t* dst_gpu_addr, uint64_t value);
void GraphicsRenderWriteAtEndOfPipeWithInterruptWriteBack(CommandBuffer* buffer, uint64_t* dst_gpu_addr, uint64_t value);
void GraphicsRenderWriteAtEndOfPipeWithInterrupt(CommandBuffer* buffer, uint64_t* dst_gpu_addr, uint64_t value);
void GraphicsRenderWriteBack();
void GraphicsRenderDispatchDirect(CommandBuffer* buffer, HardwareContext* ctx, uint32_t thread_group_x, uint32_t thread_group_y,

View file

@ -21,6 +21,7 @@ void GraphicsRunWait();
void GraphicsRunDone();
void GraphicsRunDingDong(uint32_t ring_id, uint32_t offset_dw);
int GraphicsRunGetFrameNum();
bool GraphicsRunAreSubmitsAllowed();
} // namespace Kyty::Libs::Graphics

View file

@ -40,11 +40,13 @@ enum class ShaderInstructionType
DsAppend,
DsConsume,
Exp,
ImageLoad,
ImageSample,
SAddcU32,
SAddI32,
SAddU32,
SAndB32,
SAndB64,
SAndn2B64,
SAndSaveexecB64,
SBfmB32,
@ -76,25 +78,28 @@ enum class ShaderInstructionType
SLshrB32,
SMovB32,
SMovB64,
SMovkI32,
SMulI32,
SNandB64,
SNorB64,
SOrB64,
SOrn2B64,
SSetpcB64,
SSwappcB64,
SWaitcnt,
SWqmB64,
SXnorB64,
SXorB64,
TBufferLoadFormatXyzw,
VAddI32,
VAndB32,
VOrB32,
VXorB32,
VAshrI32,
VAshrrevI32,
VBcntU32B32,
VBfeU32,
VBfrevB32,
VCvtF32I32,
VBfmB32,
VBfrevB32,
VCeilF32,
VCmpEqF32,
VCmpEqI32,
VCmpEqU32,
@ -128,10 +133,13 @@ enum class ShaderInstructionType
VCmpTU32,
VCmpUF32,
VCmpxEqU32,
VCmpxGeU32,
VCmpxGtU32,
VCmpxNeU32,
VCndmaskB32,
VCosF32,
VCvtF32F16,
VCvtF32I32,
VCvtF32U32,
VCvtF32Ubyte0,
VCvtF32Ubyte1,
@ -139,6 +147,9 @@ enum class ShaderInstructionType
VCvtF32Ubyte3,
VCvtPkrtzF16F32,
VCvtU32F32,
VExpF32,
VFloorF32,
VFractF32,
VInterpP1F32,
VInterpP2F32,
VLshlB32,
@ -161,21 +172,19 @@ enum class ShaderInstructionType
VMulLoU32,
VMulU32U24,
VNotB32,
VOrB32,
VRcpF32,
VRsqF32,
VCeilF32,
VFractF32,
VRndneF32,
VTruncF32,
VExpF32,
VCosF32,
VFloorF32,
VRsqF32,
VSadU32,
VSqrtF32,
VSubF32,
VSubI32,
VSubrevF32,
VSubrevI32,
VTruncF32,
VXorB32,
ZMax
};
namespace ShaderInstructionFormat {
@ -274,6 +283,7 @@ enum Format : uint64_t
Vdata4Vaddr2SvSoffsOffenIdxenFloat4 = FormatDefine({DA4, S0A2, S1A4, S2, Offen, Idxen, Float4}),
Vdata3Vaddr3StSsDmask7 = FormatDefine({DA3, S0A3, S1A8, S2A4, Dmask7}),
Vdata4Vaddr3StSsDmaskF = FormatDefine({DA4, S0A3, S1A8, S2A4, DmaskF}),
Vdata4Vaddr3StDmaskF = FormatDefine({DA4, S0A3, S1A8, DmaskF}),
VdstVsrc0Vsrc1Smask2 = FormatDefine({D, S0, S1, S2A2}),
VdstVsrc0Vsrc1Vsrc2 = FormatDefine({D, S0, S1, S2}),
VdstVsrcAttrChan = FormatDefine({D, S0, Attr}),
@ -382,11 +392,18 @@ public:
{ return m_instructions.Contains(type, [](auto inst, auto type) { return inst.type == type; }); });
}
[[nodiscard]] bool IsEmbedded() const { return m_embedded; }
void SetEmbedded(bool embedded) { this->m_embedded = embedded; }
[[nodiscard]] uint32_t GetEmbeddedId() const { return m_embedded_id; }
void SetEmbeddedId(uint32_t embedded_id) { m_embedded_id = embedded_id; }
private:
Vector<ShaderInstruction> m_instructions;
Vector<ShaderLabel> m_labels;
ShaderType m_type = ShaderType::Unknown;
Vector<ShaderDebugPrintf> m_debug_printfs;
uint32_t m_embedded_id = 0;
bool m_embedded = false;
};
struct ShaderId
@ -562,9 +579,8 @@ struct ShaderStorageResources
int slots[BUFFERS_MAX] = {0};
int start_register[BUFFERS_MAX] = {0};
bool extended[BUFFERS_MAX] = {};
// int extended_index[BUFFERS_MAX] = {0};
int buffers_num = 0;
int binding_index = 0;
int buffers_num = 0;
int binding_index = 0;
};
struct ShaderTextureResources
@ -574,9 +590,8 @@ struct ShaderTextureResources
ShaderTextureResource textures[RES_MAX];
int start_register[RES_MAX] = {0};
bool extended[RES_MAX] = {};
// int extended_index[RES_MAX] = {0};
int textures_num = 0;
int binding_index = 0;
int textures_num = 0;
int binding_index = 0;
};
struct ShaderSamplerResources
@ -586,9 +601,8 @@ struct ShaderSamplerResources
ShaderSamplerResource samplers[RES_MAX];
int start_register[RES_MAX] = {0};
bool extended[RES_MAX] = {};
// int extended_index[RES_MAX] = {0};
int samplers_num = 0;
int binding_index = 0;
int samplers_num = 0;
int binding_index = 0;
};
struct ShaderGdsResources
@ -599,21 +613,19 @@ struct ShaderGdsResources
int slots[POINTERS_MAX] = {0};
int start_register[POINTERS_MAX] = {0};
bool extended[POINTERS_MAX] = {};
// int extended_index[POINTERS_MAX] = {0};
int pointers_num = 0;
int binding_index = 0;
int pointers_num = 0;
int binding_index = 0;
};
struct ShaderExtendedResources
{
bool used = false;
int slot = 0;
// int dw_num = 0;
bool used = false;
int slot = 0;
int start_register = 0;
ShaderExtendedResource data;
};
struct ShaderResources
struct ShaderBindResources
{
uint32_t push_constant_offset = 0;
uint32_t push_constant_size = 0;
@ -625,6 +637,11 @@ struct ShaderResources
ShaderExtendedResources extended;
};
// Value returned by the ShaderGetBindParametersVS/PS/CS functions.
struct ShaderBindParameters
{
	// Defaults to false.
	// NOTE(review): presumably set when a shader uses 2D textures without a sampler — confirm against the code that fills it.
	bool textures2D_without_sampler = false;
};
struct ShaderVertexInputInfo
{
static constexpr int RES_MAX = 16;
@ -636,43 +653,49 @@ struct ShaderVertexInputInfo
ShaderVertexInputBuffer buffers[RES_MAX];
int buffers_num = 0;
int export_count = 0;
ShaderResources bind;
ShaderBindResources bind;
};
struct ShaderComputeInputInfo
{
uint32_t threads_num[3] = {0, 0, 0};
bool group_id[3] = {false, false, false};
int thread_ids_num = 0;
int workgroup_register = 0;
ShaderResources bind;
uint32_t threads_num[3] = {0, 0, 0};
bool group_id[3] = {false, false, false};
int thread_ids_num = 0;
int workgroup_register = 0;
ShaderBindResources bind;
};
struct ShaderPixelInputInfo
{
uint32_t interpolator_settings[32] = {0};
uint32_t input_num = 0;
uint8_t target_output_mode[8] = {};
bool ps_pos_xy = false;
bool ps_pixel_kill_enable = false;
ShaderResources bind;
uint32_t interpolator_settings[32] = {0};
uint32_t input_num = 0;
uint8_t target_output_mode[8] = {};
bool ps_pos_xy = false;
bool ps_pixel_kill_enable = false;
ShaderBindResources bind;
};
void ShaderGetInputInfoVS(const VertexShaderInfo* regs, ShaderVertexInputInfo* info);
void ShaderGetInputInfoPS(const PixelShaderInfo* regs, const ShaderVertexInputInfo* vs_info, ShaderPixelInputInfo* ps_info);
void ShaderGetInputInfoCS(const ComputeShaderInfo* regs, ShaderComputeInputInfo* info);
void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info);
void ShaderDbgDumpInputInfo(const ShaderPixelInputInfo* info);
void ShaderDbgDumpInputInfo(const ShaderComputeInputInfo* info);
ShaderId ShaderGetIdVS(const VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info);
ShaderId ShaderGetIdPS(const PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info);
ShaderId ShaderGetIdCS(const ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info);
Vector<uint32_t> ShaderRecompileVS(const VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info);
Vector<uint32_t> ShaderRecompilePS(const PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info);
Vector<uint32_t> ShaderRecompileCS(const ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info);
bool ShaderIsDisabled(uint64_t addr);
void ShaderDisable(uint64_t id);
void ShaderInjectDebugPrintf(uint64_t id, const ShaderDebugPrintf& cmd);
void ShaderGetInputInfoVS(const VertexShaderInfo* regs, ShaderVertexInputInfo* info);
void ShaderGetInputInfoPS(const PixelShaderInfo* regs, const ShaderVertexInputInfo* vs_info, ShaderPixelInputInfo* ps_info);
void ShaderGetInputInfoCS(const ComputeShaderInfo* regs, ShaderComputeInputInfo* info);
void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info);
void ShaderDbgDumpInputInfo(const ShaderPixelInputInfo* info);
void ShaderDbgDumpInputInfo(const ShaderComputeInputInfo* info);
ShaderId ShaderGetIdVS(const VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info);
ShaderId ShaderGetIdPS(const PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info);
ShaderId ShaderGetIdCS(const ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info);
ShaderCode ShaderParseVS(const VertexShaderInfo* regs);
ShaderCode ShaderParsePS(const PixelShaderInfo* regs);
ShaderCode ShaderParseCS(const ComputeShaderInfo* regs);
ShaderBindParameters ShaderGetBindParametersVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info);
ShaderBindParameters ShaderGetBindParametersPS(const ShaderCode& code, const ShaderPixelInputInfo* input_info);
ShaderBindParameters ShaderGetBindParametersCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info);
Vector<uint32_t> ShaderRecompileVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info);
Vector<uint32_t> ShaderRecompilePS(const ShaderCode& code, const ShaderPixelInputInfo* input_info);
Vector<uint32_t> ShaderRecompileCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info);
bool ShaderIsDisabled(uint64_t addr);
void ShaderDisable(uint64_t id);
void ShaderInjectDebugPrintf(uint64_t id, const ShaderDebugPrintf& cmd);
} // namespace Kyty::Libs::Graphics

View file

@ -16,27 +16,31 @@ struct VulkanMemory;
class TextureObject: public GpuObject
{
public:
static constexpr int PARAM_DFMT = 0;
static constexpr int PARAM_NFMT = 1;
static constexpr int PARAM_WIDTH = 2;
static constexpr int PARAM_HEIGHT = 3;
static constexpr int PARAM_LEVELS = 4;
static constexpr int PARAM_TILE = 5;
static constexpr int PARAM_NEO = 6;
static constexpr int PARAM_SWIZZLE = 7;
static constexpr int TEXTURE_USAGE_SAMPLED = 0;
static constexpr int TEXTURE_USAGE_STORAGE = 1;
TextureObject(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t levels, bool htile, bool neo, uint32_t swizzle)
static constexpr int PARAM_DFMT_NFMT = 0;
static constexpr int PARAM_PITCH = 1;
static constexpr int PARAM_WIDTH_HEIGHT = 2;
static constexpr int PARAM_USAGE = 3;
static constexpr int PARAM_LEVELS = 4;
static constexpr int PARAM_TILE = 5;
static constexpr int PARAM_NEO = 6;
static constexpr int PARAM_SWIZZLE = 7;
TextureObject(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t pitch, uint32_t levels, bool htile, bool neo,
uint32_t swizzle, uint32_t usage)
{
params[PARAM_DFMT] = dfmt;
params[PARAM_NFMT] = nfmt;
params[PARAM_WIDTH] = width;
params[PARAM_HEIGHT] = height;
params[PARAM_LEVELS] = levels;
params[PARAM_TILE] = htile ? 1 : 0;
params[PARAM_NEO] = neo ? 1 : 0;
params[PARAM_SWIZZLE] = swizzle;
check_hash = true;
type = Graphics::GpuMemoryObjectType::Texture;
params[PARAM_DFMT_NFMT] = (static_cast<uint64_t>(dfmt) << 32u) | nfmt;
params[PARAM_PITCH] = pitch;
params[PARAM_WIDTH_HEIGHT] = (static_cast<uint64_t>(width) << 32u) | height;
params[PARAM_USAGE] = usage;
params[PARAM_LEVELS] = levels;
params[PARAM_TILE] = htile ? 1 : 0;
params[PARAM_NEO] = neo ? 1 : 0;
params[PARAM_SWIZZLE] = swizzle;
check_hash = true;
type = Graphics::GpuMemoryObjectType::Texture;
}
void* Create(GraphicContext* ctx, const uint64_t* vaddr, const uint64_t* size, int vaddr_num, VulkanMemory* mem) const override;

View file

@ -24,8 +24,8 @@ void TileConvertTiledToLinear(void* dst, const void* src, TileMode mode, uint32_
void TileGetDepthSize(uint32_t width, uint32_t height, uint32_t z_format, uint32_t stencil_format, bool htile, bool neo,
uint32_t* stencil_size, uint32_t* htile_size, uint32_t* depth_size, uint32_t* pitch);
void TileGetVideoOutSize(uint32_t width, uint32_t height, bool tile, bool neo, uint32_t* size);
void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t levels, bool tile, bool neo,
void TileGetVideoOutSize(uint32_t width, uint32_t height, bool tile, bool neo, uint32_t* size, uint32_t* pitch);
void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t pitch, uint32_t levels, bool tile, bool neo,
uint32_t* total_size, uint32_t* level_sizes, uint32_t* padded_width, uint32_t* padded_height);
} // namespace Kyty::Libs::Graphics

View file

@ -27,15 +27,16 @@ struct BufferImageCopy
uint32_t offset;
uint32_t width;
uint32_t height;
uint32_t pitch;
};
void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, VideoOutVulkanImage* dst_image);
void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, uint32_t src_pitch, VideoOutVulkanImage* dst_image);
void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, TextureVulkanImage* dst_image,
const Vector<BufferImageCopy>& regions);
const Vector<BufferImageCopy>& regions, uint64_t dst_layout);
void UtilBlitImage(CommandBuffer* buffer, VideoOutVulkanImage* src_image, VulkanSwapchain* dst_swapchain);
void UtilFillImage(GraphicContext* ctx, VideoOutVulkanImage* dst_image, const void* src_data, uint64_t size);
void UtilFillImage(GraphicContext* ctx, VideoOutVulkanImage* dst_image, const void* src_data, uint64_t size, uint32_t src_pitch);
void UtilFillImage(GraphicContext* ctx, TextureVulkanImage* dst_image, const void* src_data, uint64_t size,
const Vector<BufferImageCopy>& regions);
const Vector<BufferImageCopy>& regions, uint64_t dst_layout);
void UtilCopyBuffer(VulkanBuffer* src_buffer, VulkanBuffer* dst_buffer, uint64_t size);
void UtilSetImageLayoutOptimal(DepthStencilVulkanImage* image);
void UtilSetImageLayoutOptimal(VideoOutVulkanImage* image);

View file

@ -19,12 +19,14 @@ namespace Kyty::Libs::VideoOut {
struct VideoOutResolutionStatus;
struct VideoOutBufferAttribute;
struct VideoOutFlipStatus;
struct VideoOutVblankStatus;
struct VideoOutBufferImageInfo
{
Graphics::VideoOutVulkanImage* image = nullptr;
uint32_t index = static_cast<uint32_t>(-1);
uint64_t buffer_size = 0;
Graphics::VideoOutVulkanImage* image = nullptr;
uint32_t index = static_cast<uint32_t>(-1);
uint64_t buffer_size = 0;
uint64_t buffer_pitch = 0;
};
void VideoOutInit(uint32_t width, uint32_t height);
@ -38,12 +40,17 @@ KYTY_SYSV_ABI void VideoOutSetBufferAttribute(VideoOutBufferAttribute* attribute
uint32_t aspect_ratio, uint32_t width, uint32_t height, uint32_t pitch_in_pixel);
KYTY_SYSV_ABI int VideoOutSetFlipRate(int handle, int rate);
KYTY_SYSV_ABI int VideoOutAddFlipEvent(LibKernel::EventQueue::KernelEqueue eq, int handle, void* udata);
KYTY_SYSV_ABI int VideoOutAddVblankEvent(LibKernel::EventQueue::KernelEqueue eq, int handle, void* udata);
KYTY_SYSV_ABI int VideoOutRegisterBuffers(int handle, int start_index, void* const* addresses, int buffer_num,
const VideoOutBufferAttribute* attribute);
KYTY_SYSV_ABI int VideoOutSubmitFlip(int handle, int index, int flip_mode, int64_t flip_arg);
KYTY_SYSV_ABI int VideoOutGetFlipStatus(int handle, VideoOutFlipStatus* status);
KYTY_SYSV_ABI int VideoOutGetVblankStatus(int handle, VideoOutVblankStatus* status);
KYTY_SYSV_ABI int VideoOutSetWindowModeMargins(int handle, int top, int bottom);
bool FlipWindow(uint32_t micros);
void VideoOutBeginVblank();
void VideoOutEndVblank();
bool VideoOutFlipWindow(uint32_t micros);
} // namespace Kyty::Libs::VideoOut

View file

@ -21,14 +21,16 @@ public:
static constexpr int PARAM_HEIGHT = 2;
static constexpr int PARAM_TILED = 3;
static constexpr int PARAM_NEO = 4;
static constexpr int PARAM_PITCH = 5;
explicit VideoOutBufferObject(uint32_t pixel_format, uint32_t width, uint32_t height, bool tiled, bool neo)
explicit VideoOutBufferObject(uint32_t pixel_format, uint32_t width, uint32_t height, bool tiled, bool neo, uint32_t pitch)
{
params[PARAM_FORMAT] = pixel_format;
params[PARAM_WIDTH] = width;
params[PARAM_HEIGHT] = height;
params[PARAM_TILED] = tiled ? 1 : 0;
params[PARAM_NEO] = neo ? 1 : 0;
params[PARAM_PITCH] = pitch;
check_hash = true;
type = Graphics::GpuMemoryObjectType::VideoOutBuffer;
}

View file

@ -22,9 +22,11 @@ constexpr int16_t KERNEL_EVFILT_HRTIMER = -15;
class KernelEqueuePrivate;
struct KernelEqueueEvent;
using KernelEqueue = KernelEqueuePrivate*;
using trigger_func_t = void (*)(KernelEqueueEvent* event, void* trigger_data);
using reset_func_t = void (*)(KernelEqueueEvent* event);
using delete_func_t = void (*)(KernelEqueueEvent* event);
using delete_func_t = void (*)(KernelEqueue eq, KernelEqueueEvent* event);
struct KernelEvent
{
@ -38,10 +40,10 @@ struct KernelEvent
struct KernelFilter
{
void* data = nullptr;
trigger_func_t trigger_func = nullptr;
reset_func_t reset_func = nullptr;
delete_func_t delete_func = nullptr;
void* data = nullptr;
trigger_func_t trigger_func = nullptr;
reset_func_t reset_func = nullptr;
delete_func_t delete_event_func = nullptr;
};
struct KernelEqueueEvent
@ -51,8 +53,6 @@ struct KernelEqueueEvent
KernelFilter filter;
};
using KernelEqueue = KernelEqueuePrivate*;
int KYTY_SYSV_ABI KernelAddEvent(KernelEqueue eq, const KernelEqueueEvent& event);
int KYTY_SYSV_ABI KernelTriggerEvent(KernelEqueue eq, uintptr_t ident, int16_t filter, void* trigger_data);
int KYTY_SYSV_ABI KernelDeleteEvent(KernelEqueue eq, uintptr_t ident, int16_t filter);

View file

@ -77,6 +77,7 @@ String RemoveColors(const String& str);
} // namespace Log
void printf(const char* format, ...) KYTY_FORMAT_PRINTF(1, 2);
void emu_printf(const char* format, ...) KYTY_FORMAT_PRINTF(1, 2);
} // namespace Kyty

View file

@ -34,7 +34,12 @@ public:
void* GetObject(GraphicContext* ctx, const uint64_t* vaddr, const uint64_t* size, int vaddr_num, const GpuObject& info);
void ResetHash(GraphicContext* ctx, uint64_t* vaddr, uint64_t* size, int vaddr_num, GpuMemoryObjectType type);
void FrameDone();
void WriteBack(GraphicContext* ctx);
// Sync: GPU -> CPU
void WriteBack(GraphicContext* ctx);
// Sync: CPU -> GPU
void Flush(GraphicContext* ctx);
void DbgDump();
@ -387,7 +392,17 @@ void* GpuMemory::GetObject(GraphicContext* ctx, const uint64_t* vaddr, const uin
for (int vi = 0; vi < vaddr_num; vi++)
{
EXIT_NOT_IMPLEMENTED(!h.free && vaddr_overlap(h.vaddr, h.size, h.overlaps_num, vaddr[vi], size[vi]));
if (!h.free && vaddr_overlap(h.vaddr, h.size, h.overlaps_num, vaddr[vi], size[vi]))
{
if (h.overlaps_num == 1 &&
(h.overlaps[0].type == GpuMemoryObjectType::Label || h.overlaps[0].type == GpuMemoryObjectType::StorageBuffer))
{
Free(ctx, h);
} else
{
KYTY_NOT_IMPLEMENTED;
}
}
}
}
@ -641,6 +656,25 @@ void GpuMemory::WriteBack(GraphicContext* ctx)
}
}
// Sync: CPU -> GPU.
// Holds m_mutex and calls the update callback of every overlap attached to
// each non-free entry of m_objects. A missing callback is treated as fatal.
void GpuMemory::Flush(GraphicContext* ctx)
{
	Core::LockGuard lock(m_mutex);

	for (auto& entry: m_objects)
	{
		// Free entries have nothing to update.
		if (entry.free)
		{
			continue;
		}

		for (int index = 0; index < entry.overlaps_num; index++)
		{
			auto& o = entry.overlaps[index];

			EXIT_IF(o.update_func == nullptr);

			o.update_func(ctx, o.params, o.obj, entry.vaddr, entry.size, entry.vaddr_num);
		}
	}
}
void GpuMemory::DbgDump()
{
Core::LockGuard lock(m_mutex);
@ -756,11 +790,13 @@ void GpuMemoryDbgDump()
g_gpu_memory->DbgDump();
}
void GpuMemoryFlush()
void GpuMemoryFlush(GraphicContext* ctx)
{
EXIT_IF(g_gpu_memory == nullptr);
EXIT_IF(ctx == nullptr);
// TODO(): update vulkan objects after CPU-drawing
// update vulkan objects after CPU-drawing
g_gpu_memory->Flush(ctx);
}
void GpuMemoryFrameDone()
@ -775,6 +811,7 @@ void GpuMemoryWriteBack(GraphicContext* ctx)
EXIT_IF(g_gpu_memory == nullptr);
EXIT_IF(ctx == nullptr);
// update CPU memory after GPU-drawing
g_gpu_memory->WriteBack(ctx);
}

View file

@ -89,6 +89,7 @@ int KYTY_SYSV_ABI GraphicsSetPsShader350(uint32_t* cmd, uint64_t size, const uin
{
PRINT_NAME();
EXIT_NOT_IMPLEMENTED(ps_regs == nullptr);
EXIT_NOT_IMPLEMENTED(size < sizeof(PsStageRegisters) / 12 + 1);
printf("\t cmd_buffer = %016" PRIx64 "\n", reinterpret_cast<uint64_t>(cmd));
@ -323,19 +324,20 @@ int KYTY_SYSV_ABI GraphicsSubmitDone()
PRINT_NAME();
GraphicsRunDone();
// GpuMemoryFrameDone();
// GpuMemoryDbgDump();
return OK;
}
// Integer-returning wrapper over GraphicsRunAreSubmitsAllowed():
// returns 1 when it reports true, 0 otherwise.
int KYTY_SYSV_ABI GraphicsAreSubmitsAllowed()
{
	if (GraphicsRunAreSubmitsAllowed())
	{
		return 1;
	}

	return 0;
}
void KYTY_SYSV_ABI GraphicsFlushMemory()
{
PRINT_NAME();
GraphicsRunDone();
EXIT("1");
GpuMemoryFlush(WindowGetGraphicContext());
}
int KYTY_SYSV_ABI GraphicsAddEqEvent(LibKernel::EventQueue::KernelEqueue eq, int id, void* udata)

File diff suppressed because it is too large Load diff

View file

@ -60,9 +60,12 @@ public:
void DrawIndex(uint32_t index_count, const void* index_addr, uint32_t flags, uint32_t type);
void DrawIndexAuto(uint32_t index_count, uint32_t flags);
void WriteAtEndOfPipe32(uint32_t cache_policy, uint32_t event_write_dest, uint32_t eop_event_type, uint32_t cache_action,
uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint32_t value);
uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint32_t value,
uint32_t interrupt_selector);
void WriteAtEndOfPipe64(uint32_t cache_policy, uint32_t event_write_dest, uint32_t eop_event_type, uint32_t cache_action,
uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint64_t value);
uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint64_t value,
uint32_t interrupt_selector);
void Flip(void* dst_gpu_addr, uint32_t value);
void FlipWithInterrupt(uint32_t eop_event_type, uint32_t cache_action, void* dst_gpu_addr, uint32_t value);
void WriteBack();
void MemoryBarrier();
@ -140,6 +143,7 @@ public:
int flip_mode, int64_t flip_arg);
void Done();
void WaitForIdle();
bool IsIdle();
void SetCp(CommandProcessor* cp)
{
@ -197,6 +201,7 @@ public:
void DingDong(uint32_t offset_dw);
void Done();
void WaitForIdle();
bool IsIdle();
void SetCp(CommandProcessor* cp)
{
@ -257,6 +262,7 @@ public:
void Done();
void Wait();
int GetFrameNum();
bool AreSubmitsAllowed();
private:
void Init();
@ -493,6 +499,27 @@ void Gpu::Done()
m_done_num++;
}
// Returns true only when the graphics ring and every non-null compute ring
// report idle. The check runs while holding m_mutex.
bool Gpu::AreSubmitsAllowed()
{
	Core::LockGuard lock(m_mutex);

	// A busy graphics ring decides the answer; compute rings are not queried.
	if (!m_gfx_ring->IsIdle())
	{
		return false;
	}

	for (auto& ring: m_compute_ring)
	{
		// Null slots are skipped.
		if (ring != nullptr && !ring->IsIdle())
		{
			return false;
		}
	}

	return true;
}
int Gpu::GetFrameNum()
{
Core::LockGuard lock(m_mutex);
@ -583,6 +610,7 @@ void CommandProcessor::BufferInit()
EXIT_IF(buf != nullptr);
buf = new CommandBuffer;
buf->SetParent(this);
buf->SetQueue(m_queue);
}
@ -777,6 +805,12 @@ void GraphicsRing::WaitForIdle()
}
}
// Returns the current value of m_idle, read while holding m_mutex.
bool GraphicsRing::IsIdle()
{
	Core::LockGuard lock(m_mutex);

	const bool idle_now = m_idle;

	return idle_now;
}
GraphicsRing::CmdBatch GraphicsRing::GetCmdBatch()
{
Core::LockGuard lock(m_mutex);
@ -956,6 +990,12 @@ void ComputeRing::WaitForIdle()
}
}
// Returns the current value of m_idle, read while holding m_mutex.
bool ComputeRing::IsIdle()
{
	Core::LockGuard lock(m_mutex);

	const bool idle_now = m_idle;

	return idle_now;
}
void ComputeRing::SetActive(bool flag)
{
Core::LockGuard lock(m_mutex);
@ -1078,7 +1118,8 @@ void CommandProcessor::WaitFlipDone(uint32_t video_out_handle, uint32_t display_
}
void CommandProcessor::WriteAtEndOfPipe32(uint32_t cache_policy, uint32_t event_write_dest, uint32_t eop_event_type, uint32_t cache_action,
uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint32_t value)
uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint32_t value,
uint32_t interrupt_selector)
{
Core::LockGuard lock(m_mutex);
@ -1091,12 +1132,13 @@ void CommandProcessor::WriteAtEndOfPipe32(uint32_t cache_policy, uint32_t event_
printf("\t cache_action = 0x%08" PRIx32 "\n", cache_action);
printf("\t event_index = 0x%08" PRIx32 "\n", event_index);
printf("\t event_write_source = 0x%08" PRIx32 "\n", event_write_source);
printf("\t interrupt_selector = 0x%08" PRIx32 "\n", interrupt_selector);
printf("\t dst_gpu_addr = 0x%016" PRIx64 "\n", reinterpret_cast<uint64_t>(dst_gpu_addr));
printf("\t value = 0x%08" PRIx32 "\n", value);
EXIT_NOT_IMPLEMENTED(cache_policy != 0x00000000);
EXIT_NOT_IMPLEMENTED(event_write_dest != 0x00000000);
// EXIT_NOT_IMPLEMENTED(event_write_source != 0x00000002);
EXIT_NOT_IMPLEMENTED(interrupt_selector != 0x0);
if (event_write_source == 0x00000002 && eop_event_type == 0x0000002f && cache_action == 0x00000000 && event_index == 0x00000006)
{
@ -1111,7 +1153,8 @@ void CommandProcessor::WriteAtEndOfPipe32(uint32_t cache_policy, uint32_t event_
}
void CommandProcessor::WriteAtEndOfPipe64(uint32_t cache_policy, uint32_t event_write_dest, uint32_t eop_event_type, uint32_t cache_action,
uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint64_t value)
uint32_t event_index, uint32_t event_write_source, void* dst_gpu_addr, uint64_t value,
uint32_t interrupt_selector)
{
Core::LockGuard lock(m_mutex);
@ -1124,28 +1167,38 @@ void CommandProcessor::WriteAtEndOfPipe64(uint32_t cache_policy, uint32_t event_
printf("\t cache_action = 0x%08" PRIx32 "\n", cache_action);
printf("\t event_index = 0x%08" PRIx32 "\n", event_index);
printf("\t event_write_source = 0x%08" PRIx32 "\n", event_write_source);
printf("\t interrupt_selector = 0x%08" PRIx32 "\n", interrupt_selector);
printf("\t dst_gpu_addr = 0x%016" PRIx64 "\n", reinterpret_cast<uint64_t>(dst_gpu_addr));
printf("\t value = 0x%016" PRIx64 "\n", value);
EXIT_NOT_IMPLEMENTED(cache_policy != 0x00000000);
EXIT_NOT_IMPLEMENTED(event_write_dest != 0x00000000);
// EXIT_NOT_IMPLEMENTED(event_write_source != 0x00000002);
if (eop_event_type == 0x00000004 && cache_action == 0x00000000 && event_index == 0x00000005 && event_write_source == 0x00000002)
if (eop_event_type == 0x04 && cache_action == 0x00 && event_index == 0x05 && event_write_source == 0x02 &&
(interrupt_selector == 0x00 || interrupt_selector == 0x03))
{
GraphicsRenderWriteAtEndOfPipe(m_buffer[m_current_buffer], static_cast<uint64_t*>(dst_gpu_addr), value);
} else if (eop_event_type == 0x00000004 && cache_action == 0x00000000 && event_index == 0x00000005 && event_write_source == 0x00000001)
} else if (eop_event_type == 0x04 && cache_action == 0x00 && event_index == 0x05 && event_write_source == 0x01 &&
(interrupt_selector == 0x00 || interrupt_selector == 0x03))
{
GraphicsRenderWriteAtEndOfPipe(m_buffer[m_current_buffer], static_cast<uint32_t*>(dst_gpu_addr), value);
} else if (((eop_event_type == 0x00000004 && event_index == 0x00000005) ||
(eop_event_type == 0x00000028 && event_index == 0x00000005) ||
(eop_event_type == 0x0000002f && event_index == 0x00000006)) &&
cache_action == 0x00000038 && event_write_source == 0x00000002)
} else if (((eop_event_type == 0x04 && event_index == 0x05) || (eop_event_type == 0x28 && event_index == 0x05) ||
(eop_event_type == 0x2f && event_index == 0x06)) &&
cache_action == 0x38 && event_write_source == 0x02 && (interrupt_selector == 0x00 || interrupt_selector == 0x03))
{
GraphicsRenderWriteAtEndOfPipeWithWriteBack(m_buffer[m_current_buffer], static_cast<uint64_t*>(dst_gpu_addr), value);
} else if (eop_event_type == 0x00000004 && cache_action == 0x00000000 && event_index == 0x00000005 && event_write_source == 0x00000004)
} else if (eop_event_type == 0x04 && cache_action == 0x00 && event_index == 0x05 && event_write_source == 0x04 &&
(interrupt_selector == 0x00 || interrupt_selector == 0x03))
{
GraphicsRenderWriteAtEndOfPipeClockCounter(m_buffer[m_current_buffer], static_cast<uint64_t*>(dst_gpu_addr));
} else if ((eop_event_type == 0x04 && event_index == 0x05) && cache_action == 0x00 && event_write_source == 0x02 &&
interrupt_selector == 0x02)
{
GraphicsRenderWriteAtEndOfPipeWithInterrupt(m_buffer[m_current_buffer], static_cast<uint64_t*>(dst_gpu_addr), value);
} else if ((eop_event_type == 0x04 && event_index == 0x05) && cache_action == 0x3b && event_write_source == 0x02 &&
interrupt_selector == 0x02)
{
GraphicsRenderWriteAtEndOfPipeWithInterruptWriteBack(m_buffer[m_current_buffer], static_cast<uint64_t*>(dst_gpu_addr), value);
} else
{
EXIT("unknown event type\n");
@ -1178,6 +1231,20 @@ void CommandProcessor::TriggerEvent(uint32_t event_type, uint32_t event_index)
}
}
// Logs the request, then forwards it to GraphicsRenderWriteAtEndOfPipeWithFlip()
// for the current command buffer, using the flip parameters stored in m_flip.
//   dst_gpu_addr - destination address, passed on as a uint32_t pointer
//   value        - value passed on together with the address
void CommandProcessor::Flip(void* dst_gpu_addr, uint32_t value)
{
	Core::LockGuard lock(m_mutex);

	EXIT_IF(m_current_buffer < 0 || m_current_buffer >= VK_BUFFERS_NUM);

	const auto addr_value = reinterpret_cast<uint64_t>(dst_gpu_addr);

	printf("CommandProcessor::Flip()\n");
	printf("\t dst_gpu_addr = 0x%016" PRIx64 "\n", addr_value);
	printf("\t value = 0x%08" PRIx32 "\n", value);

	auto* dst32 = static_cast<uint32_t*>(dst_gpu_addr);

	GraphicsRenderWriteAtEndOfPipeWithFlip(m_buffer[m_current_buffer], dst32, value, m_flip.handle, m_flip.index, m_flip.flip_mode,
	                                       m_flip.flip_arg);
}
void CommandProcessor::FlipWithInterrupt(uint32_t eop_event_type, uint32_t cache_action, void* dst_gpu_addr, uint32_t value)
{
Core::LockGuard lock(m_mutex);
@ -1207,6 +1274,13 @@ void CommandProcessor::WriteBack()
GraphicsRenderWriteBack();
}
// Calls BufferWait() on the parent command processor (m_parent).
// A null m_parent is treated as fatal.
void CommandBuffer::CommandProcessorWait()
{
	EXIT_IF(m_parent == nullptr);

	auto* owner = m_parent;

	owner->BufferWait();
}
void GraphicsRunSubmit(uint32_t* cmd_draw_buffer, uint32_t num_draw_dw, uint32_t* cmd_const_buffer, uint32_t num_const_dw)
{
EXIT_IF(cmd_draw_buffer == nullptr);
@ -1262,6 +1336,13 @@ void GraphicsRunDone()
g_gpu->Done();
}
// Free-function entry point that forwards to Gpu::AreSubmitsAllowed() on g_gpu.
// A null g_gpu is treated as fatal.
bool GraphicsRunAreSubmitsAllowed()
{
	EXIT_IF(g_gpu == nullptr);

	const bool allowed = g_gpu->AreSubmitsAllowed();

	return allowed;
}
int GraphicsRunGetFrameNum()
{
EXIT_IF(g_gpu == nullptr);
@ -1456,11 +1537,12 @@ KYTY_CP_OP_PARSER(cp_op_event_write_eop)
uint32_t cache_action = (buffer[0] >> 12u) & 0x3fu;
uint32_t event_index = (buffer[0] >> 8u) & 0x7u;
uint32_t event_write_source = ((buffer[2] >> 29u) & 0x7u);
uint32_t interrupt_selector = (buffer[2] >> 24u) & 0x7u;
auto* dst_gpu_addr = reinterpret_cast<void*>(buffer[1] | (static_cast<uint64_t>(buffer[2] & 0xffffu) << 32u));
uint64_t value = (buffer[3] | (static_cast<uint64_t>(buffer[4]) << 32u));
cp->WriteAtEndOfPipe64(cache_policy, event_write_dest, eop_event_type, cache_action, event_index, event_write_source, dst_gpu_addr,
value);
value, interrupt_selector);
return 5;
}
@ -1477,12 +1559,13 @@ KYTY_CP_OP_PARSER(cp_op_event_write_eos)
uint32_t cache_action = (buffer[0] >> 12u) & 0x3fu;
uint32_t event_index = (buffer[0] >> 8u) & 0x7u;
uint32_t event_write_source = ((buffer[2] >> 29u) & 0x7u);
uint32_t interrupt_selector = (buffer[2] >> 24u) & 0x7u;
auto* dst_gpu_addr = reinterpret_cast<void*>(buffer[1] | (static_cast<uint64_t>(buffer[2] & 0xffffu) << 32u));
uint32_t value = buffer[3];
cp->WriteAtEndOfPipe32(cache_policy, event_write_dest, eop_event_type, cache_action, event_index, event_write_source, dst_gpu_addr,
value);
value, interrupt_selector);
return 4;
}
@ -1503,10 +1586,8 @@ KYTY_CP_OP_PARSER(cp_op_release_mem)
auto* dst_gpu_addr = reinterpret_cast<void*>(buffer[2] | (static_cast<uint64_t>(buffer[3]) << 32u));
uint64_t value = (buffer[4] | (static_cast<uint64_t>(buffer[5]) << 32u));
EXIT_NOT_IMPLEMENTED(interrupt_selector != 0x3);
cp->WriteAtEndOfPipe64(cache_policy, event_write_dest, eop_event_type, cache_action, event_index, event_write_source, dst_gpu_addr,
value);
value, interrupt_selector);
return 6;
}
@ -1764,6 +1845,13 @@ KYTY_CP_OP_PARSER(cp_op_marker)
case 0x0: cp->SetEmbeddedDataMarker(buffer + 1, len_dw, align); break;
case 0x4: cp->SetUserDataMarker(UserSgprType::Vsharp); break;
case 0xd: cp->SetUserDataMarker(UserSgprType::Region); break;
case 0x778:
{
auto* addr = reinterpret_cast<void*>(buffer[1] | (static_cast<uint64_t>(buffer[2]) << 32u));
uint32_t value = buffer[3];
cp->Flip(addr, value);
break;
}
case 0x781:
{
auto* addr = reinterpret_cast<void*>(buffer[1] | (static_cast<uint64_t>(buffer[2]) << 32u));

View file

@ -14,11 +14,19 @@
namespace Kyty::Libs::Graphics {
// Lifecycle state of a Label as tracked by LabelManager.
enum LabelStatus
{
// Assigned by LabelManager::Create(); the label has not been passed to LabelManager::Set() yet.
New,
// Assigned by LabelManager::Set(); the polling thread checks the event of labels in this state.
Active,
// Assigned by LabelManager::Delete() when the label is still Active; the polling thread
// deletes such a label once its event is reported as set.
ActiveDeleted,
// Assigned by the polling thread after vkGetEventStatus() returned VK_EVENT_SET for the label.
NotActive,
};
struct Label
{
VkDevice device = nullptr;
VkEvent event = nullptr;
bool active = false;
LabelStatus status = LabelStatus::New;
uint64_t* dst_gpu_addr64 = nullptr;
uint64_t value64 = 0;
uint32_t* dst_gpu_addr32 = nullptr;
@ -26,6 +34,7 @@ struct Label
LabelGpuObject::callback_t callback_1 = nullptr;
LabelGpuObject::callback_t callback_2 = nullptr;
uint64_t args[4] = {};
CommandBuffer* buffer = nullptr;
};
class LabelManager
@ -67,15 +76,22 @@ void LabelManager::ThreadRun(void* data)
int active_count = 0;
Vector<Label*> deleted_labels;
for (auto& label: manager->m_labels)
{
if (label->active)
if (label->status == LabelStatus::Active || label->status == LabelStatus::ActiveDeleted)
{
active_count++;
if (vkGetEventStatus(label->device, label->event) == VK_EVENT_SET)
{
label->active = false;
if (label->status == LabelStatus::ActiveDeleted)
{
deleted_labels.Add(label);
}
label->status = LabelStatus::NotActive;
bool write = true;
@ -108,6 +124,11 @@ void LabelManager::ThreadRun(void* data)
}
}
for (auto& label: deleted_labels)
{
manager->Delete(label);
}
if (active_count == 0)
{
manager->m_cond_var.Wait(&manager->m_mutex);
@ -129,7 +150,7 @@ Label* LabelManager::Create(GraphicContext* ctx, uint64_t* dst_gpu_addr, uint64_
auto* label = new Label;
label->active = false;
label->status = LabelStatus::New;
label->dst_gpu_addr64 = dst_gpu_addr;
label->value64 = value;
label->dst_gpu_addr32 = nullptr;
@ -142,6 +163,7 @@ Label* LabelManager::Create(GraphicContext* ctx, uint64_t* dst_gpu_addr, uint64_
label->args[1] = args[1];
label->args[2] = args[2];
label->args[3] = args[3];
label->buffer = nullptr;
VkEventCreateInfo create_info {};
create_info.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
@ -168,7 +190,7 @@ Label* LabelManager::Create(GraphicContext* ctx, uint32_t* dst_gpu_addr, uint32_
auto* label = new Label;
label->active = false;
label->status = LabelStatus::New;
label->dst_gpu_addr32 = dst_gpu_addr;
label->value32 = value;
label->dst_gpu_addr64 = nullptr;
@ -208,13 +230,24 @@ void LabelManager::Delete(Label* label)
EXIT_NOT_IMPLEMENTED(!m_labels.IndexValid(index));
m_labels.RemoveAt(index);
EXIT_NOT_IMPLEMENTED(label->status != LabelStatus::NotActive && label->status != LabelStatus::Active);
EXIT_NOT_IMPLEMENTED(label->active);
if (label->status == LabelStatus::Active)
{
label->status = LabelStatus::ActiveDeleted;
} else
{
m_labels.RemoveAt(index);
vkDestroyEvent(label->device, label->event, nullptr);
EXIT_IF(label->buffer == nullptr);
delete label;
// All submitted commands that refer to event must have completed execution
label->buffer->CommandProcessorWait();
vkDestroyEvent(label->device, label->event, nullptr);
delete label;
}
}
void LabelManager::Set(CommandBuffer* buffer, Label* label)
@ -231,12 +264,14 @@ void LabelManager::Set(CommandBuffer* buffer, Label* label)
EXIT_NOT_IMPLEMENTED(!m_labels.IndexValid(index));
EXIT_NOT_IMPLEMENTED(label->active);
EXIT_NOT_IMPLEMENTED(label->status != LabelStatus::New && label->status != LabelStatus::NotActive);
label->active = true;
label->status = LabelStatus::Active;
EXIT_IF(label->event == nullptr);
label->buffer = buffer;
auto* vk_buffer = buffer->GetPool()->buffers[buffer->GetIndex()];
EXIT_NOT_IMPLEMENTED(vk_buffer == nullptr);

View file

@ -35,6 +35,8 @@
[[maybe_unused]] uint32_t dw, [[maybe_unused]] uint32_t num_dw
#define KYTY_CP_OP_PARSER(f) static uint32_t f(KYTY_CP_OP_PARSER_ARGS)
KYTY_ENUM_RANGE(Kyty::Libs::Graphics::ShaderInstructionType, 0, static_cast<int>(Kyty::Libs::Graphics::ShaderInstructionType::ZMax));
namespace Kyty::Libs::Graphics {
struct ShaderBinaryInfo
@ -212,6 +214,7 @@ static String dbg_fmt_to_str(const ShaderInstruction& inst)
case ShaderInstructionFormat::Vdata4Vaddr2SvSoffsOffenIdxenFloat4: return U"Vdata4Vaddr2SvSoffsOffenIdxenFloat4"; break;
case ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7: return U"Vdata4Vaddr3StSsDmask7"; break;
case ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF: return U"Vdata4Vaddr3StSsDmaskF"; break;
case ShaderInstructionFormat::Vdata4Vaddr3StDmaskF: return U"Vdata4Vaddr3StDmaskF"; break;
case ShaderInstructionFormat::SVdstSVsrc0SVsrc1: return U"SVdstSVsrc0SVsrc1"; break;
case ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2: return U"VdstVsrc0Vsrc1Smask2"; break;
case ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2: return U"VdstVsrc0Vsrc1Vsrc2"; break;
@ -474,7 +477,32 @@ KYTY_SHADER_PARSER(shader_parse_sopc)
// Decodes one SOPK-encoded scalar instruction from buffer[0] and appends it to dst.
//
// Field layout read from the instruction dword:
//   bits  0..15 - immediate, interpreted as a signed 16-bit value
//   bits 16..22 - destination operand (sdst)
//   bits 23..27 - opcode
//
// Only opcode 0x00 (SMovkI32) is recognised; any other opcode dumps the shader
// parsed so far and terminates through EXIT.
//
// Returns 1 (presumably the number of dwords consumed - confirm against the other parsers).
//
// The stale KYTY_NOT_IMPLEMENTED marker of the former stub was dropped: it preceded
// the decoder and would have prevented it from ever running.
KYTY_SHADER_PARSER(shader_parse_sopk)
{
	EXIT_IF(dst == nullptr);
	EXIT_IF(src == nullptr);
	EXIT_IF(buffer == nullptr || buffer < src);

	uint32_t opcode = (buffer[0] >> 23u) & 0x1fu;
	auto     imm    = static_cast<int16_t>(buffer[0] >> 0u & 0xffffu);
	uint32_t sdst   = (buffer[0] >> 16u) & 0x7fu;

	ShaderInstruction inst;
	inst.pc  = pc;
	inst.dst = operand_parse(sdst);

	switch (opcode) // NOLINT
	{
		case 0x00:
			// The sign-extended immediate becomes the single source operand.
			inst.type              = ShaderInstructionType::SMovkI32;
			inst.format            = ShaderInstructionFormat::SVdstSVsrc0;
			inst.src[0].type       = ShaderOperandType::IntegerInlineConstant;
			inst.src[0].constant.i = imm;
			inst.src_num           = 1;
			break;

		default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown sopk opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc);
	}

	dst->GetInstructions().Add(inst);

	return 1;
}
@ -659,6 +687,13 @@ KYTY_SHADER_PARSER(shader_parse_sop2)
inst.src[1].size = 2;
break;
case 0x0e: inst.type = ShaderInstructionType::SAndB32; break;
case 0x0f:
inst.type = ShaderInstructionType::SAndB64;
inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12;
inst.dst.size = 2;
inst.src[0].size = 2;
inst.src[1].size = 2;
break;
case 0x11:
inst.type = ShaderInstructionType::SOrB64;
inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12;
@ -666,6 +701,13 @@ KYTY_SHADER_PARSER(shader_parse_sop2)
inst.src[0].size = 2;
inst.src[1].size = 2;
break;
case 0x13:
inst.type = ShaderInstructionType::SXorB64;
inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12;
inst.dst.size = 2;
inst.src[0].size = 2;
inst.src[1].size = 2;
break;
case 0x15:
inst.type = ShaderInstructionType::SAndn2B64;
inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12;
@ -673,6 +715,20 @@ KYTY_SHADER_PARSER(shader_parse_sop2)
inst.src[0].size = 2;
inst.src[1].size = 2;
break;
case 0x17:
inst.type = ShaderInstructionType::SOrn2B64;
inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12;
inst.dst.size = 2;
inst.src[0].size = 2;
inst.src[1].size = 2;
break;
case 0x19:
inst.type = ShaderInstructionType::SNandB64;
inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12;
inst.dst.size = 2;
inst.src[0].size = 2;
inst.src[1].size = 2;
break;
case 0x1b:
inst.type = ShaderInstructionType::SNorB64;
inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12;
@ -680,6 +736,13 @@ KYTY_SHADER_PARSER(shader_parse_sop2)
inst.src[0].size = 2;
inst.src[1].size = 2;
break;
case 0x1d:
inst.type = ShaderInstructionType::SXnorB64;
inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12;
inst.dst.size = 2;
inst.src[0].size = 2;
inst.src[1].size = 2;
break;
case 0x1e: inst.type = ShaderInstructionType::SLshlB32; break;
case 0x20: inst.type = ShaderInstructionType::SLshrB32; break;
case 0x24: inst.type = ShaderInstructionType::SBfmB32; break;
@ -757,6 +820,7 @@ KYTY_SHADER_PARSER(shader_parse_vopc)
case 0xd2: inst.type = ShaderInstructionType::VCmpxEqU32; break;
case 0xd4: inst.type = ShaderInstructionType::VCmpxGtU32; break;
case 0xd5: inst.type = ShaderInstructionType::VCmpxNeU32; break;
case 0xd6: inst.type = ShaderInstructionType::VCmpxGeU32; break;
default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown vopc opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc);
}
@ -1047,7 +1111,10 @@ KYTY_SHADER_PARSER(shader_parse_vop3)
case 0xc5: inst.type = ShaderInstructionType::VCmpNeU32; break;
case 0xc6: inst.type = ShaderInstructionType::VCmpGeU32; break;
case 0xc7: inst.type = ShaderInstructionType::VCmpTU32; break;
case 0xd2: inst.type = ShaderInstructionType::VCmpxEqU32; break;
case 0xd4: inst.type = ShaderInstructionType::VCmpxGtU32; break;
case 0xd5: inst.type = ShaderInstructionType::VCmpxNeU32; break;
case 0xd6: inst.type = ShaderInstructionType::VCmpxGeU32; break;
case 0x100:
inst.type = ShaderInstructionType::VCndmaskB32;
inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2;
@ -1441,7 +1508,7 @@ KYTY_SHADER_PARSER(shader_parse_mimg)
EXIT_IF(buffer == nullptr || buffer < src);
uint32_t slc = (buffer[0] >> 25u) & 0x1u;
uint32_t opcode = (buffer[0] >> 18u) & 0x1fu;
uint32_t opcode = (buffer[0] >> 18u) & 0x7fu;
uint32_t lwe = (buffer[0] >> 17u) & 0x1u;
uint32_t tff = (buffer[0] >> 16u) & 0x1u;
uint32_t r128 = (buffer[0] >> 15u) & 0x1u;
@ -1474,23 +1541,33 @@ KYTY_SHADER_PARSER(shader_parse_mimg)
inst.src[1] = operand_parse(srsrc * 4);
inst.src[2] = operand_parse(ssamp * 4);
if (dmask == 0x7)
{
inst.format = ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7;
inst.dst.size = 3;
} else if (dmask == 0xf)
{
inst.format = ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF;
inst.dst.size = 4;
}
switch (opcode) // NOLINT
switch (opcode)
{
case 0x00:
inst.type = ShaderInstructionType::ImageLoad;
inst.src[0].size = 3;
inst.src[1].size = 8;
inst.src_num = 2;
if (dmask == 0xf)
{
inst.format = ShaderInstructionFormat::Vdata4Vaddr3StDmaskF;
inst.dst.size = 4;
}
break;
case 0x20:
inst.type = ShaderInstructionType::ImageSample;
inst.src[0].size = 3;
inst.src[1].size = 8;
inst.src[2].size = 4;
if (dmask == 0x7)
{
inst.format = ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7;
inst.dst.size = 3;
} else if (dmask == 0xf)
{
inst.format = ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF;
inst.dst.size = 4;
}
break;
default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown mimg opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc);
}
@ -2222,7 +2299,7 @@ static void ShaderGetGdsPointer(ShaderGdsResources* info, int start_index, int s
info->pointers_num++;
}
static void ShaderCalcBindingIndices(ShaderResources* bind)
static void ShaderCalcBindingIndices(ShaderBindResources* bind)
{
int binding_index = 0;
@ -2473,7 +2550,7 @@ void ShaderGetInputInfoCS(const ComputeShaderInfo* regs, ShaderComputeInputInfo*
ShaderCalcBindingIndices(&info->bind);
}
static void ShaderDbgDumpResources(const ShaderResources& bind)
static void ShaderDbgDumpResources(const ShaderBindResources& bind)
{
printf("\t descriptor_set_slot = %u\n", bind.descriptor_set_slot);
printf("\t push_constant_offset = %u\n", bind.push_constant_offset);
@ -2835,45 +2912,34 @@ private:
String m_file_name;
};
Vector<uint32_t> ShaderRecompileVS(const VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info)
ShaderCode ShaderParseVS(const VertexShaderInfo* regs)
{
KYTY_PROFILER_FUNCTION(profiler::colors::Amber300);
String source;
Vector<uint32_t> ret;
ShaderLogHelper log("vs");
ShaderCode code;
code.SetType(ShaderType::Vertex);
if (regs->vs_embedded)
{
source = SpirvGetEmbeddedVs(regs->vs_embedded_id);
code.SetEmbedded(true);
code.SetEmbeddedId(regs->vs_embedded_id);
} else
{
const auto* src = reinterpret_cast<const uint32_t*>(regs->vs_regs.GetGpuAddress());
EXIT_NOT_IMPLEMENTED(src == nullptr);
vs_print("ShaderRecompileVS()", regs->vs_regs);
vs_print("ShaderParseVS()", regs->vs_regs);
vs_check(regs->vs_regs);
for (int i = 0; i < input_info->bind.storage_buffers.buffers_num; i++)
{
const auto& r = input_info->bind.storage_buffers.buffers[i];
EXIT_NOT_IMPLEMENTED(((r.Stride() * r.NumRecords()) & 0x3u) != 0);
}
const auto* header = GetBinaryInfo(src);
EXIT_NOT_IMPLEMENTED(header == nullptr);
bi_print("ShaderRecompileVS():ShaderBinaryInfo", *header);
ShaderCode code;
code.SetType(ShaderType::Vertex);
bi_print("ShaderParseVS():ShaderBinaryInfo", *header);
shader_parse(0, src, nullptr, &code);
log.DumpOriginalShader(code);
if (g_debug_printfs != nullptr)
{
auto id = (static_cast<uint64_t>(header->hash0) << 32u) | header->crc32;
@ -2882,6 +2948,31 @@ Vector<uint32_t> ShaderRecompileVS(const VertexShaderInfo* regs, const ShaderVer
code.GetDebugPrintfs() = g_debug_printfs->At(index).cmds;
}
}
}
return code;
}
Vector<uint32_t> ShaderRecompileVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info)
{
KYTY_PROFILER_FUNCTION(profiler::colors::Amber300);
String source;
Vector<uint32_t> ret;
ShaderLogHelper log("vs");
if (code.IsEmbedded())
{
source = SpirvGetEmbeddedVs(code.GetEmbeddedId());
} else
{
for (int i = 0; i < input_info->bind.storage_buffers.buffers_num; i++)
{
const auto& r = input_info->bind.storage_buffers.buffers[i];
EXIT_NOT_IMPLEMENTED(((r.Stride() * r.NumRecords()) & 0x3u) != 0);
}
log.DumpOriginalShader(code);
source = SpirvGenerateSource(code, input_info, nullptr, nullptr);
}
@ -2898,21 +2989,48 @@ Vector<uint32_t> ShaderRecompileVS(const VertexShaderInfo* regs, const ShaderVer
return ret;
}
Vector<uint32_t> ShaderRecompilePS(const PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info)
ShaderCode ShaderParsePS(const PixelShaderInfo* regs)
{
KYTY_PROFILER_FUNCTION(profiler::colors::Blue300);
ShaderLogHelper log("ps");
const auto* src = reinterpret_cast<const uint32_t*>(regs->ps_regs.data_addr);
EXIT_NOT_IMPLEMENTED(src == nullptr);
ps_print("ShaderRecompilePS()", regs->ps_regs);
ps_print("ShaderParsePS()", regs->ps_regs);
ps_check(regs->ps_regs);
EXIT_NOT_IMPLEMENTED(regs->ps_regs.user_sgpr != regs->ps_user_sgpr.count);
const auto* header = GetBinaryInfo(src);
EXIT_NOT_IMPLEMENTED(header == nullptr);
bi_print("ShaderParsePS():ShaderBinaryInfo", *header);
ShaderCode code;
code.SetType(ShaderType::Pixel);
shader_parse(0, src, nullptr, &code);
if (g_debug_printfs != nullptr)
{
auto id = (static_cast<uint64_t>(header->hash0) << 32u) | header->crc32;
if (auto index = g_debug_printfs->Find(id, [](auto cmd, auto id) { return cmd.id == id; }); g_debug_printfs->IndexValid(index))
{
code.GetDebugPrintfs() = g_debug_printfs->At(index).cmds;
}
}
return code;
}
Vector<uint32_t> ShaderRecompilePS(const ShaderCode& code, const ShaderPixelInputInfo* input_info)
{
KYTY_PROFILER_FUNCTION(profiler::colors::Blue300);
ShaderLogHelper log("ps");
for (uint32_t i = 0; i < input_info->input_num; i++)
{
EXIT_NOT_IMPLEMENTED(input_info->interpolator_settings[i] != i);
@ -2924,30 +3042,10 @@ Vector<uint32_t> ShaderRecompilePS(const PixelShaderInfo* regs, const ShaderPixe
EXIT_NOT_IMPLEMENTED(((r.Stride() * r.NumRecords()) & 0x3u) != 0);
}
const auto* header = GetBinaryInfo(src);
EXIT_NOT_IMPLEMENTED(header == nullptr);
bi_print("ShaderRecompilePS():ShaderBinaryInfo", *header);
ShaderCode code;
code.SetType(ShaderType::Pixel);
shader_parse(0, src, nullptr, &code);
Vector<uint32_t> ret;
log.DumpOriginalShader(code);
if (g_debug_printfs != nullptr)
{
auto id = (static_cast<uint64_t>(header->hash0) << 32u) | header->crc32;
if (auto index = g_debug_printfs->Find(id, [](auto cmd, auto id) { return cmd.id == id; }); g_debug_printfs->IndexValid(index))
{
code.GetDebugPrintfs() = g_debug_printfs->At(index).cmds;
}
}
auto source = SpirvGenerateSource(code, nullptr, input_info, nullptr);
log.DumpRecompiledShader(source);
@ -2962,42 +3060,30 @@ Vector<uint32_t> ShaderRecompilePS(const PixelShaderInfo* regs, const ShaderPixe
return ret;
}
Vector<uint32_t> ShaderRecompileCS(const ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info)
ShaderCode ShaderParseCS(const ComputeShaderInfo* regs)
{
KYTY_PROFILER_FUNCTION(profiler::colors::CyanA700);
ShaderLogHelper log("cs");
const auto* src = reinterpret_cast<const uint32_t*>(regs->cs_regs.data_addr);
EXIT_NOT_IMPLEMENTED(src == nullptr);
cs_print("ShaderRecompileCS()", regs->cs_regs);
cs_print("ShaderParseCS()", regs->cs_regs);
cs_check(regs->cs_regs);
EXIT_NOT_IMPLEMENTED(regs->cs_regs.user_sgpr > regs->cs_user_sgpr.count);
for (int i = 0; i < input_info->bind.storage_buffers.buffers_num; i++)
{
const auto& r = input_info->bind.storage_buffers.buffers[i];
EXIT_NOT_IMPLEMENTED(((r.Stride() * r.NumRecords()) & 0x3u) != 0);
}
const auto* header = GetBinaryInfo(src);
EXIT_NOT_IMPLEMENTED(header == nullptr);
bi_print("ShaderRecompileCS():ShaderBinaryInfo", *header);
bi_print("ShaderParseCS():ShaderBinaryInfo", *header);
ShaderCode code;
code.SetType(ShaderType::Compute);
shader_parse(0, src, nullptr, &code);
Vector<uint32_t> ret;
log.DumpOriginalShader(code);
if (g_debug_printfs != nullptr)
{
auto id = (static_cast<uint64_t>(header->hash0) << 32u) | header->crc32;
@ -3007,6 +3093,25 @@ Vector<uint32_t> ShaderRecompileCS(const ComputeShaderInfo* regs, const ShaderCo
}
}
return code;
}
Vector<uint32_t> ShaderRecompileCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info)
{
KYTY_PROFILER_FUNCTION(profiler::colors::CyanA700);
ShaderLogHelper log("cs");
for (int i = 0; i < input_info->bind.storage_buffers.buffers_num; i++)
{
const auto& r = input_info->bind.storage_buffers.buffers[i];
EXIT_NOT_IMPLEMENTED(((r.Stride() * r.NumRecords()) & 0x3u) != 0);
}
Vector<uint32_t> ret;
log.DumpOriginalShader(code);
auto source = SpirvGenerateSource(code, nullptr, nullptr, input_info);
log.DumpRecompiledShader(source);
@ -3023,7 +3128,37 @@ Vector<uint32_t> ShaderRecompileCS(const ComputeShaderInfo* regs, const ShaderCo
return ret;
}
static void ShaderGetBindIds(ShaderId* ret, const ShaderResources& bind)
// Derives bind parameters for a shader from its parsed code and bound resources.
//
// When at least one 2D texture is bound, the code is scanned for ImageSample and
// ImageLoad instructions. A shader using both is not supported. If only ImageLoad
// is present, the textures are flagged as used without a sampler.
// With no 2D textures bound, default-initialised parameters are returned.
static ShaderBindParameters ShaderUpdateBindInfo(const ShaderCode& code, const ShaderBindResources* bind)
{
	ShaderBindParameters params {};

	if (!(bind->textures2D.textures_num > 0))
	{
		return params;
	}

	bool image_sample = code.HasAnyOf({ShaderInstructionType::ImageSample});
	bool image_load   = code.HasAnyOf({ShaderInstructionType::ImageLoad});

	EXIT_NOT_IMPLEMENTED(image_sample && image_load);

	params.textures2D_without_sampler = image_load;

	return params;
}
// Computes the bind parameters of a vertex shader from its code and the
// resources recorded in input_info->bind. input_info must not be null.
ShaderBindParameters ShaderGetBindParametersVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info)
{
	const ShaderBindResources* resources = &input_info->bind;

	return ShaderUpdateBindInfo(code, resources);
}
// Computes the bind parameters of a pixel shader from its code and the
// resources recorded in input_info->bind. input_info must not be null.
ShaderBindParameters ShaderGetBindParametersPS(const ShaderCode& code, const ShaderPixelInputInfo* input_info)
{
	const ShaderBindResources* resources = &input_info->bind;

	return ShaderUpdateBindInfo(code, resources);
}
// Computes the bind parameters of a compute shader from its code and the
// resources recorded in input_info->bind. input_info must not be null.
ShaderBindParameters ShaderGetBindParametersCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info)
{
	const ShaderBindResources* resources = &input_info->bind;

	return ShaderUpdateBindInfo(code, resources);
}
static void ShaderGetBindIds(ShaderId* ret, const ShaderBindResources& bind)
{
ret->ids.Add(bind.storage_buffers.buffers_num);

View file

@ -1140,7 +1140,7 @@ public:
void SetPsInputInfo(const ShaderPixelInputInfo* input_info) { m_ps_input_info = input_info; }
[[nodiscard]] const ShaderPixelInputInfo* GetPsInputInfo() const { return m_ps_input_info; }
[[nodiscard]] const ShaderResources* GetBindInfo() const { return m_bind; }
[[nodiscard]] const ShaderBindResources* GetBindInfo() const { return m_bind; }
void AddConstantUint(uint32_t u);
void AddConstantInt(int i);
@ -1199,7 +1199,8 @@ private:
const ShaderVertexInputInfo* m_vs_input_info = nullptr;
const ShaderComputeInputInfo* m_cs_input_info = nullptr;
const ShaderPixelInputInfo* m_ps_input_info = nullptr;
const ShaderResources* m_bind = nullptr;
const ShaderBindResources* m_bind = nullptr;
ShaderBindParameters m_bind_params;
Core::Array2<int, 64, 2> m_extended_mapping {};
};
@ -2126,12 +2127,12 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata3Vaddr3StSsDmask7)
static const char32_t* text = UR"(
%t24_<index> = OpLoad %uint %<src1_value0>
%t26_<index> = OpAccessChain %_ptr_UniformConstant_SampledImage %textures2D %t24_<index>
%t27_<index> = OpLoad %SampledImage %t26_<index>
%t26_<index> = OpAccessChain %_ptr_UniformConstant_Image %textures2D %t24_<index>
%t27_<index> = OpLoad %Image %t26_<index>
%t33_<index> = OpLoad %uint %<src2_value0>
%t35_<index> = OpAccessChain %_ptr_UniformConstant_Sampler %samplers %t33_<index>
%t36_<index> = OpLoad %Sampler %t35_<index>
%t38_<index> = OpSampledImage %_SampledImage %t27_<index> %t36_<index>
%t38_<index> = OpSampledImage %SampledImage %t27_<index> %t36_<index>
%t39_<index> = OpLoad %float %<src0_value0>
%t40_<index> = OpLoad %float %<src0_value1>
%t42_<index> = OpCompositeConstruct %v2float %t39_<index> %t40_<index>
@ -2191,12 +2192,12 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata4Vaddr3StSsDmaskF)
static const char32_t* text = UR"(
%t24_<index> = OpLoad %uint %<src1_value0>
%t26_<index> = OpAccessChain %_ptr_UniformConstant_SampledImage %textures2D %t24_<index>
%t27_<index> = OpLoad %SampledImage %t26_<index>
%t26_<index> = OpAccessChain %_ptr_UniformConstant_Image %textures2D %t24_<index>
%t27_<index> = OpLoad %Image %t26_<index>
%t33_<index> = OpLoad %uint %<src2_value0>
%t35_<index> = OpAccessChain %_ptr_UniformConstant_Sampler %samplers %t33_<index>
%t36_<index> = OpLoad %Sampler %t35_<index>
%t38_<index> = OpSampledImage %_SampledImage %t27_<index> %t36_<index>
%t38_<index> = OpSampledImage %SampledImage %t27_<index> %t36_<index>
%t39_<index> = OpLoad %float %<src0_value0>
%t40_<index> = OpLoad %float %<src0_value1>
%t42_<index> = OpCompositeConstruct %v2float %t39_<index> %t40_<index>
@ -2233,6 +2234,72 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata4Vaddr3StSsDmaskF)
return false;
}
// Emits SPIR-V text for an ImageLoad instruction in the Vdata4Vaddr3StDmaskF format.
//
// The generated code loads the texture index from src[1], bitcasts the first two
// address components of src[0] from float to uint, reads a v4float texel with
// OpImageRead and stores its four components into the four destination variables.
//
// Returns true when code was appended to *dst_source, false when no 2D textures
// are bound (or no bind info is available).
KYTY_RECOMPILER_FUNC(Recompile_ImageLoad_Vdata4Vaddr3StDmaskF)
{
	const auto& inst      = code.GetInstructions().At(index);
	const auto* bind_info = spirv->GetBindInfo();

	// Nothing to read from without at least one bound 2D texture.
	if (bind_info == nullptr || !(bind_info->textures2D.textures_num > 0))
	{
		return false;
	}

	auto dst_value0  = operand_variable_to_str(inst.dst, 0);
	auto dst_value1  = operand_variable_to_str(inst.dst, 1);
	auto dst_value2  = operand_variable_to_str(inst.dst, 2);
	auto dst_value3  = operand_variable_to_str(inst.dst, 3);
	auto src0_value0 = operand_variable_to_str(inst.src[0], 0);
	auto src0_value1 = operand_variable_to_str(inst.src[0], 1);
	auto src0_value2 = operand_variable_to_str(inst.src[0], 2);
	auto src1_value0 = operand_variable_to_str(inst.src[1], 0);

	EXIT_NOT_IMPLEMENTED(dst_value0.type != SpirvType::Float);
	EXIT_NOT_IMPLEMENTED(src0_value0.type != SpirvType::Float);
	EXIT_NOT_IMPLEMENTED(src1_value0.type != SpirvType::Uint);

	// TODO() check VSKIP
	// TODO() check LOD_CLAMPED
	// TODO() swizzle channels
	// TODO() convert SRGB -> LINEAR if SRGB format was replaced with UNORM

	static const char32_t* text = UR"(
%t24_<index> = OpLoad %uint %<src1_value0>
%t26_<index> = OpAccessChain %_ptr_UniformConstant_Image %textures2D %t24_<index>
%t27_<index> = OpLoad %Image %t26_<index>
%t67_<index> = OpLoad %float %<src0_value0>
%t69_<index> = OpBitcast %uint %t67_<index>
%t70_<index> = OpLoad %float %<src0_value1>
%t71_<index> = OpBitcast %uint %t70_<index>
%t73_<index> = OpCompositeConstruct %v2uint %t69_<index> %t71_<index>
%t74_<index> = OpImageRead %v4float %t27_<index> %t73_<index>
OpStore %temp_v4float %t74_<index>
%t46_<index> = OpAccessChain %_ptr_Function_float %temp_v4float %uint_0
%t47_<index> = OpLoad %float %t46_<index>
OpStore %<dst_value0> %t47_<index>
%t50_<index> = OpAccessChain %_ptr_Function_float %temp_v4float %uint_1
%t51_<index> = OpLoad %float %t50_<index>
OpStore %<dst_value1> %t51_<index>
%t54_<index> = OpAccessChain %_ptr_Function_float %temp_v4float %uint_2
%t55_<index> = OpLoad %float %t54_<index>
OpStore %<dst_value2> %t55_<index>
%t57_<index> = OpAccessChain %_ptr_Function_float %temp_v4float %uint_3
%t58_<index> = OpLoad %float %t57_<index>
OpStore %<dst_value3> %t58_<index>
)";

	// Every generated id is suffixed with the instruction index to keep it unique.
	const auto index_str = String::FromPrintf("%u", index);

	// The template has no <src0_value2> placeholder, so that substitution is currently a no-op.
	*dst_source += String(text)
	                   .ReplaceStr(U"<index>", index_str)
	                   .ReplaceStr(U"<src0_value0>", src0_value0.value)
	                   .ReplaceStr(U"<src0_value1>", src0_value1.value)
	                   .ReplaceStr(U"<src0_value2>", src0_value2.value)
	                   .ReplaceStr(U"<src1_value0>", src1_value0.value)
	                   .ReplaceStr(U"<dst_value0>", dst_value0.value)
	                   .ReplaceStr(U"<dst_value1>", dst_value1.value)
	                   .ReplaceStr(U"<dst_value2>", dst_value2.value)
	                   .ReplaceStr(U"<dst_value3>", dst_value3.value);

	return true;
}
/* XXX: And, Andn2, Or, Orn2, Xor, Nand, Nor, Xnor, Cselect */
KYTY_RECOMPILER_FUNC(Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12)
{
@ -3529,7 +3596,7 @@ KYTY_RECOMPILER_FUNC(Recompile_VCmpx_XXX_I32_SmaskVsrc0Vsrc1)
return true;
}
/* XXX: Gt */
/* XXX: Gt, Ge */
KYTY_RECOMPILER_FUNC(Recompile_VCmpx_XXX_U32_SmaskVsrc0Vsrc1)
{
const auto& inst = code.GetInstructions().At(index);
@ -4463,6 +4530,7 @@ static RecompilerFunc g_recomp_func[] = {
{Recompile_Exp_Param_XXX_Vsrc0Vsrc1Vsrc2Vsrc3, ShaderInstructionType::Exp, ShaderInstructionFormat::Param3Vsrc0Vsrc1Vsrc2Vsrc3, {U"param3"}},
{Recompile_Exp_Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done, ShaderInstructionType::Exp, ShaderInstructionFormat::Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done, {U""}},
{Recompile_ImageLoad_Vdata4Vaddr3StDmaskF, ShaderInstructionType::ImageLoad, ShaderInstructionFormat::Vdata4Vaddr3StDmaskF, {U""}},
{Recompile_ImageSample_Vdata3Vaddr3StSsDmask7, ShaderInstructionType::ImageSample, ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7, {U""}},
{Recompile_ImageSample_Vdata4Vaddr3StSsDmaskF, ShaderInstructionType::ImageSample, ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF, {U""}},
@ -4484,12 +4552,28 @@ static RecompilerFunc g_recomp_func[] = {
U"%tb_<index> = OpBitwiseAnd %uint %t0_<index> %ta_<index>",
U"%tc_<index> = OpNot %uint %t3_<index>",
U"%td_<index> = OpBitwiseAnd %uint %t1_<index> %tc_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SOrn2B64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ta_<index> = OpNot %uint %t2_<index>",
U"%tb_<index> = OpBitwiseOr %uint %t0_<index> %ta_<index>",
U"%tc_<index> = OpNot %uint %t3_<index>",
U"%td_<index> = OpBitwiseOr %uint %t1_<index> %tc_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SAndB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%tb_<index> = OpBitwiseAnd %uint %t0_<index> %t2_<index>",
U"%td_<index> = OpBitwiseAnd %uint %t1_<index> %t3_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SNorB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ta_<index> = OpBitwiseOr %uint %t0_<index> %t2_<index>",
U"%tb_<index> = OpNot %uint %ta_<index>",
U"%tc_<index> = OpBitwiseOr %uint %t1_<index> %t3_<index>",
U"%td_<index> = OpNot %uint %tc_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SNandB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ta_<index> = OpBitwiseAnd %uint %t0_<index> %t2_<index>",
U"%tb_<index> = OpNot %uint %ta_<index>",
U"%tc_<index> = OpBitwiseAnd %uint %t1_<index> %t3_<index>",
U"%td_<index> = OpNot %uint %tc_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SXnorB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ta_<index> = OpBitwiseXor %uint %t0_<index> %t2_<index>",
U"%tb_<index> = OpNot %uint %ta_<index>",
U"%tc_<index> = OpBitwiseXor %uint %t1_<index> %t3_<index>",
U"%td_<index> = OpNot %uint %tc_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SOrB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%tb_<index> = OpBitwiseOr %uint %t0_<index> %t2_<index>",
U"%td_<index> = OpBitwiseOr %uint %t1_<index> %t3_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SXorB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%tb_<index> = OpBitwiseXor %uint %t0_<index> %t2_<index>",
U"%td_<index> = OpBitwiseXor %uint %t1_<index> %t3_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SCselectB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ts_<index> = OpLoad %uint %scc",
U"%tsb_<index> = OpINotEqual %bool %ts_<index> %uint_0",
U"%tb_<index> = OpSelect %uint %tsb_<index> %t0_<index> %t2_<index>",
@ -4531,6 +4615,7 @@ static RecompilerFunc g_recomp_func[] = {
{Recompile_SMovB32_SVdstSVsrc0, ShaderInstructionType::SMovB32, ShaderInstructionFormat::SVdstSVsrc0, {U""}},
{Recompile_SMovB32_SVdstSVsrc0, ShaderInstructionType::SMovkI32, ShaderInstructionFormat::SVdstSVsrc0, {U""}},
{Recompile_V_XXX_B32_SVdstSVsrc0, ShaderInstructionType::VBfrevB32, ShaderInstructionFormat::SVdstSVsrc0, {U"%t_<index> = OpBitReverse %uint %t0_<index>"}},
{Recompile_V_XXX_B32_SVdstSVsrc0, ShaderInstructionType::VNotB32, ShaderInstructionFormat::SVdstSVsrc0, {U"%t_<index> = OpNot %uint %t0_<index>"}},
{Recompile_V_XXX_F32_SVdstSVsrc0, ShaderInstructionType::VRcpF32, ShaderInstructionFormat::SVdstSVsrc0, {U"%t_<index> = OpFDiv %float %float_1_000000 %t0_<index>"}},
@ -4602,6 +4687,7 @@ static RecompilerFunc g_recomp_func[] = {
{Recompile_VCmpx_XXX_I32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpxEqU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpIEqual"}},
{Recompile_VCmpx_XXX_I32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpxNeU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpINotEqual"}},
{Recompile_VCmpx_XXX_U32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpxGtU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpUGreaterThan"}},
{Recompile_VCmpx_XXX_U32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpxGeU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpUGreaterThanEqual"}},
{Recompile_SCmp_XXX_I32_Ssrc0Ssrc1, ShaderInstructionType::SCmpEqI32, ShaderInstructionFormat::Ssrc0Ssrc1, {U"OpIEqual"}},
{Recompile_SCmp_XXX_I32_Ssrc0Ssrc1, ShaderInstructionType::SCmpGeI32, ShaderInstructionFormat::Ssrc0Ssrc1, {U"OpSGreaterThanEqual"}},
@ -4828,10 +4914,22 @@ void Spirv::GenerateSource()
switch (m_code.GetType())
{
case ShaderType::Pixel: m_bind = (m_ps_input_info != nullptr ? &m_ps_input_info->bind : nullptr); break;
case ShaderType::Vertex: m_bind = (m_vs_input_info != nullptr ? &m_vs_input_info->bind : nullptr); break;
case ShaderType::Compute: m_bind = (m_cs_input_info != nullptr ? &m_cs_input_info->bind : nullptr); break;
default: m_bind = nullptr; break;
case ShaderType::Pixel:
m_bind = (m_ps_input_info != nullptr ? &m_ps_input_info->bind : nullptr);
m_bind_params = (m_ps_input_info != nullptr ? ShaderGetBindParametersPS(m_code, m_ps_input_info) : ShaderBindParameters());
break;
case ShaderType::Vertex:
m_bind = (m_vs_input_info != nullptr ? &m_vs_input_info->bind : nullptr);
m_bind_params = (m_vs_input_info != nullptr ? ShaderGetBindParametersVS(m_code, m_vs_input_info) : ShaderBindParameters());
break;
case ShaderType::Compute:
m_bind = (m_cs_input_info != nullptr ? &m_cs_input_info->bind : nullptr);
m_bind_params = (m_cs_input_info != nullptr ? ShaderGetBindParametersCS(m_code, m_cs_input_info) : ShaderBindParameters());
break;
default:
m_bind = nullptr;
m_bind_params = ShaderBindParameters();
break;
}
WriteHeader();
@ -5180,28 +5278,36 @@ void Spirv::WriteTypes()
}
static const char32_t* storage_buffers_types = UR"(
%buffers_runtimearr_float = OpTypeRuntimeArray %float
%BufferObject = OpTypeStruct %buffers_runtimearr_float
%buffers_num_uint_<buffers_num> = OpConstant %uint <buffers_num>
%_arr_BufferObject_uint_<buffers_num> = OpTypeArray %BufferObject %buffers_num_uint_<buffers_num>
%buffers_runtimearr_float = OpTypeRuntimeArray %float
%BufferObject = OpTypeStruct %buffers_runtimearr_float
%buffers_num_uint_<buffers_num> = OpConstant %uint <buffers_num>
%_arr_BufferObject_uint_<buffers_num> = OpTypeArray %BufferObject %buffers_num_uint_<buffers_num>
%_ptr_StorageBuffer__arr_BufferObject_uint_<buffers_num> = OpTypePointer StorageBuffer %_arr_BufferObject_uint_<buffers_num>
)";
static const char32_t* textures_types = UR"(
%SampledImage = OpTypeImage %float 2D 0 0 0 1 Unknown
%textures2D_uint_<buffers_num> = OpConstant %uint <buffers_num>
%_arr_SampledImage_uint_<buffers_num> = OpTypeArray %SampledImage %textures2D_uint_<buffers_num>
%_ptr_UniformConstant__arr_SampledImage_uint_<buffers_num> = OpTypePointer UniformConstant %_arr_SampledImage_uint_<buffers_num>
%_ptr_UniformConstant_SampledImage = OpTypePointer UniformConstant %SampledImage
%_SampledImage = OpTypeSampledImage %SampledImage
static const char32_t* textures_sampled_types = UR"(
%Image = OpTypeImage %float 2D 0 0 0 1 Unknown
%textures2D_uint_<buffers_num> = OpConstant %uint <buffers_num>
%_arr_Image_uint_<buffers_num> = OpTypeArray %Image %textures2D_uint_<buffers_num>
%_ptr_UniformConstant__arr_Image_uint_<buffers_num> = OpTypePointer UniformConstant %_arr_Image_uint_<buffers_num>
%_ptr_UniformConstant_Image = OpTypePointer UniformConstant %Image
%SampledImage = OpTypeSampledImage %Image
)";
static const char32_t* textures_loaded_types = UR"(
%Image = OpTypeImage %float 2D 0 0 0 2 Rgba8
%textures2D_uint_<buffers_num> = OpConstant %uint <buffers_num>
%_arr_Image_uint_<buffers_num> = OpTypeArray %Image %textures2D_uint_<buffers_num>
%_ptr_UniformConstant__arr_Image_uint_<buffers_num> = OpTypePointer UniformConstant %_arr_Image_uint_<buffers_num>
%_ptr_UniformConstant_Image = OpTypePointer UniformConstant %Image
)";
static const char32_t* samplers_types = UR"(
%Sampler = OpTypeSampler
%samplers_uint_<buffers_num> = OpConstant %uint <buffers_num>
%_arr_Sampler_uint_<buffers_num> = OpTypeArray %Sampler %samplers_uint_<buffers_num>
%Sampler = OpTypeSampler
%samplers_uint_<buffers_num> = OpConstant %uint <buffers_num>
%_arr_Sampler_uint_<buffers_num> = OpTypeArray %Sampler %samplers_uint_<buffers_num>
%_ptr_UniformConstant__arr_Sampler_uint_<buffers_num> = OpTypePointer UniformConstant %_arr_Sampler_uint_<buffers_num>
%_ptr_UniformConstant_Sampler = OpTypePointer UniformConstant %Sampler
%_ptr_UniformConstant_Sampler = OpTypePointer UniformConstant %Sampler
)";
static const char32_t* gds_types = UR"(
@ -5211,13 +5317,13 @@ void Spirv::WriteTypes()
)";
static const char32_t* vsharp_types = UR"(
%vsharp_buffers_num_uint_<buffers_num> = OpConstant %uint <buffers_num>
%vsharp_num_uint_4 = OpConstant %uint 4
%vsharp_arr_uint_uint_4 = OpTypeArray %uint %vsharp_num_uint_4
%vsharp_buffers_num_uint_<buffers_num> = OpConstant %uint <buffers_num>
%vsharp_num_uint_4 = OpConstant %uint 4
%vsharp_arr_uint_uint_4 = OpTypeArray %uint %vsharp_num_uint_4
%vsharp_arr__arr_uint_uint_4_uint_<buffers_num> = OpTypeArray %vsharp_arr_uint_uint_4 %vsharp_buffers_num_uint_<buffers_num>
%BufferResource = OpTypeStruct %vsharp_arr__arr_uint_uint_4_uint_<buffers_num>
%_ptr_PushConstant_BufferResource = OpTypePointer PushConstant %BufferResource
%_ptr_PushConstant_uint = OpTypePointer PushConstant %uint
%BufferResource = OpTypeStruct %vsharp_arr__arr_uint_uint_4_uint_<buffers_num>
%_ptr_PushConstant_BufferResource = OpTypePointer PushConstant %BufferResource
%_ptr_PushConstant_uint = OpTypePointer PushConstant %uint
)";
if (m_bind != nullptr)
@ -5229,7 +5335,8 @@ void Spirv::WriteTypes()
}
if (m_bind->textures2D.textures_num > 0)
{
m_source += String(textures_types).ReplaceStr(U"<buffers_num>", String::FromPrintf("%d", m_bind->textures2D.textures_num));
m_source += String(m_bind_params.textures2D_without_sampler ? textures_loaded_types : textures_sampled_types)
.ReplaceStr(U"<buffers_num>", String::FromPrintf("%d", m_bind->textures2D.textures_num));
}
if (m_bind->samplers.samplers_num > 0)
{
@ -5297,7 +5404,7 @@ void Spirv::WriteGlobalVariables()
}
if (m_bind->textures2D.textures_num > 0)
{
vars.Add(String::FromPrintf("%%textures2D = OpVariable %%_ptr_UniformConstant__arr_SampledImage_uint_%d UniformConstant",
vars.Add(String::FromPrintf("%%textures2D = OpVariable %%_ptr_UniformConstant__arr_Image_uint_%d UniformConstant",
m_bind->textures2D.textures_num));
}
if (m_bind->samplers.samplers_num > 0)

View file

@ -47,6 +47,63 @@ static VkComponentSwizzle get_swizzle(uint8_t s)
return VK_COMPONENT_SWIZZLE_IDENTITY;
}
// Checks whether the physical device can create an image described by
// image_info and, for two sRGB formats, falls back to the UNORM equivalent.
//
// ctx        - graphic context; only ctx->physical_device is read.
// image_info - in/out: format may be rewritten to the fallback format.
//
// Returns true when the (possibly replaced) format is supported. Returns false
// when the format is unsupported with no fallback, or when the capability
// query itself fails for another reason (previously such failures were
// reported as "supported").
static bool CheckFormat(GraphicContext* ctx, VkImageCreateInfo* image_info)
{
	VkImageFormatProperties props {};

	const VkResult query = vkGetPhysicalDeviceImageFormatProperties(ctx->physical_device, image_info->format, image_info->imageType,
	                                                                image_info->tiling, image_info->usage, image_info->flags, &props);

	if (query == VK_SUCCESS)
	{
		return true;
	}

	if (query != VK_ERROR_FORMAT_NOT_SUPPORTED)
	{
		// The query failed for a reason unrelated to the format (e.g. out of memory):
		// support is unknown, so do not claim success.
		printf("vkGetPhysicalDeviceImageFormatProperties failed: %d\n", static_cast<int>(query));
		return false;
	}

	if (image_info->format == VK_FORMAT_R8G8B8A8_SRGB)
	{
		// TODO() convert SRGB -> LINEAR in shader
		image_info->format = VK_FORMAT_R8G8B8A8_UNORM;
		bool result        = CheckFormat(ctx, image_info);
		printf("replace VK_FORMAT_R8G8B8A8_SRGB => VK_FORMAT_R8G8B8A8_UNORM [%s]\n", (!result ? "FAIL" : "SUCCESS"));
		return result;
	}

	if (image_info->format == VK_FORMAT_B8G8R8A8_SRGB)
	{
		// TODO() convert SRGB -> LINEAR in shader
		image_info->format = VK_FORMAT_B8G8R8A8_UNORM;
		bool result        = CheckFormat(ctx, image_info);
		printf("replace VK_FORMAT_B8G8R8A8_SRGB => VK_FORMAT_B8G8R8A8_UNORM [%s]\n", (!result ? "FAIL" : "SUCCESS"));
		return result;
	}

	// Unsupported format with no known substitute.
	return false;
}
// Validates the component mapping requested for an image view.
//
// Images without VK_IMAGE_USAGE_STORAGE_BIT accept any mapping. For storage
// images only an explicit R,G,B,A mapping is accepted; a B,G,R,A mapping on
// VK_FORMAT_R8G8B8A8_SRGB is folded into the format instead (the image becomes
// VK_FORMAT_B8G8R8A8_SRGB with an R,G,B,A mapping).
// NOTE(review): presumably this mirrors Vulkan's identity-swizzle requirement
// for storage image views - confirm against the spec.
//
// image_info / components are in/out and may both be rewritten.
// Returns false when the mapping cannot be expressed.
static bool CheckSwizzle(GraphicContext* /*ctx*/, VkImageCreateInfo* image_info, VkComponentMapping* components)
{
	if ((image_info->usage & VK_IMAGE_USAGE_STORAGE_BIT) == 0)
	{
		return true;
	}

	const bool green_alpha_fixed = (components->g == VK_COMPONENT_SWIZZLE_G && components->a == VK_COMPONENT_SWIZZLE_A);
	const bool is_rgba = (green_alpha_fixed && components->r == VK_COMPONENT_SWIZZLE_R && components->b == VK_COMPONENT_SWIZZLE_B);
	const bool is_bgra = (green_alpha_fixed && components->r == VK_COMPONENT_SWIZZLE_B && components->b == VK_COMPONENT_SWIZZLE_R);

	if (is_rgba)
	{
		return true;
	}

	if (is_bgra && image_info->format == VK_FORMAT_R8G8B8A8_SRGB)
	{
		printf("replace VK_FORMAT_R8G8B8A8_SRGB => VK_FORMAT_B8G8R8A8_SRGB\n");

		// Red and blue are exchanged by the format itself, so the view mapping becomes R,G,B,A.
		image_info->format = VK_FORMAT_B8G8R8A8_SRGB;
		components->r      = VK_COMPONENT_SWIZZLE_R;
		components->g      = VK_COMPONENT_SWIZZLE_G;
		components->b      = VK_COMPONENT_SWIZZLE_B;
		components->a      = VK_COMPONENT_SWIZZLE_A;
		return true;
	}

	// TODO() swizzle channels in shader
	return false;
}
void* TextureObject::Create(GraphicContext* ctx, const uint64_t* vaddr, const uint64_t* size, int vaddr_num, VulkanMemory* mem) const
{
KYTY_PROFILER_BLOCK("TextureObject::Create");
@ -55,12 +112,29 @@ void* TextureObject::Create(GraphicContext* ctx, const uint64_t* vaddr, const ui
EXIT_IF(mem == nullptr);
EXIT_IF(ctx == nullptr);
auto dfmt = params[PARAM_DFMT];
auto nfmt = params[PARAM_NFMT];
auto width = params[PARAM_WIDTH];
auto height = params[PARAM_HEIGHT];
auto dfmt = params[PARAM_DFMT_NFMT] >> 32u;
auto nfmt = params[PARAM_DFMT_NFMT] & 0xffffffffu;
auto width = params[PARAM_WIDTH_HEIGHT] >> 32u;
auto height = params[PARAM_WIDTH_HEIGHT] & 0xffffffffu;
auto levels = params[PARAM_LEVELS];
auto swizzle = params[PARAM_SWIZZLE];
auto usage = params[PARAM_USAGE];
VkImageUsageFlags vk_usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT;
switch (usage)
{
case TEXTURE_USAGE_SAMPLED: vk_usage |= VK_IMAGE_USAGE_SAMPLED_BIT; break;
case TEXTURE_USAGE_STORAGE: vk_usage |= VK_IMAGE_USAGE_STORAGE_BIT; break;
default: EXIT("unknown usage: %u\n", static_cast<uint32_t>(usage));
}
VkComponentMapping components {};
components.r = get_swizzle(swizzle & 0xffu);
components.g = get_swizzle((swizzle >> 8u) & 0xffu);
components.b = get_swizzle((swizzle >> 16u) & 0xffu);
components.a = get_swizzle((swizzle >> 24u) & 0xffu);
auto pixel_format = get_texture_format(dfmt, nfmt);
@ -70,29 +144,39 @@ void* TextureObject::Create(GraphicContext* ctx, const uint64_t* vaddr, const ui
auto* vk_obj = new TextureVulkanImage;
vk_obj->extent.width = width;
vk_obj->extent.height = height;
vk_obj->format = pixel_format;
vk_obj->image = nullptr;
vk_obj->image_view = nullptr;
VkImageCreateInfo image_info {};
image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
image_info.pNext = nullptr;
image_info.flags = 0;
image_info.imageType = VK_IMAGE_TYPE_2D;
image_info.extent.width = vk_obj->extent.width;
image_info.extent.height = vk_obj->extent.height;
image_info.extent.width = width;
image_info.extent.height = height;
image_info.extent.depth = 1;
image_info.mipLevels = levels;
image_info.arrayLayers = 1;
image_info.format = vk_obj->format;
image_info.format = pixel_format;
image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
image_info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
image_info.usage = vk_usage;
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
image_info.samples = VK_SAMPLE_COUNT_1_BIT;
if (!CheckSwizzle(ctx, &image_info, &components))
{
EXIT("swizzle is not supported");
}
if (!CheckFormat(ctx, &image_info))
{
EXIT("format is not supported");
}
vk_obj->extent.width = width;
vk_obj->extent.height = height;
vk_obj->format = image_info.format;
vk_obj->image = nullptr;
vk_obj->image_view = nullptr;
vkCreateImage(ctx->device, &image_info, nullptr, &vk_obj->image);
EXIT_NOT_IMPLEMENTED(vk_obj->image == nullptr);
@ -109,8 +193,6 @@ void* TextureObject::Create(GraphicContext* ctx, const uint64_t* vaddr, const ui
vk_obj->memory = *mem;
// EXIT_NOT_IMPLEMENTED(mem->requirements.size > *size);
GetUpdateFunc()(ctx, params, vk_obj, vaddr, size, vaddr_num);
VkImageViewCreateInfo create_info {};
@ -120,10 +202,7 @@ void* TextureObject::Create(GraphicContext* ctx, const uint64_t* vaddr, const ui
create_info.image = vk_obj->image;
create_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
create_info.format = vk_obj->format;
create_info.components.r = get_swizzle(swizzle & 0xffu);
create_info.components.g = get_swizzle((swizzle >> 8u) & 0xffu);
create_info.components.b = get_swizzle((swizzle >> 16u) & 0xffu);
create_info.components.a = get_swizzle((swizzle >> 24u) & 0xffu);
create_info.components = components;
create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
create_info.subresourceRange.baseArrayLayer = 0;
create_info.subresourceRange.baseMipLevel = 0;
@ -149,24 +228,36 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj,
auto* vk_obj = static_cast<TextureVulkanImage*>(obj);
bool tile = (params[TextureObject::PARAM_TILE] != 0);
auto dfmt = params[TextureObject::PARAM_DFMT];
auto nfmt = params[TextureObject::PARAM_NFMT];
auto width = params[TextureObject::PARAM_WIDTH];
auto height = params[TextureObject::PARAM_HEIGHT];
auto dfmt = params[TextureObject::PARAM_DFMT_NFMT] >> 32u;
auto nfmt = params[TextureObject::PARAM_DFMT_NFMT] & 0xffffffffu;
auto width = params[TextureObject::PARAM_WIDTH_HEIGHT] >> 32u;
auto height = params[TextureObject::PARAM_WIDTH_HEIGHT] & 0xffffffffu;
auto levels = params[TextureObject::PARAM_LEVELS];
auto pitch = params[TextureObject::PARAM_PITCH];
auto usage = params[TextureObject::PARAM_USAGE];
bool neo = Config::IsNeo();
VkImageLayout vk_layout = VK_IMAGE_LAYOUT_UNDEFINED;
switch (usage)
{
case TextureObject::TEXTURE_USAGE_SAMPLED: vk_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; break;
case TextureObject::TEXTURE_USAGE_STORAGE: vk_layout = VK_IMAGE_LAYOUT_GENERAL; break;
default: EXIT("unknown usage: %u\n", static_cast<uint32_t>(usage));
}
EXIT_NOT_IMPLEMENTED(levels >= 16);
uint32_t level_sizes[16];
TileGetTextureSize(dfmt, nfmt, width, height, levels, tile, neo, nullptr, level_sizes, nullptr, nullptr);
TileGetTextureSize(dfmt, nfmt, width, height, pitch, levels, tile, neo, nullptr, level_sizes, nullptr, nullptr);
// dbg_test_mipmaps(ctx, VK_FORMAT_BC3_SRGB_BLOCK, 512, 512);
uint32_t offset = 0;
uint32_t mip_width = width;
uint32_t mip_height = height;
uint32_t mip_pitch = pitch;
Vector<BufferImageCopy> regions(levels);
for (uint32_t i = 0; i < levels; i++)
@ -176,6 +267,7 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj,
regions[i].offset = offset;
regions[i].width = mip_width;
regions[i].height = mip_height;
regions[i].pitch = mip_pitch;
offset += level_sizes[i];
@ -187,24 +279,29 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj,
{
mip_height /= 2;
}
if (mip_pitch > 1)
{
mip_pitch /= 2;
}
}
if (tile)
{
EXIT_NOT_IMPLEMENTED(pitch != width);
auto* temp_buf = new uint8_t[*size];
TileConvertTiledToLinear(temp_buf, reinterpret_cast<void*>(*vaddr), TileMode::TextureTiled, dfmt, nfmt, width, height, levels, neo);
UtilFillImage(ctx, vk_obj, temp_buf, *size, regions);
UtilFillImage(ctx, vk_obj, temp_buf, *size, regions, static_cast<uint64_t>(vk_layout));
delete[] temp_buf;
} else
{
UtilFillImage(ctx, vk_obj, reinterpret_cast<void*>(*vaddr), *size, regions);
UtilFillImage(ctx, vk_obj, reinterpret_cast<void*>(*vaddr), *size, regions, static_cast<uint64_t>(vk_layout));
}
}
bool TextureObject::Equal(const uint64_t* other) const
{
return (params[PARAM_DFMT] == other[PARAM_DFMT] && params[PARAM_NFMT] == other[PARAM_NFMT] &&
params[PARAM_WIDTH] == other[PARAM_WIDTH] && params[PARAM_HEIGHT] == other[PARAM_HEIGHT] &&
return (params[PARAM_DFMT_NFMT] == other[PARAM_DFMT_NFMT] && params[PARAM_PITCH] == other[PARAM_PITCH] &&
params[PARAM_WIDTH_HEIGHT] == other[PARAM_WIDTH_HEIGHT] && params[PARAM_USAGE] == other[PARAM_USAGE] &&
params[PARAM_LEVELS] == other[PARAM_LEVELS] && params[PARAM_TILE] == other[PARAM_TILE] &&
params[PARAM_NEO] == other[PARAM_NEO] && params[PARAM_SWIZZLE] == other[PARAM_SWIZZLE]);
}

View file

@ -423,7 +423,7 @@ void TileConvertTiledToLinear(void* dst, const void* src, TileMode mode, uint32_
uint32_t padded_height[16] = {0};
uint32_t level_sizes[16] = {0};
TileGetTextureSize(dfmt, nfmt, width, height, levels, true, neo, nullptr, level_sizes, padded_width, padded_height);
TileGetTextureSize(dfmt, nfmt, width, height, width, levels, true, neo, nullptr, level_sizes, padded_width, padded_height);
uint32_t mip_width = width;
uint32_t mip_height = height;
@ -558,45 +558,60 @@ void TileGetDepthSize(uint32_t width, uint32_t height, uint32_t z_format, uint32
*stencil_size = 0;
}
void TileGetVideoOutSize(uint32_t width, uint32_t height, bool tile, bool neo, uint32_t* size)
void TileGetVideoOutSize(uint32_t width, uint32_t height, bool tile, bool neo, uint32_t* size, uint32_t* pitch)
{
EXIT_IF(size == nullptr);
EXIT_IF(pitch == nullptr);
uint32_t ret_size = 0;
uint32_t ret_pitch = 0;
if (width == 1920 && height == 1080 && tile && !neo)
{
*size = 8355840;
ret_size = 8355840;
ret_pitch = 1920;
}
if (width == 1920 && height == 1080 && tile && neo)
{
*size = 8847360;
ret_size = 8847360;
ret_pitch = 1920;
}
if (width == 1920 && height == 1080 && !tile && !neo)
{
*size = 8294400;
ret_size = 8294400;
ret_pitch = 1920;
}
if (width == 1920 && height == 1080 && !tile && neo)
{
*size = 8294400;
ret_size = 8294400;
ret_pitch = 1920;
}
if (width == 1280 && height == 720 && tile && !neo)
{
*size = 3932160;
ret_size = 3932160;
ret_pitch = 1280;
}
if (width == 1280 && height == 720 && tile && neo)
{
*size = 3932160;
ret_size = 3932160;
ret_pitch = 1280;
}
if (width == 1280 && height == 720 && !tile && !neo)
{
*size = 3686400;
ret_size = 3686400;
ret_pitch = 1280;
}
if (width == 1280 && height == 720 && !tile && neo)
{
*size = 3686400;
ret_size = 3686400;
ret_pitch = 1280;
}
*size = ret_size;
*pitch = ret_pitch;
}
void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t levels, bool tile, bool neo,
void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t pitch, uint32_t levels, bool tile, bool neo,
uint32_t* total_size, uint32_t* level_sizes, uint32_t* padded_width, uint32_t* padded_height)
{
struct Padded
@ -643,8 +658,8 @@ void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t h
for (const auto& i: infos)
{
if (i.dfmt == dfmt && i.nfmt == nfmt && i.width == width && i.height == height && i.levels >= levels && i.tile == tile &&
i.neo == neo)
if (i.dfmt == dfmt && i.nfmt == nfmt && i.width == width && i.width == pitch && i.height == height && i.levels >= levels &&
i.tile == tile && i.neo == neo)
{
for (uint32_t l = 0; l < levels; l++)
{
@ -665,6 +680,20 @@ void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t h
padded_height[l] = i.padded[l].height;
}
}
return;
}
}
if (!tile && levels == 1 && dfmt == 10 && nfmt == 9)
{
uint32_t size = pitch * height * 4;
if (total_size != nullptr)
{
*total_size = size;
}
if (level_sizes != nullptr)
{
level_sizes[0] = size;
}
}
}

View file

@ -82,7 +82,7 @@ static void set_image_layout(VkCommandBuffer buffer, VkImage image, uint32_t lev
vkCmdPipelineBarrier(buffer, src_stages, dest_stages, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, VideoOutVulkanImage* dst_image)
void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, uint32_t src_pitch, VideoOutVulkanImage* dst_image)
{
EXIT_IF(src_buffer == nullptr);
EXIT_IF(src_buffer->buffer == nullptr);
@ -96,7 +96,7 @@ void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, VideoOut
VkBufferImageCopy region {};
region.bufferOffset = 0;
region.bufferRowLength = 0;
region.bufferRowLength = (src_pitch != dst_image->extent.width ? src_pitch : 0);
region.bufferImageHeight = 0;
region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
@ -114,7 +114,7 @@ void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, VideoOut
}
void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, TextureVulkanImage* dst_image,
const Vector<BufferImageCopy>& regions)
const Vector<BufferImageCopy>& regions, uint64_t dst_layout)
{
EXIT_IF(src_buffer == nullptr);
EXIT_IF(src_buffer->buffer == nullptr);
@ -131,7 +131,7 @@ void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, TextureV
for (const auto& r: regions)
{
region[index].bufferOffset = r.offset;
region[index].bufferRowLength = 0;
region[index].bufferRowLength = (r.width != r.pitch ? r.pitch : 0);
region[index].bufferImageHeight = 0;
region[index].imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
region[index].imageSubresource.mipLevel = index;
@ -148,7 +148,7 @@ void UtilBufferToImage(CommandBuffer* buffer, VulkanBuffer* src_buffer, TextureV
vkCmdCopyBufferToImage(vk_buffer, src_buffer->buffer, dst_image->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, index, region);
set_image_layout(vk_buffer, dst_image->image, index, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
/*VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL*/ static_cast<VkImageLayout>(dst_layout));
}
void UtilBlitImage(CommandBuffer* buffer, VideoOutVulkanImage* src_image, VulkanSwapchain* dst_swapchain)
@ -231,7 +231,7 @@ void VulkanDeleteBuffer(GraphicContext* gctx, VulkanBuffer* buffer)
buffer->buffer = nullptr;
}
void UtilFillImage(GraphicContext* ctx, VideoOutVulkanImage* image, const void* src_data, uint64_t size)
void UtilFillImage(GraphicContext* ctx, VideoOutVulkanImage* image, const void* src_data, uint64_t size, uint32_t src_pitch)
{
KYTY_PROFILER_FUNCTION();
@ -244,10 +244,8 @@ void UtilFillImage(GraphicContext* ctx, VideoOutVulkanImage* image, const void*
VulkanCreateBuffer(ctx, size, &staging_buffer);
void* data = nullptr;
// vkMapMemory(ctx->device, staging_buffer.memory.memory, staging_buffer.memory.offset, size, 0, &data);
VulkanMapMemory(ctx, &staging_buffer.memory, &data);
std::memcpy(data, src_data, size);
// vkUnmapMemory(ctx->device, staging_buffer.memory.memory);
VulkanUnmapMemory(ctx, &staging_buffer.memory);
CommandBuffer buffer;
@ -256,7 +254,7 @@ void UtilFillImage(GraphicContext* ctx, VideoOutVulkanImage* image, const void*
EXIT_NOT_IMPLEMENTED(buffer.IsInvalid());
buffer.Begin();
UtilBufferToImage(&buffer, &staging_buffer, image);
UtilBufferToImage(&buffer, &staging_buffer, src_pitch, image);
buffer.End();
buffer.Execute();
buffer.WaitForFence();
@ -310,7 +308,7 @@ void UtilSetImageLayoutOptimal(VideoOutVulkanImage* image)
}
void UtilFillImage(GraphicContext* ctx, TextureVulkanImage* image, const void* src_data, uint64_t size,
const Vector<BufferImageCopy>& regions)
const Vector<BufferImageCopy>& regions, uint64_t dst_layout)
{
EXIT_IF(ctx == nullptr);
EXIT_IF(image == nullptr);
@ -331,7 +329,7 @@ void UtilFillImage(GraphicContext* ctx, TextureVulkanImage* image, const void* s
EXIT_NOT_IMPLEMENTED(buffer.IsInvalid());
buffer.Begin();
UtilBufferToImage(&buffer, &staging_buffer, image, regions);
UtilBufferToImage(&buffer, &staging_buffer, image, regions, dst_layout);
buffer.End();
buffer.Execute();
buffer.WaitForFence();

View file

@ -5,6 +5,7 @@
#include "Kyty/Core/LinkList.h"
#include "Kyty/Core/String.h"
#include "Kyty/Core/Threads.h"
#include "Kyty/Core/Vector.h"
#include "Emulator/Common.h"
#include "Emulator/Config.h"
@ -30,7 +31,9 @@ LIB_NAME("VideoOut", "VideoOut");
namespace EventQueue = LibKernel::EventQueue;
constexpr int VIDEO_OUT_EVENT_FLIP = 0;
constexpr int VIDEO_OUT_EVENT_FLIP = 0;
constexpr int VIDEO_OUT_EVENT_VBLANK = 1;
constexpr int VIDEO_OUT_EVENT_PRE_VBLANK_START = 2;
struct VideoOutResolutionStatus
{
@ -72,6 +75,16 @@ struct VideoOutFlipStatus
uint32_t reserved1 = 0;
};
// Per-handle vblank bookkeeping; every member defaults to zero.
// Member order and widths are kept exactly as declared.
struct VideoOutVblankStatus
{
	uint64_t count {0};       // incremented by VblankBegin()/VblankEnd()
	uint64_t processTime {0}; // set from LibKernel::KernelGetProcessTime()
	uint64_t tsc {0};         // set from LibKernel::KernelReadTsc()
	uint64_t reserved[1] {};
	uint8_t  flags {0};
	uint8_t  pad1[7] {};
};
struct VideoOutBufferSet
{
VideoOutBufferAttribute attr = {};
@ -84,19 +97,25 @@ struct VideoOutBufferInfo
void* buffer = nullptr;
Graphics::VideoOutVulkanImage* buffer_vulkan = nullptr;
uint64_t buffer_size = 0;
uint64_t buffer_pitch = 0;
int set_id = 0;
};
struct VideoOutConfig
{
VideoOutResolutionStatus resolution;
bool opened = false;
int flip_rate = 0;
EventQueue::KernelEqueue flip_eq = nullptr;
VideoOutFlipStatus flip_status;
VideoOutBufferInfo buffers[16];
VideoOutBufferSet buffers_sets[16];
int buffers_sets_num = 0;
Core::Mutex mutex;
VideoOutResolutionStatus resolution;
bool opened = false;
int flip_rate = 0;
Vector<EventQueue::KernelEqueue> flip_eqs;
Vector<EventQueue::KernelEqueue> pre_vblank_eqs;
Vector<EventQueue::KernelEqueue> vblank_eqs;
VideoOutFlipStatus flip_status;
VideoOutVblankStatus pre_vblank_status;
VideoOutVblankStatus vblank_status;
VideoOutBufferInfo buffers[16];
VideoOutBufferSet buffers_sets[16];
int buffers_sets_num = 0;
};
class FlipQueue
@ -158,6 +177,9 @@ public:
FlipQueue& GetFlipQueue() { return m_flip_queue; }
void VblankBegin();
void VblankEnd();
private:
Core::Mutex m_mutex;
VideoOutConfig m_video_out_ctx[VIDEO_OUT_NUM_MAX];
@ -167,8 +189,11 @@ private:
static VideoOutContext* g_video_out_context = nullptr;
static uint64_t calc_buffer_size(const VideoOutBufferAttribute* attribute)
static void calc_buffer_size(const VideoOutBufferAttribute* attribute, uint64_t* size, uint64_t* pitch)
{
EXIT_IF(size == nullptr);
EXIT_IF(pitch == nullptr);
bool tile = attribute->tilingMode == 0;
bool neo = Config::IsNeo();
uint32_t width = attribute->width;
@ -179,10 +204,12 @@ static uint64_t calc_buffer_size(const VideoOutBufferAttribute* attribute)
EXIT_NOT_IMPLEMENTED(attribute->aspectRatio != 0);
EXIT_NOT_IMPLEMENTED(attribute->pixelFormat != 0x80000000);
uint32_t size = 0;
Graphics::TileGetVideoOutSize(width, height, tile, neo, &size);
uint32_t size32 = 0;
uint32_t pitch32 = 0;
Graphics::TileGetVideoOutSize(width, height, tile, neo, &size32, &pitch32);
return size;
*size = size32;
*pitch = pitch32;
}
void VideoOutInit(uint32_t width, uint32_t height)
@ -220,7 +247,9 @@ int VideoOutContext::Open()
}
}
EXIT_IF(m_video_out_ctx[handle].flip_eq != nullptr);
EXIT_IF(!m_video_out_ctx[handle].flip_eqs.IsEmpty());
EXIT_IF(!m_video_out_ctx[handle].pre_vblank_eqs.IsEmpty());
EXIT_IF(!m_video_out_ctx[handle].vblank_eqs.IsEmpty());
EXIT_IF(m_video_out_ctx[handle].flip_rate != 0);
m_video_out_ctx[handle].opened = true;
@ -228,6 +257,8 @@ int VideoOutContext::Open()
m_video_out_ctx[handle].flip_status.flipArg = -1;
m_video_out_ctx[handle].flip_status.currentBuffer = -1;
m_video_out_ctx[handle].flip_status.count = 0;
m_video_out_ctx[handle].pre_vblank_status = VideoOutVblankStatus();
m_video_out_ctx[handle].vblank_status = VideoOutVblankStatus();
return handle;
}
@ -241,11 +272,35 @@ void VideoOutContext::Close(int handle)
m_video_out_ctx[handle].opened = false;
if (m_video_out_ctx[handle].flip_eq != nullptr)
m_video_out_ctx[handle].mutex.Lock();
for (auto& flip_eq: m_video_out_ctx[handle].flip_eqs)
{
EventQueue::KernelDeleteEvent(m_video_out_ctx[handle].flip_eq, VIDEO_OUT_EVENT_FLIP, EventQueue::KERNEL_EVFILT_VIDEO_OUT);
EXIT_IF(m_video_out_ctx[handle].flip_eq != nullptr);
if (flip_eq != nullptr)
{
auto result = EventQueue::KernelDeleteEvent(flip_eq, VIDEO_OUT_EVENT_FLIP, EventQueue::KERNEL_EVFILT_VIDEO_OUT);
EXIT_NOT_IMPLEMENTED(result != OK);
}
}
m_video_out_ctx[handle].flip_eqs.Clear();
for (auto& vblank_eq: m_video_out_ctx[handle].pre_vblank_eqs)
{
if (vblank_eq != nullptr)
{
auto result = EventQueue::KernelDeleteEvent(vblank_eq, VIDEO_OUT_EVENT_VBLANK, EventQueue::KERNEL_EVFILT_VIDEO_OUT);
EXIT_NOT_IMPLEMENTED(result != OK);
}
}
m_video_out_ctx[handle].pre_vblank_eqs.Clear();
// Queues in vblank_eqs are triggered with VIDEO_OUT_EVENT_VBLANK (see VblankEnd) and
// vblank_event_delete_func only accepts that ident, so the event must be deleted
// under the same ident (it was VIDEO_OUT_EVENT_PRE_VBLANK_START before).
// Entries already detached by the delete callback are nullptr and are skipped.
for (auto& vblank_eq: m_video_out_ctx[handle].vblank_eqs)
{
	if (vblank_eq != nullptr)
	{
		auto result = EventQueue::KernelDeleteEvent(vblank_eq, VIDEO_OUT_EVENT_VBLANK, EventQueue::KERNEL_EVFILT_VIDEO_OUT);
		EXIT_NOT_IMPLEMENTED(result != OK);
	}
}
m_video_out_ctx[handle].vblank_eqs.Clear();
m_video_out_ctx[handle].mutex.Unlock();
m_video_out_ctx[handle].flip_rate = 0;
@ -270,6 +325,62 @@ VideoOutConfig* VideoOutContext::Get(int handle)
return m_video_out_ctx + handle;
}
// Updates the pre-vblank status of every opened video-out handle and triggers
// the event queues stored in pre_vblank_eqs, passing the new count as trigger data.
// Index 0 is never visited (the loop starts at 1).
// NOTE(review): the trigger uses VIDEO_OUT_EVENT_VBLANK rather than
// VIDEO_OUT_EVENT_PRE_VBLANK_START - confirm against the code that registers
// pre-vblank events.
void VideoOutContext::VblankBegin()
{
	Core::LockGuard lock(m_mutex);

	for (int handle = 1; handle < VIDEO_OUT_NUM_MAX; handle++)
	{
		auto& cfg = m_video_out_ctx[handle];

		if (!cfg.opened)
		{
			continue;
		}

		cfg.mutex.Lock();

		auto& status = cfg.pre_vblank_status;

		status.count++;
		status.processTime = LibKernel::KernelGetProcessTime();
		status.tsc         = LibKernel::KernelReadTsc();

		for (auto& eq: cfg.pre_vblank_eqs)
		{
			// Slots are set to nullptr when their event is deleted.
			if (eq == nullptr)
			{
				continue;
			}

			auto result = EventQueue::KernelTriggerEvent(eq, VIDEO_OUT_EVENT_VBLANK, EventQueue::KERNEL_EVFILT_VIDEO_OUT,
			                                             reinterpret_cast<void*>(status.count));
			EXIT_NOT_IMPLEMENTED(result != OK);
		}

		cfg.mutex.Unlock();
	}
}
// Updates the vblank status of every opened video-out handle and triggers the
// VIDEO_OUT_EVENT_VBLANK event on each queue stored in vblank_eqs, passing the
// new count as trigger data. Index 0 is never visited (the loop starts at 1).
void VideoOutContext::VblankEnd()
{
	Core::LockGuard lock(m_mutex);

	for (int handle = 1; handle < VIDEO_OUT_NUM_MAX; handle++)
	{
		auto& cfg = m_video_out_ctx[handle];

		if (!cfg.opened)
		{
			continue;
		}

		cfg.mutex.Lock();

		auto& status = cfg.vblank_status;

		status.count++;
		status.processTime = LibKernel::KernelGetProcessTime();
		status.tsc         = LibKernel::KernelReadTsc();

		for (auto& eq: cfg.vblank_eqs)
		{
			// Slots are set to nullptr when their event is deleted.
			if (eq == nullptr)
			{
				continue;
			}

			auto result = EventQueue::KernelTriggerEvent(eq, VIDEO_OUT_EVENT_VBLANK, EventQueue::KERNEL_EVFILT_VIDEO_OUT,
			                                             reinterpret_cast<void*>(status.count));
			EXIT_NOT_IMPLEMENTED(result != OK);
		}

		cfg.mutex.Unlock();
	}
}
VideoOutBufferImageInfo VideoOutContext::FindImage(void* buffer)
{
VideoOutBufferImageInfo ret;
@ -286,9 +397,10 @@ VideoOutBufferImageInfo VideoOutContext::FindImage(void* buffer)
{
if (ctx.buffers[j].buffer == buffer)
{
ret.image = ctx.buffers[j].buffer_vulkan;
ret.buffer_size = ctx.buffers[j].buffer_size;
ret.index = j - ctx.buffers_sets[i].start_index;
ret.image = ctx.buffers[j].buffer_vulkan;
ret.buffer_size = ctx.buffers[j].buffer_size;
ret.buffer_pitch = ctx.buffers[j].buffer_pitch;
ret.index = j - ctx.buffers_sets[i].start_index;
goto END;
}
}
@ -356,33 +468,24 @@ bool FlipQueue::Flip(uint32_t micros)
auto* buffer = r.cfg->buffers[r.index].buffer_vulkan;
// if (buffer->framebuffer == nullptr)
// {
// // TODO(): Flush via GpuMemoryFlush()
// const auto& attribute = r.cfg->buffers_sets[r.cfg->buffers[r.index].set_id].attr;
// auto buffer_size = calc_buffer_size(&attribute);
// EXIT_NOT_IMPLEMENTED(buffer_size == 0);
// Graphics::VideoOutBufferObject vulkan_buffer_info(attribute.pixelFormat, attribute.width, attribute.height,
// (attribute.tilingMode == 0), Config::IsNeo());
// r.cfg->buffers[r.index].buffer_vulkan = static_cast<Graphics::VideoOutVulkanImage*>(
// Graphics::GpuMemoryGetObject(g_video_out_context->GetGraphicCtx(),
// reinterpret_cast<uint64_t>(r.cfg->buffers[r.index].buffer), buffer_size, vulkan_buffer_info));
// EXIT_NOT_IMPLEMENTED(r.cfg->buffers[r.index].buffer_vulkan != buffer);
// }
Graphics::WindowDrawBuffer(buffer);
if (r.cfg->flip_eq != nullptr)
m_mutex.Lock();
r.cfg->mutex.Lock();
for (auto& flip_eq: r.cfg->flip_eqs)
{
auto result = EventQueue::KernelTriggerEvent(r.cfg->flip_eq, VIDEO_OUT_EVENT_FLIP, EventQueue::KERNEL_EVFILT_VIDEO_OUT,
reinterpret_cast<void*>(r.flip_arg));
EXIT_NOT_IMPLEMENTED(result != OK);
if (flip_eq != nullptr)
{
auto result = EventQueue::KernelTriggerEvent(flip_eq, VIDEO_OUT_EVENT_FLIP, EventQueue::KERNEL_EVFILT_VIDEO_OUT,
reinterpret_cast<void*>(r.flip_arg));
EXIT_NOT_IMPLEMENTED(result != OK);
}
}
r.cfg->mutex.Unlock();
printf("Flip done: %d\n", r.index);
m_mutex.Lock();
m_requests.Remove(first);
m_done_cond_var.Signal();
@ -412,13 +515,27 @@ void FlipQueue::GetFlipStatus(VideoOutConfig* cfg, VideoOutFlipStatus* out)
*out = cfg->flip_status;
}
bool FlipWindow(uint32_t micros)
bool VideoOutFlipWindow(uint32_t micros)
{
EXIT_IF(g_video_out_context == nullptr);
return g_video_out_context->GetFlipQueue().Flip(micros);
}
// Free-function entry point: forwards to VblankBegin() on the global video-out context.
// The context must already exist (guarded by EXIT_IF).
void VideoOutBeginVblank()
{
EXIT_IF(g_video_out_context == nullptr);
g_video_out_context->VblankBegin();
}
// Free-function entry point: forwards to VblankEnd() on the global video-out context.
// The context must already exist (guarded by EXIT_IF).
void VideoOutEndVblank()
{
EXIT_IF(g_video_out_context == nullptr);
g_video_out_context->VblankEnd();
}
KYTY_SYSV_ABI int VideoOutOpen(int user_id, int bus_type, int index, const void* param)
{
PRINT_NAME();
@ -511,15 +628,23 @@ static void flip_event_reset_func(LibKernel::EventQueue::KernelEqueueEvent* even
event->event.data = 0;
}
static void flip_event_delete_func(LibKernel::EventQueue::KernelEqueueEvent* event)
static void flip_event_delete_func(EventQueue::KernelEqueue eq, LibKernel::EventQueue::KernelEqueueEvent* event)
{
EXIT_IF(event == nullptr);
EXIT_IF(event->filter.data == nullptr);
EXIT_NOT_IMPLEMENTED(event->event.ident != VIDEO_OUT_EVENT_FLIP);
EXIT_NOT_IMPLEMENTED(event->event.filter != EventQueue::KERNEL_EVFILT_VIDEO_OUT);
if (event->filter.data != nullptr)
{
auto* video_out = static_cast<VideoOutConfig*>(event->filter.data);
EXIT_IF(video_out->flip_eq == nullptr);
video_out->flip_eq = nullptr;
video_out->mutex.Lock();
EXIT_IF(video_out->flip_eqs.IsEmpty());
auto index = video_out->flip_eqs.Find(eq);
EXIT_NOT_IMPLEMENTED(!video_out->flip_eqs.IndexValid(index));
video_out->flip_eqs[index] = nullptr;
video_out->mutex.Unlock();
}
}
@ -531,6 +656,42 @@ static void flip_event_trigger_func(LibKernel::EventQueue::KernelEqueueEvent* ev
event->event.data = reinterpret_cast<intptr_t>(trigger_data);
}
// Reset callback for a vblank event: clears the triggered flag and zeroes
// the event's fflags and data fields.
// event - event record to reset; must not be nullptr.
static void vblank_event_reset_func(LibKernel::EventQueue::KernelEqueueEvent* event)
{
EXIT_IF(event == nullptr);
event->triggered = false;
event->event.fflags = 0;
event->event.data = 0;
}
// Delete callback for a vblank event: detaches queue `eq` from the owning
// VideoOutConfig (stored in event->filter.data) by setting its slot in
// vblank_eqs to nullptr. The slot is kept, not erased.
//
// eq    - the event queue the event is being removed from.
// event - the event record; expected to carry ident VIDEO_OUT_EVENT_VBLANK and
//         filter KERNEL_EVFILT_VIDEO_OUT.
static void vblank_event_delete_func(EventQueue::KernelEqueue eq, LibKernel::EventQueue::KernelEqueueEvent* event)
{
	EXIT_IF(event == nullptr);
	EXIT_IF(event->filter.data == nullptr);
	EXIT_NOT_IMPLEMENTED(event->event.ident != VIDEO_OUT_EVENT_VBLANK);
	EXIT_NOT_IMPLEMENTED(event->event.filter != EventQueue::KERNEL_EVFILT_VIDEO_OUT);

	// Nothing to detach from when no owner is attached.
	if (event->filter.data == nullptr)
	{
		return;
	}

	auto* video_out = static_cast<VideoOutConfig*>(event->filter.data);

	video_out->mutex.Lock();

	EXIT_IF(video_out->vblank_eqs.IsEmpty());
	auto index = video_out->vblank_eqs.Find(eq);
	EXIT_NOT_IMPLEMENTED(!video_out->vblank_eqs.IndexValid(index));
	video_out->vblank_eqs[index] = nullptr;

	video_out->mutex.Unlock();
}
// Trigger callback installed on vblank events: marks the event as triggered,
// bumps its fflags counter by one and stores trigger_data (as an integer) in
// the event's data field.
static void vblank_event_trigger_func(LibKernel::EventQueue::KernelEqueueEvent* event, void* trigger_data)
{
	EXIT_IF(event == nullptr);

	auto& kernel_event = event->event;

	kernel_event.data = reinterpret_cast<intptr_t>(trigger_data);
	kernel_event.fflags++;
	event->triggered = true;
}
KYTY_SYSV_ABI int VideoOutAddFlipEvent(EventQueue::KernelEqueue eq, int handle, void* udata)
{
PRINT_NAME();
@ -539,7 +700,9 @@ KYTY_SYSV_ABI int VideoOutAddFlipEvent(EventQueue::KernelEqueue eq, int handle,
auto* ctx = g_video_out_context->Get(handle);
EXIT_NOT_IMPLEMENTED(ctx->flip_eq != nullptr);
ctx->mutex.Lock();
EXIT_NOT_IMPLEMENTED(ctx->flip_eqs.Contains(eq));
if (eq == nullptr)
{
@ -547,20 +710,60 @@ KYTY_SYSV_ABI int VideoOutAddFlipEvent(EventQueue::KernelEqueue eq, int handle,
}
EventQueue::KernelEqueueEvent event;
event.triggered = false;
event.event.ident = VIDEO_OUT_EVENT_FLIP;
event.event.filter = EventQueue::KERNEL_EVFILT_VIDEO_OUT;
event.event.udata = udata;
event.event.fflags = 0;
event.event.data = 0;
event.filter.delete_func = flip_event_delete_func;
event.filter.reset_func = flip_event_reset_func;
event.filter.trigger_func = flip_event_trigger_func;
event.filter.data = ctx;
event.triggered = false;
event.event.ident = VIDEO_OUT_EVENT_FLIP;
event.event.filter = EventQueue::KERNEL_EVFILT_VIDEO_OUT;
event.event.udata = udata;
event.event.fflags = 0;
event.event.data = 0;
event.filter.delete_event_func = flip_event_delete_func;
event.filter.reset_func = flip_event_reset_func;
event.filter.trigger_func = flip_event_trigger_func;
event.filter.data = ctx;
int result = EventQueue::KernelAddEvent(eq, event);
ctx->flip_eq = eq;
ctx->flip_eqs.Add(eq);
ctx->mutex.Unlock();
return result;
}
// Registers a vblank event on the event queue `eq` for the video-out port
// identified by `handle`.
//
// eq     - target event queue; a null queue is rejected.
// handle - video-out handle resolved through the global context.
// udata  - opaque user pointer stored in the event.
//
// Returns VIDEO_OUT_ERROR_INVALID_EVENT_QUEUE when `eq` is null, otherwise the
// result of EventQueue::KernelAddEvent().
KYTY_SYSV_ABI int VideoOutAddVblankEvent(LibKernel::EventQueue::KernelEqueue eq, int handle, void* udata)
{
	PRINT_NAME();

	EXIT_IF(g_video_out_context == nullptr);

	auto* ctx = g_video_out_context->Get(handle);

	// Validate the queue BEFORE taking the lock: returning from inside the
	// critical section would leave ctx->mutex locked forever. Checking first
	// also keeps a null queue away from the Contains() assertion below, which
	// could otherwise match a slot cleared to nullptr by the delete callback.
	if (eq == nullptr)
	{
		return VIDEO_OUT_ERROR_INVALID_EVENT_QUEUE;
	}

	ctx->mutex.Lock();

	// Registering the same queue twice is not supported.
	EXIT_NOT_IMPLEMENTED(ctx->vblank_eqs.Contains(eq));

	EventQueue::KernelEqueueEvent event;
	event.triggered                = false;
	event.event.ident              = VIDEO_OUT_EVENT_VBLANK;
	event.event.filter             = EventQueue::KERNEL_EVFILT_VIDEO_OUT;
	event.event.udata              = udata;
	event.event.fflags             = 0;
	event.event.data               = 0;
	event.filter.delete_event_func = vblank_event_delete_func;
	event.filter.reset_func        = vblank_event_reset_func;
	event.filter.trigger_func      = vblank_event_trigger_func;
	event.filter.data              = ctx;

	int result = EventQueue::KernelAddEvent(eq, event);

	// NOTE(review): the queue is recorded regardless of `result`; confirm
	// whether a failed KernelAddEvent() should skip this.
	ctx->vblank_eqs.Add(eq);

	ctx->mutex.Unlock();

	return result;
}
@ -615,16 +818,19 @@ KYTY_SYSV_ABI int VideoOutRegisterBuffers(int handle, int start_index, void* con
EXIT_NOT_IMPLEMENTED(attribute->pitchInPixel != attribute->width);
EXIT_NOT_IMPLEMENTED(attribute->option != 0);
auto buffer_size = calc_buffer_size(attribute);
uint64_t buffer_size = 0;
uint64_t buffer_pitch = 0;
calc_buffer_size(attribute, &buffer_size, &buffer_pitch);
EXIT_NOT_IMPLEMENTED(buffer_size == 0);
EXIT_NOT_IMPLEMENTED(buffer_pitch == 0);
ctx->buffers_sets[set_index].start_index = start_index;
ctx->buffers_sets[set_index].num = buffer_num;
ctx->buffers_sets[set_index].attr = *attribute;
Graphics::VideoOutBufferObject vulkan_buffer_info(attribute->pixelFormat, attribute->width, attribute->height,
(attribute->tilingMode == 0), Config::IsNeo());
(attribute->tilingMode == 0), Config::IsNeo(), buffer_pitch);
for (int i = 0; i < buffer_num; i++)
{
@ -636,6 +842,7 @@ KYTY_SYSV_ABI int VideoOutRegisterBuffers(int handle, int start_index, void* con
ctx->buffers[i + start_index].set_id = set_index;
ctx->buffers[i + start_index].buffer = addresses[i];
ctx->buffers[i + start_index].buffer_size = buffer_size;
ctx->buffers[i + start_index].buffer_pitch = buffer_pitch;
ctx->buffers[i + start_index].buffer_vulkan = static_cast<Graphics::VideoOutVulkanImage*>(Graphics::GpuMemoryGetObject(
g_video_out_context->GetGraphicCtx(), reinterpret_cast<uint64_t>(addresses[i]), buffer_size, vulkan_buffer_info));
@ -717,6 +924,44 @@ KYTY_SYSV_ABI int VideoOutGetFlipStatus(int handle, VideoOutFlipStatus* status)
return OK;
}
// Copies the current vblank status of the video-out port `handle` into
// `*status` (under the port's mutex) and logs the copied fields.
//
// Returns VIDEO_OUT_ERROR_INVALID_ADDRESS when `status` is null, OK otherwise.
KYTY_SYSV_ABI int VideoOutGetVblankStatus(int handle, VideoOutVblankStatus* status)
{
	PRINT_NAME();

	EXIT_IF(g_video_out_context == nullptr);

	if (status != nullptr)
	{
		auto* video_out = g_video_out_context->Get(handle);

		// Take the snapshot while holding the lock.
		video_out->mutex.Lock();
		*status = video_out->vblank_status;
		video_out->mutex.Unlock();

		printf("\t count = %" PRIu64 "\n", status->count);
		printf("\t processTime = %" PRIu64 "\n", status->processTime);
		printf("\t tsc = %" PRIu64 "\n", status->tsc);

		return OK;
	}

	return VIDEO_OUT_ERROR_INVALID_ADDRESS;
}
// Logs the requested window-mode margins for the video-out port `handle` and
// reports success. The margins are only printed here; nothing is stored.
KYTY_SYSV_ABI int VideoOutSetWindowModeMargins(int handle, int top, int bottom)
{
	PRINT_NAME();

	EXIT_IF(g_video_out_context == nullptr);

	// The lookup result is intentionally discarded; the call is kept for
	// whatever checking Get() performs on the handle.
	static_cast<void>(g_video_out_context->Get(handle));

	printf("\t top = %d\n", top);
	printf("\t bottom = %d\n", bottom);

	return OK;
}
} // namespace Kyty::Libs::VideoOut
#endif // KYTY_EMU_ENABLED

View file

@ -34,7 +34,7 @@ void* VideoOutBufferObject::Create(GraphicContext* ctx, const uint64_t* vaddr, c
vk_obj->extent.width = width;
vk_obj->extent.height = height;
vk_obj->format = VK_FORMAT_B8G8R8A8_SRGB; // VK_FORMAT_R8G8B8A8_SRGB;
vk_obj->format = VK_FORMAT_B8G8R8A8_SRGB;
vk_obj->image = nullptr;
vk_obj->image_view = nullptr;
@ -68,12 +68,17 @@ void* VideoOutBufferObject::Create(GraphicContext* ctx, const uint64_t* vaddr, c
EXIT_NOT_IMPLEMENTED(!allocated);
// vkBindImageMemory(ctx->device, vk_obj->image, mem->memory, mem->offset);
VulkanBindImageMemory(ctx, vk_obj, mem);
vk_obj->memory = *mem;
EXIT_NOT_IMPLEMENTED(mem->requirements.size > *size);
printf("VideoOutBufferObject::Create()\n");
printf("\t mem->requirements.size = %" PRIu64 "\n", mem->requirements.size);
printf("\t width = %" PRIu64 "\n", width);
printf("\t height = %" PRIu64 "\n", height);
printf("\t size = %" PRIu64 "\n", *size);
// EXIT_NOT_IMPLEMENTED(mem->requirements.size > *size);
GetUpdateFunc()(ctx, params, vk_obj, vaddr, size, vaddr_num);
@ -130,26 +135,30 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj,
auto* vk_obj = static_cast<VideoOutVulkanImage*>(obj);
bool tiled = (params[VideoOutBufferObject::PARAM_TILED] != 0);
bool neo = (params[VideoOutBufferObject::PARAM_NEO] != 0);
bool tiled = (params[VideoOutBufferObject::PARAM_TILED] != 0);
bool neo = (params[VideoOutBufferObject::PARAM_NEO] != 0);
auto pitch = params[VideoOutBufferObject::PARAM_PITCH];
auto width = params[VideoOutBufferObject::PARAM_WIDTH];
auto height = params[VideoOutBufferObject::PARAM_HEIGHT];
if (tiled && buffer_is_tiled(*vaddr, *size))
{
EXIT_NOT_IMPLEMENTED(width != pitch);
auto* temp_buf = new uint8_t[*size];
TileConvertTiledToLinear(temp_buf, reinterpret_cast<void*>(*vaddr), TileMode::VideoOutTiled,
params[VideoOutBufferObject::PARAM_WIDTH], params[VideoOutBufferObject::PARAM_HEIGHT], neo);
UtilFillImage(ctx, vk_obj, temp_buf, *size);
TileConvertTiledToLinear(temp_buf, reinterpret_cast<void*>(*vaddr), TileMode::VideoOutTiled, width, height, neo);
UtilFillImage(ctx, vk_obj, temp_buf, *size, pitch);
delete[] temp_buf;
} else
{
UtilFillImage(ctx, vk_obj, reinterpret_cast<void*>(*vaddr), *size);
UtilFillImage(ctx, vk_obj, reinterpret_cast<void*>(*vaddr), *size, pitch);
}
}
bool VideoOutBufferObject::Equal(const uint64_t* other) const
{
return (params[PARAM_FORMAT] == other[PARAM_FORMAT] && params[PARAM_WIDTH] == other[PARAM_WIDTH] &&
params[PARAM_HEIGHT] == other[PARAM_HEIGHT] && params[PARAM_TILED] == other[PARAM_TILED]);
params[PARAM_HEIGHT] == other[PARAM_HEIGHT] && params[PARAM_TILED] == other[PARAM_TILED] &&
params[PARAM_PITCH] == other[PARAM_PITCH]);
}
static void delete_func(GraphicContext* ctx, void* obj, VulkanMemory* mem)

View file

@ -387,10 +387,12 @@ void game_show_window(GameApi* game, const Core::Timer& timer)
printf("skip frame %d\n", p->skip_frames);
} else
{
if (VideoOut::FlipWindow(100000))
VideoOut::VideoOutBeginVblank();
if (VideoOut::VideoOutFlipWindow(100000))
{
CalcFrameTime(game, timer.GetTimeS());
}
VideoOut::VideoOutEndVblank();
}
}
p->mutex.Unlock();
@ -1403,6 +1405,39 @@ static VkPhysicalDevice VulkanFindPhysicalDevice(VkInstance instance, VkSurfaceK
skip_device = true;
}
if (!skip_device &&
!CheckFormat(device, VK_FORMAT_R8G8B8A8_SRGB, true, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT))
{
printf("Format VK_FORMAT_R8G8B8A8_SRGB cannot be used as texture");
skip_device = true;
}
if (!skip_device &&
!CheckFormat(device, VK_FORMAT_R8G8B8A8_SRGB, true, VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT))
{
printf("Format VK_FORMAT_R8G8B8A8_SRGB cannot be used as texture");
if (!skip_device && !CheckFormat(device, VK_FORMAT_R8G8B8A8_UNORM, true,
VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT))
{
printf("Format VK_FORMAT_R8G8B8A8_UNORM cannot be used as texture");
skip_device = true;
}
}
if (!skip_device &&
!CheckFormat(device, VK_FORMAT_B8G8R8A8_SRGB, true, VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT))
{
printf("Format VK_FORMAT_B8G8R8A8_SRGB cannot be used as texture");
if (!skip_device && !CheckFormat(device, VK_FORMAT_B8G8R8A8_UNORM, true,
VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT))
{
printf("Format VK_FORMAT_B8G8R8A8_UNORM cannot be used as texture");
skip_device = true;
}
}
/*if (!skip_device && !CheckFormat(device, VK_FORMAT_S8_UINT, true, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT))
{
printf("Format VK_FORMAT_S8_UINT cannot be used as depth buffer");
@ -1483,7 +1518,7 @@ static VkDevice VulkanCreateDevice(VkPhysicalDevice physical_device, VkSurfaceKH
queue_create_info.queueCount = queue_count;
queue_create_info.pQueuePriorities = queue_priority.GetDataConst();
VkPhysicalDeviceFeatures device_features {};
// VkPhysicalDeviceFeatures device_features {};
VkDeviceCreateInfo create_info {};
create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
@ -1495,7 +1530,7 @@ static VkDevice VulkanCreateDevice(VkPhysicalDevice physical_device, VkSurfaceKH
create_info.ppEnabledLayerNames = (r->enable_validation_layers ? r->required_layers.GetDataConst() : nullptr);
create_info.enabledExtensionCount = device_extensions.Size();
create_info.ppEnabledExtensionNames = device_extensions.GetDataConst();
create_info.pEnabledFeatures = &device_features;
create_info.pEnabledFeatures = nullptr; //&device_features;
VkDevice device = nullptr;

View file

@ -49,9 +49,9 @@ KernelEqueuePrivate::~KernelEqueuePrivate()
{
auto& event = m_events[index];
if (event.filter.delete_func != nullptr)
if (event.filter.delete_event_func != nullptr)
{
event.filter.delete_func(&event);
event.filter.delete_event_func(this, &event);
}
}
}
@ -176,9 +176,9 @@ bool KernelEqueuePrivate::DeleteEvent(uintptr_t ident, int16_t filter)
{
auto& event = m_events[index];
if (event.filter.delete_func != nullptr)
if (event.filter.delete_event_func != nullptr)
{
event.filter.delete_func(&event);
event.filter.delete_event_func(this, &event);
}
m_events.Remove(index);
@ -267,7 +267,7 @@ int KYTY_SYSV_ABI KernelDeleteEqueue(KernelEqueue eq)
int KYTY_SYSV_ABI KernelWaitEqueue(KernelEqueue eq, KernelEvent* ev, int num, int* out, const KernelUseconds* timo)
{
PRINT_NAME();
// PRINT_NAME();
if (eq == nullptr)
{
@ -286,7 +286,7 @@ int KYTY_SYSV_ABI KernelWaitEqueue(KernelEqueue eq, KernelEvent* ev, int num, in
EXIT_NOT_IMPLEMENTED(out == nullptr);
printf("\tEqueue wait: %s\n", eq->GetName().C_Str());
// printf("\tEqueue wait: %s\n", eq->GetName().C_Str());
if (timo == nullptr)
{
@ -306,11 +306,11 @@ int KYTY_SYSV_ABI KernelWaitEqueue(KernelEqueue eq, KernelEvent* ev, int num, in
if (*out == 0)
{
printf("\ttimedout\n");
// printf("\ttimedout\n");
return KERNEL_ERROR_ETIMEDOUT;
}
printf("\treceived %u events\n", *out);
// printf("\treceived %u events\n", *out);
return OK;
}

View file

@ -21,6 +21,7 @@ LIB_DEFINE(InitGraphicsDriver_1)
LIB_FUNC("zwY0YV91TTI", Graphics::GraphicsSubmitCommandBuffers);
LIB_FUNC("xbxNatawohc", Graphics::GraphicsSubmitAndFlipCommandBuffers);
LIB_FUNC("yvZ73uQUqrk", Graphics::GraphicsSubmitDone);
LIB_FUNC("b08AgtPlHPg", Graphics::GraphicsAreSubmitsAllowed);
LIB_FUNC("iBt3Oe00Kvc", Graphics::GraphicsFlushMemory);
LIB_FUNC("b0xyllnVY-I", Graphics::GraphicsAddEqEvent);
LIB_FUNC("PVT+fuoS9gU", Graphics::GraphicsDeleteEqEvent);

View file

@ -246,11 +246,20 @@ static int KYTY_SYSV_ABI write(int d, const char* str, int64_t size)
int size_int = static_cast<int>(size);
printf(FG_BRIGHT_MAGENTA "%.*s" DEFAULT, size_int, str);
emu_printf(FG_BRIGHT_MAGENTA "%.*s" DEFAULT, size_int, str);
return size_int;
}
// Minimal read() for descriptor 0 (stdin) only: reads at most one line, of at
// most nbytes - 1 characters, into `buf` via fgets.
//
// d      - file descriptor; anything other than 0 is not implemented.
// buf    - destination buffer of at least `nbytes` bytes.
// nbytes - capacity of `buf`.
//
// Returns the number of characters stored (excluding the terminating NUL),
// or 0 when nothing was requested or no input could be read.
static int64_t KYTY_SYSV_ABI read(int d, void* buf, uint64_t nbytes)
{
	// PRINT_NAME();

	EXIT_NOT_IMPLEMENTED(d != 0);
	EXIT_NOT_IMPLEMENTED(buf == nullptr);

	if (nbytes == 0)
	{
		return 0;
	}

	// fgets takes an int count: clamp instead of letting a large request
	// wrap to zero or a negative value in the narrowing cast.
	constexpr uint64_t max_request = 0x7fffffff;
	const int          limit       = static_cast<int>(nbytes < max_request ? nbytes : max_request);

	// fgets returns nullptr on end-of-file or on a read error; passing that
	// straight to strlen would dereference a null pointer.
	const char* line = std::fgets(static_cast<char*>(buf), limit, stdin);
	if (line == nullptr)
	{
		return 0;
	}

	return static_cast<int64_t>(strlen(line));
}
static int KYTY_SYSV_ABI KernelGetModuleInfoFromAddr(uint64_t addr, int n, ModuleInfo* r)
{
PRINT_NAME();
@ -457,6 +466,7 @@ LIB_DEFINE(InitLibKernel_1_Equeue)
LIB_FUNC("D0OdFMjp46I", EventQueue::KernelCreateEqueue);
LIB_FUNC("jpFjmgAC5AE", EventQueue::KernelDeleteEqueue);
LIB_FUNC("fzyMKs9kim0", EventQueue::KernelWaitEqueue);
LIB_FUNC("vz+pg2zdopI", EventQueue::KernelGetEventUserData);
}
LIB_DEFINE(InitLibKernel_1_EventFlag)
@ -481,6 +491,7 @@ LIB_DEFINE(InitLibKernel_1_Pthread)
LIB_FUNC("6UgtwV+0zb4", LibKernel::PthreadCreate);
LIB_FUNC("3PtV6p3QNX4", LibKernel::PthreadEqual);
LIB_FUNC("onNY9Byn-W8", LibKernel::PthreadJoin);
LIB_FUNC("4qGrR6eoP9Y", LibKernel::PthreadDetach);
LIB_FUNC("How7B8Oet6k", LibKernel::PthreadGetname);
LIB_FUNC("62KCwEMmzcM", LibKernel::PthreadAttrDestroy);
@ -509,6 +520,7 @@ LIB_DEFINE(InitLibKernel_1_Pthread)
LIB_FUNC("QBi7HCK03hw", LibKernel::KernelClockGettime);
LIB_FUNC("ejekcaNQNq0", LibKernel::KernelGettimeofday);
LIB_FUNC("1j3S3n-tTW4", LibKernel::KernelGetTscFrequency);
LIB_FUNC("4J2sUJmuHZQ", LibKernel::KernelGetProcessTime);
LIB_FUNC("7H0iTOciTLo", LibKernel::pthread_mutex_lock_s);
LIB_FUNC("2Z+PpY6CaJg", LibKernel::pthread_mutex_unlock_s);
@ -535,6 +547,7 @@ LIB_DEFINE(InitLibKernel_1)
LIB_FUNC("959qrazPIrg", LibKernel::KernelGetProcParam);
LIB_FUNC("p5EcQeEeJAE", LibKernel::KernelRtldSetApplicationHeapAPI);
LIB_FUNC("FxVZqBAA7ks", LibKernel::write);
LIB_FUNC("DRuBt2pvICk", LibKernel::read);
LIB_FUNC("f7KBOafysXo", LibKernel::KernelGetModuleInfoFromAddr);
LIB_FUNC("zE-wXIZjLoM", LibKernel::KernelDebugRaiseExceptionOnReleaseMode);
LIB_FUNC("OMDRKKAZ8I4", LibKernel::KernelDebugRaiseException);

View file

@ -19,9 +19,12 @@ LIB_DEFINE(InitVideoOut_1)
LIB_FUNC("i6-sR91Wt-4", VideoOut::VideoOutSetBufferAttribute);
LIB_FUNC("CBiu4mCE1DA", VideoOut::VideoOutSetFlipRate);
LIB_FUNC("HXzjK9yI30k", VideoOut::VideoOutAddFlipEvent);
LIB_FUNC("Xru92wHJRmg", VideoOut::VideoOutAddVblankEvent);
LIB_FUNC("w3BY+tAEiQY", VideoOut::VideoOutRegisterBuffers);
LIB_FUNC("U46NwOiJpys", VideoOut::VideoOutSubmitFlip);
LIB_FUNC("SbU3dwp80lQ", VideoOut::VideoOutGetFlipStatus);
LIB_FUNC("1FZBKy8HeNU", VideoOut::VideoOutGetVblankStatus);
LIB_FUNC("MTxxrOCeSig", VideoOut::VideoOutSetWindowModeMargins);
}
} // namespace Kyty::Libs

View file

@ -167,6 +167,35 @@ void SetOutputFile(const String& file_name, Core::File::Encoding enc)
} // namespace Log
void emu_printf(const char* format, ...)
{
EXIT_IF(!Log::g_log_initialized);
EXIT_IF(Log::g_mutex == nullptr);
Log::g_mutex->Lock();
{
va_list args {};
va_start(args, format);
String s;
s.Printf(format, args);
va_end(args);
if (!Log::g_colored_printf)
{
s = Log::RemoveColors(s);
}
::printf("%s", s.C_Str());
if (Log::g_dir == Log::Direction::File && Log::g_file != nullptr)
{
Log::g_file->Write(s);
}
}
Log::g_mutex->Unlock();
}
void printf(const char* format, ...)
{
EXIT_IF(!Log::g_log_initialized);