render depth as texture

This commit is contained in:
InoriRus 2022-04-01 15:42:43 +10:00
parent 838a09c810
commit d4c640462f
22 changed files with 1570 additions and 811 deletions

View file

@ -35,5 +35,5 @@ Define environment variable named Qt5_DIR pointing to the proper version of Qt
MSVC compiler (cl.exe) is not supported!
External dependencies:
* Vulkan SDK 1.2.176.1
* Vulkan SDK 1.2.198.1
* Qt 5.15.0

View file

@ -1,4 +1,4 @@
version: 0.0.11.build-{build}
version: 0.0.12.build-{build}
image: Visual Studio 2019
environment:
matrix:

View file

@ -82,7 +82,7 @@ if (KYTY_LINKER STREQUAL LD)
set(KYTY_LD_OPTIONS "-Wl,--image-base=0x100000000000")
endif()
project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.0.11)
project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.0.12)
include(src_script.cmake)

View file

@ -83,6 +83,8 @@ struct VideoOutVulkanImage: public VulkanImage
// Depth/stencil flavour of VulkanImage.
struct DepthStencilVulkanImage: public VulkanImage
{
// Defaults to false. NOTE(review): presumably marks a depth buffer with compressed metadata — confirm against the code that sets it.
bool compressed = false;
// Additional image view handle, null until assigned.
// NOTE(review): presumably the view used when the depth image is sampled as a texture — confirm against the renderer.
VkImageView texture_view = nullptr;
};
struct TextureVulkanImage: public VulkanImage

View file

@ -11,14 +11,16 @@
namespace Kyty::Libs::Graphics {
namespace HW {
struct VsStageRegisters;
} // namespace HW
KYTY_SUBSYSTEM_DEFINE(Graphics);
void GraphicsDbgDumpDcb(const char* type, uint32_t num_dw, uint32_t* cmd_buffer);
int KYTY_SYSV_ABI GraphicsSetVsShader(uint32_t* cmd, uint64_t size, const VsStageRegisters* vs_regs, uint32_t shader_modifier);
int KYTY_SYSV_ABI GraphicsUpdateVsShader(uint32_t* cmd, uint64_t size, const VsStageRegisters* vs_regs, uint32_t shader_modifier);
int KYTY_SYSV_ABI GraphicsSetVsShader(uint32_t* cmd, uint64_t size, const HW::VsStageRegisters* vs_regs, uint32_t shader_modifier);
int KYTY_SYSV_ABI GraphicsUpdateVsShader(uint32_t* cmd, uint64_t size, const HW::VsStageRegisters* vs_regs, uint32_t shader_modifier);
int KYTY_SYSV_ABI GraphicsSetPsShader(uint32_t* cmd, uint64_t size, const uint32_t* ps_regs);
int KYTY_SYSV_ABI GraphicsSetPsShader350(uint32_t* cmd, uint64_t size, const uint32_t* ps_regs);
int KYTY_SYSV_ABI GraphicsUpdatePsShader(uint32_t* cmd, uint64_t size, const uint32_t* ps_regs);

View file

@ -10,8 +10,11 @@
namespace Kyty::Libs::Graphics {
namespace HW {
class HardwareContext;
class UserConfig;
} // namespace HW
class CommandProcessor;
struct VideoOutVulkanImage;
struct DepthStencilVulkanImage;
@ -66,9 +69,10 @@ private:
void GraphicsRenderInit();
void GraphicsRenderCreateContext();
void GraphicsRenderDrawIndex(CommandBuffer* buffer, HardwareContext* ctx, UserConfig* ucfg, uint32_t index_type_and_size,
void GraphicsRenderDrawIndex(CommandBuffer* buffer, HW::HardwareContext* ctx, HW::UserConfig* ucfg, uint32_t index_type_and_size,
uint32_t index_count, const void* index_addr, uint32_t flags, uint32_t type);
void GraphicsRenderDrawIndexAuto(CommandBuffer* buffer, HardwareContext* ctx, UserConfig* ucfg, uint32_t index_count, uint32_t flags);
void GraphicsRenderDrawIndexAuto(CommandBuffer* buffer, HW::HardwareContext* ctx, HW::UserConfig* ucfg, uint32_t index_count,
uint32_t flags);
void GraphicsRenderWriteAtEndOfPipe(CommandBuffer* buffer, uint64_t* dst_gpu_addr, uint64_t value);
void GraphicsRenderWriteAtEndOfPipeClockCounter(CommandBuffer* buffer, uint64_t* dst_gpu_addr);
void GraphicsRenderWriteAtEndOfPipe(CommandBuffer* buffer, uint32_t* dst_gpu_addr, uint32_t value);
@ -81,7 +85,7 @@ void GraphicsRenderWriteAtEndOfPipeWithWriteBack(CommandBuffer* buffer, uint64_t
void GraphicsRenderWriteAtEndOfPipeWithInterruptWriteBack(CommandBuffer* buffer, uint64_t* dst_gpu_addr, uint64_t value);
void GraphicsRenderWriteAtEndOfPipeWithInterrupt(CommandBuffer* buffer, uint64_t* dst_gpu_addr, uint64_t value);
void GraphicsRenderWriteBack();
void GraphicsRenderDispatchDirect(CommandBuffer* buffer, HardwareContext* ctx, uint32_t thread_group_x, uint32_t thread_group_y,
void GraphicsRenderDispatchDirect(CommandBuffer* buffer, HW::HardwareContext* ctx, uint32_t thread_group_x, uint32_t thread_group_y,
uint32_t thread_group_z, uint32_t mode);
void GraphicsRenderMemoryBarrier(CommandBuffer* buffer);
void GraphicsRenderRenderTextureBarrier(CommandBuffer* buffer, uint64_t vaddr, uint64_t size);

View file

@ -7,7 +7,29 @@
#ifdef KYTY_EMU_ENABLED
namespace Kyty::Libs::Graphics {
namespace Kyty::Libs::Graphics::HW {
// Base address of a color render target.
// The render-target parser fills addr with the first register dword shifted left by 8 bits.
struct ColorBase
{
uint64_t addr = 0;
};
// Pitch fields of a color render target, kept exactly as extracted from the register:
// the render-target parser reads two 11-bit fields (bit 0 and bit 20 of the second dword).
// NOTE(review): the names suggest a "pitch / 8 - 1" encoding — confirm before converting to pixels.
struct ColorPitch
{
uint32_t pitch_div8_minus1 = 0;
uint32_t fmask_pitch_div8_minus1 = 0;
};
// Slice field of a color render target, kept as the raw 22-bit register value
// (low bits of the third dword in the render-target parser).
// NOTE(review): the name suggests a "slice / 64 - 1" encoding — confirm.
struct ColorSlice
{
uint32_t slice_div64_minus1 = 0;
};
// Array-slice range of a color render target.
// The render-target parser reads both values as 11-bit fields of the fourth dword
// (base at bit 0, last at bit 13).
struct ColorView
{
uint32_t base_array_slice_index = 0;
uint32_t last_array_slice_index = 0;
};
struct ColorInfo
{
@ -23,37 +45,83 @@ struct ColorInfo
uint32_t channel_order = 0;
};
// Attribute fields of a color render target.
// Bit positions below are the ones the render-target parser extracts from the sixth dword.
struct ColorAttrib
{
bool force_dest_alpha_to_one = false; // bit 17
uint32_t tile_mode = 0; // bits 0..4
uint32_t fmask_tile_mode = 0; // bits 5..9
uint32_t num_samples = 0; // bits 12..14, raw field value
uint32_t num_fragments = 0; // bits 15..16, raw field value
};
// DCC control fields of a color render target.
// Bit positions below are the ones the render-target parser extracts from the seventh dword;
// all values are stored as raw field codes.
struct ColorDcc
{
uint32_t max_uncompressed_block_size = 0; // bits 2..3
uint32_t max_compressed_block_size = 0; // bits 5..6
uint32_t min_compressed_block_size = 0; // bit 4
uint32_t color_transform = 0; // bits 7..8
bool enable_overwrite_combiner = false; // bit 0
bool force_independent_blocks = false;
};
// CMASK address of a color render target; stored per slot via HardwareContext::SetColorCmask.
// NOTE(review): how addr is derived from the register value is not visible here — confirm in the parser.
struct ColorCmask
{
uint64_t addr = 0;
};
// CMASK slice field of a color render target; stored per slot via HardwareContext::SetColorCmaskSlice.
// NOTE(review): the name suggests a "value - 1" encoding — confirm.
struct ColorCmaskSlice
{
uint32_t slice_minus1 = 0;
};
// FMASK address of a color render target; stored per slot via HardwareContext::SetColorFmask.
// NOTE(review): how addr is derived from the register value is not visible here — confirm in the parser.
struct ColorFmask
{
uint64_t addr = 0;
};
// FMASK slice field of a color render target; stored per slot via HardwareContext::SetColorFmaskSlice.
// NOTE(review): the name suggests a "value - 1" encoding — confirm.
struct ColorFmaskSlice
{
uint32_t slice_minus1 = 0;
};
// First clear-color word of a color render target; stored per slot via HardwareContext::SetColorClearWord0.
struct ColorClearWord0
{
uint32_t word0 = 0;
};
// Second clear-color word of a color render target; stored per slot via HardwareContext::SetColorClearWord1.
struct ColorClearWord1
{
uint32_t word1 = 0;
};
// DCC address of a color render target; stored per slot via HardwareContext::SetColorDccAddr.
// NOTE(review): how addr is derived from the register value is not visible here — confirm in the parser.
struct ColorDccAddr
{
uint64_t addr = 0;
};
// Width and height of a color render target; stored per slot via HardwareContext::SetColorSize.
// NOTE(review): presumably in pixels — confirm against the code that fills it.
struct ColorSize
{
uint32_t width = 0;
uint32_t height = 0;
};
struct RenderTarget
{
uint64_t base_addr = 0;
uint32_t pitch_div8_minus1 = 0;
uint32_t fmask_pitch_div8_minus1 = 0;
uint32_t slice_div64_minus1 = 0;
uint32_t base_array_slice_index = 0;
uint32_t last_array_slice_index = 0;
ColorInfo color_info;
bool force_dest_alpha_to_one = false;
uint32_t tile_mode = 0;
uint32_t fmask_tile_mode = 0;
uint32_t num_samples = 0;
uint32_t num_fragments = 0;
uint32_t dcc_max_uncompressed_block_size = 0;
uint32_t dcc_max_compressed_block_size = 0;
uint32_t dcc_min_compressed_block_size = 0;
uint32_t dcc_color_transform = 0;
bool dcc_enable_overwrite_combiner = false;
bool dcc_force_independent_blocks = false;
uint64_t cmask_addr = 0;
uint32_t cmask_slice_minus1 = 0;
uint64_t fmask_addr = 0;
uint32_t fmask_slice_minus1 = 0;
uint32_t clear_color_word0 = 0;
uint32_t clear_color_word1 = 0;
uint64_t dcc_addr = 0;
uint32_t width = 0;
uint32_t height = 0;
ColorBase base;
ColorPitch pitch;
ColorSlice slice;
ColorView view;
ColorInfo info;
ColorAttrib attrib;
ColorDcc dcc;
ColorCmask cmask;
ColorCmaskSlice cmask_slice;
ColorFmask fmask;
ColorFmaskSlice fmask_slice;
ColorClearWord0 clear_word0;
ColorClearWord1 clear_word1;
ColorDccAddr dcc_addr;
ColorSize size;
};
struct DepthRenderTargetZInfo
@ -231,6 +299,12 @@ struct EqaaControl
bool static_anchor_associations = false;
};
// Color control state: a mode code and a raster-operation code, with non-zero defaults.
// NOTE(review): mode 1 / op 0xCC look like the "normal" mode and the ROP3 "copy source" code —
// confirm against the CB_COLOR_CONTROL register documentation.
struct ColorControl
{
uint8_t mode = 1;
uint8_t op = 0xCC;
};
struct Viewport
{
float zmin = 0.0f;
@ -246,17 +320,22 @@ struct Viewport
struct ScreenViewport
{
Viewport viewports[15];
uint32_t transform_control = 0;
int scissor_left = 0;
int scissor_top = 0;
int scissor_right = 0;
int scissor_bottom = 0;
uint32_t hw_offset_x = 0;
uint32_t hw_offset_y = 0;
float guard_band_horz_clip = 0.0f;
float guard_band_vert_clip = 0.0f;
float guard_band_horz_discard = 0.0f;
float guard_band_vert_discard = 0.0f;
uint32_t transform_control = 0;
int screen_scissor_left = 0;
int screen_scissor_top = 0;
int screen_scissor_right = 0;
int screen_scissor_bottom = 0;
int generic_scissor_left = 0;
int generic_scissor_top = 0;
int generic_scissor_right = 0;
int generic_scissor_bottom = 0;
bool generic_scissor_window_offset_enable = false;
uint32_t hw_offset_x = 0;
uint32_t hw_offset_y = 0;
float guard_band_horz_clip = 0.0f;
float guard_band_vert_clip = 0.0f;
float guard_band_horz_discard = 0.0f;
float guard_band_vert_discard = 0.0f;
};
struct VsStageRegisters
@ -372,9 +451,22 @@ public:
void Reset() { *this = HardwareContext(); }
void SetRenderTarget(uint32_t slot, const RenderTarget& target) { m_render_targets[slot] = target; }
void SetColorInfo(uint32_t slot, const ColorInfo& color_info) { m_render_targets[slot].color_info = color_info; }
[[nodiscard]] const RenderTarget& GetRenderTargets(uint32_t slot) const { return m_render_targets[slot]; }
// Per-slot render-target setters: each one overwrites a single sub-structure of
// m_render_targets[slot] and leaves the other parts of that render target untouched.
// NOTE(review): slot is used as an array index without a range check (m_render_targets has 8 entries);
// callers must guarantee slot < 8.
void SetColorBase(uint32_t slot, const ColorBase& base) { m_render_targets[slot].base = base; }
void SetColorPitch(uint32_t slot, const ColorPitch& pitch) { m_render_targets[slot].pitch = pitch; }
void SetColorSlice(uint32_t slot, const ColorSlice& slice) { m_render_targets[slot].slice = slice; }
void SetColorView(uint32_t slot, const ColorView& view) { m_render_targets[slot].view = view; }
void SetColorInfo(uint32_t slot, const ColorInfo& info) { m_render_targets[slot].info = info; }
void SetColorAttrib(uint32_t slot, const ColorAttrib& attrib) { m_render_targets[slot].attrib = attrib; }
void SetColorDcc(uint32_t slot, const ColorDcc& dcc) { m_render_targets[slot].dcc = dcc; }
void SetColorCmask(uint32_t slot, const ColorCmask& cmask) { m_render_targets[slot].cmask = cmask; }
void SetColorCmaskSlice(uint32_t slot, const ColorCmaskSlice& cmask_slice) { m_render_targets[slot].cmask_slice = cmask_slice; }
void SetColorFmask(uint32_t slot, const ColorFmask& fmask) { m_render_targets[slot].fmask = fmask; }
void SetColorFmaskSlice(uint32_t slot, const ColorFmaskSlice& fmask_slice) { m_render_targets[slot].fmask_slice = fmask_slice; }
void SetColorClearWord0(uint32_t slot, const ColorClearWord0& clear_word0) { m_render_targets[slot].clear_word0 = clear_word0; }
void SetColorClearWord1(uint32_t slot, const ColorClearWord1& clear_word1) { m_render_targets[slot].clear_word1 = clear_word1; }
void SetColorDccAddr(uint32_t slot, const ColorDccAddr& dcc_addr) { m_render_targets[slot].dcc_addr = dcc_addr; }
void SetColorSize(uint32_t slot, const ColorSize& size) { m_render_targets[slot].size = size; }
// Returns a reference to the complete render-target state of the given slot (same unchecked index).
[[nodiscard]] const RenderTarget& GetRenderTarget(uint32_t slot) const { return m_render_targets[slot]; }
void SetBlendControl(uint32_t slot, const BlendControl& control) { m_blend_control[slot] = control; }
[[nodiscard]] const BlendControl& GetBlendControl(uint32_t slot) const { return m_blend_control[slot]; }
@ -409,10 +501,18 @@ public:
void SetViewportTransformControl(uint32_t control) { m_screen_viewport.transform_control = control; }
void SetScreenScissor(int left, int top, int right, int bottom)
{
m_screen_viewport.scissor_left = left;
m_screen_viewport.scissor_top = top;
m_screen_viewport.scissor_right = right;
m_screen_viewport.scissor_bottom = bottom;
m_screen_viewport.screen_scissor_left = left;
m_screen_viewport.screen_scissor_top = top;
m_screen_viewport.screen_scissor_right = right;
m_screen_viewport.screen_scissor_bottom = bottom;
}
// Records the generic scissor rectangle and its window-offset flag in the screen viewport state.
// The values are stored as given; no validation or clamping is performed.
void SetGenericScissor(int left, int top, int right, int bottom, bool window_offset_enable)
{
	auto& viewport = m_screen_viewport;

	viewport.generic_scissor_window_offset_enable = window_offset_enable;

	viewport.generic_scissor_left   = left;
	viewport.generic_scissor_right  = right;
	viewport.generic_scissor_top    = top;
	viewport.generic_scissor_bottom = bottom;
}
void SetHardwareScreenOffset(uint32_t offset_x, uint32_t offset_y)
{
@ -474,6 +574,8 @@ public:
void SetStencilControl(const StencilControl& control) { m_stencil_control = control; }
[[nodiscard]] const StencilMask& GetStencilMask() const { return m_stencil_mask; }
void SetStencilMask(const StencilMask& mask) { m_stencil_mask = mask; }
// Accessors for the stored color control state (mode and raster-op code).
[[nodiscard]] const ColorControl& GetColorControl() const { return m_color_control; }
void SetColorControl(const ColorControl& control) { m_color_control = control; }
void SetVsUserSgpr(uint32_t id, uint32_t value, UserSgprType type)
{
@ -508,13 +610,19 @@ public:
[[nodiscard]] uint8_t GetStencilClearValue() const { return m_stencil_clear_value; }
void SetStencilClearValue(uint8_t clear_value) { m_stencil_clear_value = clear_value; }
// Accessors for the stored line width; the member defaults to 1.0f until a value is set.
[[nodiscard]] float GetLineWidth() const { return m_line_width; }
void SetLineWidth(float width) { m_line_width = width; }
private:
float m_line_width = 1.0f;
BlendControl m_blend_control[8];
BlendColor m_blend_color;
RenderTarget m_render_targets[8];
uint32_t m_render_target_mask = 0;
ScreenViewport m_screen_viewport;
ClipControl m_clip_control;
ColorControl m_color_control;
VertexShaderInfo m_vs;
PixelShaderInfo m_ps;
@ -580,7 +688,7 @@ inline uint32_t VsStageRegisters::GetUnknown2() const
return m_spiShaderPgmRsrc2Vs & 0xFFFFEFFFu;
}
} // namespace Kyty::Libs::Graphics
} // namespace Kyty::Libs::Graphics::HW
#endif // KYTY_EMU_ENABLED

View file

@ -44,6 +44,7 @@ enum class GpuMemoryScenario
{
Common,
GenerateMips,
TextureTriplet
};
struct GpuMemoryObject
@ -98,8 +99,9 @@ void GpuMemoryFlush(GraphicContext* ctx);
void GpuMemoryFrameDone();
void GpuMemoryWriteBack(GraphicContext* ctx);
bool GpuMemoryCheckAccessViolation(uint64_t vaddr, uint64_t size);
bool GpuMemoryWatcherEnabled();
Vector<GpuMemoryObject> GpuMemoryFindObjects(uint64_t vaddr, uint64_t size, bool exact);
Vector<GpuMemoryObject> GpuMemoryFindObjects(uint64_t vaddr, uint64_t size, bool exact, bool only_first);
bool VulkanAllocate(GraphicContext* ctx, VulkanMemory* mem);
void VulkanFree(GraphicContext* ctx, VulkanMemory* mem);

View file

@ -119,6 +119,18 @@ constexpr uint32_t DB_DEPTH_CLEAR = 0xB;
constexpr uint32_t DB_DEPTH_CLEAR_DEPTH_CLEAR_SHIFT = 0;
constexpr uint32_t DB_DEPTH_CLEAR_DEPTH_CLEAR_MASK = 0xFFFFFFFF;
// PA_SC_SCREEN_SCISSOR_TL (0xC): top-left corner, X is a 16-bit field at bit 0, Y a 16-bit field at bit 16.
// Masks are meant to be applied after shifting the register value right by the matching *_SHIFT.
constexpr uint32_t PA_SC_SCREEN_SCISSOR_TL = 0xC;
constexpr uint32_t PA_SC_SCREEN_SCISSOR_TL_TL_X_SHIFT = 0;
constexpr uint32_t PA_SC_SCREEN_SCISSOR_TL_TL_X_MASK = 0xFFFF;
constexpr uint32_t PA_SC_SCREEN_SCISSOR_TL_TL_Y_SHIFT = 16;
constexpr uint32_t PA_SC_SCREEN_SCISSOR_TL_TL_Y_MASK = 0xFFFF;
// PA_SC_SCREEN_SCISSOR_BR (0xD): bottom-right corner, same field layout as the top-left register.
constexpr uint32_t PA_SC_SCREEN_SCISSOR_BR = 0xD;
constexpr uint32_t PA_SC_SCREEN_SCISSOR_BR_BR_X_SHIFT = 0;
constexpr uint32_t PA_SC_SCREEN_SCISSOR_BR_BR_X_MASK = 0xFFFF;
constexpr uint32_t PA_SC_SCREEN_SCISSOR_BR_BR_Y_SHIFT = 16;
constexpr uint32_t PA_SC_SCREEN_SCISSOR_BR_BR_Y_MASK = 0xFFFF;
constexpr uint32_t DB_DEPTH_INFO = 0xF;
constexpr uint32_t DB_DEPTH_INFO_ADDR5_SWIZZLE_MASK_SHIFT = 0;
constexpr uint32_t DB_DEPTH_INFO_ADDR5_SWIZZLE_MASK_MASK = 0xF;
@ -178,6 +190,20 @@ constexpr uint32_t DB_DEPTH_SLICE = 0x17;
constexpr uint32_t DB_DEPTH_SLICE_SLICE_TILE_MAX_SHIFT = 0;
constexpr uint32_t DB_DEPTH_SLICE_SLICE_TILE_MAX_MASK = 0x3FFFFF;
// PA_SC_GENERIC_SCISSOR_TL (0x90): top-left corner, X is a 15-bit field at bit 0, Y a 15-bit field at bit 16,
// plus a one-bit WINDOW_OFFSET_DISABLE flag at bit 31.
// Masks are meant to be applied after shifting the register value right by the matching *_SHIFT.
constexpr uint32_t PA_SC_GENERIC_SCISSOR_TL = 0x90;
constexpr uint32_t PA_SC_GENERIC_SCISSOR_TL_TL_X_SHIFT = 0;
constexpr uint32_t PA_SC_GENERIC_SCISSOR_TL_TL_X_MASK = 0x7FFF;
constexpr uint32_t PA_SC_GENERIC_SCISSOR_TL_TL_Y_SHIFT = 16;
constexpr uint32_t PA_SC_GENERIC_SCISSOR_TL_TL_Y_MASK = 0x7FFF;
constexpr uint32_t PA_SC_GENERIC_SCISSOR_TL_WINDOW_OFFSET_DISABLE_SHIFT = 31;
constexpr uint32_t PA_SC_GENERIC_SCISSOR_TL_WINDOW_OFFSET_DISABLE_MASK = 0x1;
// PA_SC_GENERIC_SCISSOR_BR (0x91): bottom-right corner, X and Y are 15-bit fields at bit 0 and bit 16.
constexpr uint32_t PA_SC_GENERIC_SCISSOR_BR = 0x91;
constexpr uint32_t PA_SC_GENERIC_SCISSOR_BR_BR_X_SHIFT = 0;
constexpr uint32_t PA_SC_GENERIC_SCISSOR_BR_BR_X_MASK = 0x7FFF;
constexpr uint32_t PA_SC_GENERIC_SCISSOR_BR_BR_Y_SHIFT = 16;
constexpr uint32_t PA_SC_GENERIC_SCISSOR_BR_BR_Y_MASK = 0x7FFF;
constexpr uint32_t PA_SC_VPORT_ZMIN_0 = 0xB4;
constexpr uint32_t CB_BLEND_RED = 0x105;
@ -277,6 +303,12 @@ constexpr uint32_t DB_EQAA_INTERPOLATE_COMP_Z_MASK = 0x1;
constexpr uint32_t DB_EQAA_STATIC_ANCHOR_ASSOCIATIONS_SHIFT = 20;
constexpr uint32_t DB_EQAA_STATIC_ANCHOR_ASSOCIATIONS_MASK = 0x1;
// CB_COLOR_CONTROL (0x202): MODE is a 3-bit field at bit 4, ROP3 an 8-bit field at bit 16.
// Masks are meant to be applied after shifting the register value right by the matching *_SHIFT.
constexpr uint32_t CB_COLOR_CONTROL = 0x202;
constexpr uint32_t CB_COLOR_CONTROL_MODE_SHIFT = 4;
constexpr uint32_t CB_COLOR_CONTROL_MODE_MASK = 0x7;
constexpr uint32_t CB_COLOR_CONTROL_ROP3_SHIFT = 16;
constexpr uint32_t CB_COLOR_CONTROL_ROP3_MASK = 0xFF;
constexpr uint32_t PA_SU_SC_MODE_CNTL = 0x205;
constexpr uint32_t PA_SU_SC_MODE_CNTL_CULL_FRONT_SHIFT = 0;
constexpr uint32_t PA_SU_SC_MODE_CNTL_CULL_FRONT_MASK = 0x1;
@ -301,6 +333,10 @@ constexpr uint32_t PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST_MASK = 0x1;
constexpr uint32_t PA_SU_SC_MODE_CNTL_PERSP_CORR_DIS_SHIFT = 20;
constexpr uint32_t PA_SU_SC_MODE_CNTL_PERSP_CORR_DIS_MASK = 0x1;
// PA_SU_LINE_CNTL (0x282): WIDTH is a 16-bit field at bit 0.
constexpr uint32_t PA_SU_LINE_CNTL = 0x282;
constexpr uint32_t PA_SU_LINE_CNTL_WIDTH_SHIFT = 0;
constexpr uint32_t PA_SU_LINE_CNTL_WIDTH_MASK = 0xFFFF;
constexpr uint32_t DB_HTILE_SURFACE = 0x2AF;
constexpr uint32_t DB_HTILE_SURFACE_LINEAR_SHIFT = 0;
constexpr uint32_t DB_HTILE_SURFACE_LINEAR_MASK = 0x1;

View file

@ -13,9 +13,11 @@
namespace Kyty::Libs::Graphics {
namespace HW {
struct VertexShaderInfo;
struct PixelShaderInfo;
struct ComputeShaderInfo;
} // namespace HW
enum class ShaderType
{
@ -600,18 +602,26 @@ struct ShaderStorageResources
int binding_index = 0;
};
// Everything tracked for one texture binding of a shader: the texture resource itself,
// how it is used, and where it was bound.
struct ShaderTextureDescriptor
{
ShaderTextureResource texture;
ShaderTextureUsage usage = ShaderTextureUsage::Unknown;
int slot = 0;
int start_register = 0;
// NOTE(review): presumably set when the resource comes from the extended (indirect) user-data area — confirm.
bool extended = false;
// NOTE(review): presumably marks a 2D texture that is accessed without a sampler — confirm against the recompiler.
bool textures2d_without_sampler = false;
};
struct ShaderTextureResources
{
static constexpr int RES_MAX = 16;
ShaderTextureResource textures[RES_MAX];
ShaderTextureUsage usages[RES_MAX] = {};
int slots[RES_MAX] = {0};
int start_register[RES_MAX] = {0};
bool extended[RES_MAX] = {};
int textures_num = 0;
int binding_sampled_index = 0;
int binding_storage_index = 0;
ShaderTextureDescriptor desc[RES_MAX];
int textures_num = 0;
int textures2d_sampled_num = 0;
int textures2d_storage_num = 0;
int binding_sampled_index = 0;
int binding_storage_index = 0;
};
struct ShaderSamplerResources
@ -658,12 +668,12 @@ struct ShaderBindResources
ShaderExtendedResources extended;
};
struct ShaderBindParameters
{
bool textures2d_without_sampler[ShaderTextureResources::RES_MAX] = {};
int textures2d_sampled_num = 0;
int textures2d_storage_num = 0;
};
// struct ShaderBindParameters
//{
// bool textures2d_without_sampler[ShaderTextureResources::RES_MAX] = {};
// int textures2d_sampled_num = 0;
// int textures2d_storage_num = 0;
//};
struct ShaderVertexInputInfo
{
@ -700,28 +710,28 @@ struct ShaderPixelInputInfo
ShaderBindResources bind;
};
void ShaderCalcBindingIndices(ShaderBindResources* bind);
void ShaderGetInputInfoVS(const VertexShaderInfo* regs, ShaderVertexInputInfo* info);
void ShaderGetInputInfoPS(const PixelShaderInfo* regs, const ShaderVertexInputInfo* vs_info, ShaderPixelInputInfo* ps_info);
void ShaderGetInputInfoCS(const ComputeShaderInfo* regs, ShaderComputeInputInfo* info);
void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info);
void ShaderDbgDumpInputInfo(const ShaderPixelInputInfo* info);
void ShaderDbgDumpInputInfo(const ShaderComputeInputInfo* info);
ShaderId ShaderGetIdVS(const VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info);
ShaderId ShaderGetIdPS(const PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info);
ShaderId ShaderGetIdCS(const ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info);
ShaderCode ShaderParseVS(const VertexShaderInfo* regs);
ShaderCode ShaderParsePS(const PixelShaderInfo* regs);
ShaderCode ShaderParseCS(const ComputeShaderInfo* regs);
ShaderBindParameters ShaderGetBindParametersVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info);
ShaderBindParameters ShaderGetBindParametersPS(const ShaderCode& code, const ShaderPixelInputInfo* input_info);
ShaderBindParameters ShaderGetBindParametersCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info);
Vector<uint32_t> ShaderRecompileVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info);
Vector<uint32_t> ShaderRecompilePS(const ShaderCode& code, const ShaderPixelInputInfo* input_info);
Vector<uint32_t> ShaderRecompileCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info);
bool ShaderIsDisabled(uint64_t addr);
void ShaderDisable(uint64_t id);
void ShaderInjectDebugPrintf(uint64_t id, const ShaderDebugPrintf& cmd);
void ShaderCalcBindingIndices(ShaderBindResources* bind);
void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, ShaderVertexInputInfo* info);
void ShaderGetInputInfoPS(const HW::PixelShaderInfo* regs, const ShaderVertexInputInfo* vs_info, ShaderPixelInputInfo* ps_info);
void ShaderGetInputInfoCS(const HW::ComputeShaderInfo* regs, ShaderComputeInputInfo* info);
void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info);
void ShaderDbgDumpInputInfo(const ShaderPixelInputInfo* info);
void ShaderDbgDumpInputInfo(const ShaderComputeInputInfo* info);
ShaderId ShaderGetIdVS(const HW::VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info);
ShaderId ShaderGetIdPS(const HW::PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info);
ShaderId ShaderGetIdCS(const HW::ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info);
ShaderCode ShaderParseVS(const HW::VertexShaderInfo* regs);
ShaderCode ShaderParsePS(const HW::PixelShaderInfo* regs);
ShaderCode ShaderParseCS(const HW::ComputeShaderInfo* regs);
// ShaderBindParameters ShaderGetBindParametersVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info);
// ShaderBindParameters ShaderGetBindParametersPS(const ShaderCode& code, const ShaderPixelInputInfo* input_info);
// ShaderBindParameters ShaderGetBindParametersCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info);
Vector<uint32_t> ShaderRecompileVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info);
Vector<uint32_t> ShaderRecompilePS(const ShaderCode& code, const ShaderPixelInputInfo* input_info);
Vector<uint32_t> ShaderRecompileCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info);
bool ShaderIsDisabled(uint64_t addr);
void ShaderDisable(uint64_t id);
void ShaderInjectDebugPrintf(uint64_t id, const ShaderDebugPrintf& cmd);
} // namespace Kyty::Libs::Graphics

View file

@ -44,11 +44,11 @@ KYTY_SUBSYSTEM_UNEXPECTED_SHUTDOWN(Graphics) {}
KYTY_SUBSYSTEM_DESTROY(Graphics) {}
int KYTY_SYSV_ABI GraphicsSetVsShader(uint32_t* cmd, uint64_t size, const VsStageRegisters* vs_regs, uint32_t shader_modifier)
int KYTY_SYSV_ABI GraphicsSetVsShader(uint32_t* cmd, uint64_t size, const HW::VsStageRegisters* vs_regs, uint32_t shader_modifier)
{
PRINT_NAME();
EXIT_NOT_IMPLEMENTED(size < sizeof(VsStageRegisters) / 4 + 2);
EXIT_NOT_IMPLEMENTED(size < sizeof(HW::VsStageRegisters) / 4 + 2);
printf("\t cmd_buffer = %016" PRIx64 "\n", reinterpret_cast<uint64_t>(cmd));
printf("\t size = %" PRIu64 "\n", size);
@ -64,16 +64,16 @@ int KYTY_SYSV_ABI GraphicsSetVsShader(uint32_t* cmd, uint64_t size, const VsStag
cmd[0] = KYTY_PM4(size, Pm4::IT_NOP, Pm4::R_VS);
cmd[1] = shader_modifier;
memcpy(&cmd[2], vs_regs, sizeof(VsStageRegisters));
memcpy(&cmd[2], vs_regs, sizeof(HW::VsStageRegisters));
return OK;
}
int KYTY_SYSV_ABI GraphicsUpdateVsShader(uint32_t* cmd, uint64_t size, const VsStageRegisters* vs_regs, uint32_t shader_modifier)
int KYTY_SYSV_ABI GraphicsUpdateVsShader(uint32_t* cmd, uint64_t size, const HW::VsStageRegisters* vs_regs, uint32_t shader_modifier)
{
PRINT_NAME();
EXIT_NOT_IMPLEMENTED(size < sizeof(VsStageRegisters) / 4 + 2);
EXIT_NOT_IMPLEMENTED(size < sizeof(HW::VsStageRegisters) / 4 + 2);
printf("\t cmd_buffer = %016" PRIx64 "\n", reinterpret_cast<uint64_t>(cmd));
printf("\t size = %" PRIu64 "\n", size);
@ -89,7 +89,7 @@ int KYTY_SYSV_ABI GraphicsUpdateVsShader(uint32_t* cmd, uint64_t size, const VsS
cmd[0] = KYTY_PM4(size, Pm4::IT_NOP, Pm4::R_VS_UPDATE);
cmd[1] = shader_modifier;
memcpy(&cmd[2], vs_regs, sizeof(VsStageRegisters));
memcpy(&cmd[2], vs_regs, sizeof(HW::VsStageRegisters));
return OK;
}

File diff suppressed because it is too large Load diff

View file

@ -55,8 +55,8 @@ public:
void BufferFlush();
void BufferWait();
HardwareContext* GetCtx();
UserConfig* GetUcfg();
// Direct accessors to the processor's own hardware context and user config objects.
// NOTE(review): these return the member addresses without taking m_mutex, whereas other
// members such as SetIndexType() lock it — confirm that callers already serialize access.
HW::HardwareContext* GetCtx() { return &m_ctx; }
HW::UserConfig* GetUcfg() { return &m_ucfg; }
void SetIndexType(uint32_t index_type_and_size);
void DrawIndex(uint32_t index_count, const void* index_addr, uint32_t flags, uint32_t type);
@ -76,11 +76,11 @@ public:
void WaitFlipDone(uint32_t video_out_handle, uint32_t display_buffer_index);
void TriggerEvent(uint32_t event_type, uint32_t event_index);
void SetUserDataMarker(UserSgprType type) { m_user_data_marker = type; }
[[nodiscard]] UserSgprType GetUserDataMarker() const { return m_user_data_marker; }
void SetEmbeddedDataMarker(const uint32_t* buffer, uint32_t num_dw, uint32_t align) {}
void PushMarker(const char* str) {}
void PopMarker() {}
void SetUserDataMarker(HW::UserSgprType type) { m_user_data_marker = type; }
[[nodiscard]] HW::UserSgprType GetUserDataMarker() const { return m_user_data_marker; }
void SetEmbeddedDataMarker(const uint32_t* buffer, uint32_t num_dw, uint32_t align) {}
void PushMarker(const char* str) {}
void PopMarker() {}
void PrefetchL2(void* addr, uint32_t size) {}
void ClearGds(uint64_t dw_offset, uint32_t dw_num, uint32_t clear_value);
@ -115,10 +115,10 @@ private:
uint32_t value = 0;
};
HardwareContext m_ctx;
UserConfig m_ucfg;
uint32_t m_index_type_and_size = 0;
UserSgprType m_user_data_marker = UserSgprType::Unknown;
HW::HardwareContext m_ctx;
HW::UserConfig m_ucfg;
uint32_t m_index_type_and_size = 0;
HW::UserSgprType m_user_data_marker = HW::UserSgprType::Unknown;
Core::Mutex m_mutex;
@ -484,7 +484,7 @@ void CommandProcessor::Reset()
m_ucfg.Reset();
m_ctx.Reset();
m_index_type_and_size = 0;
m_user_data_marker = UserSgprType::Unknown;
m_user_data_marker = HW::UserSgprType::Unknown;
std::memset(m_const_ram, 0, sizeof(m_const_ram));
}
@ -954,22 +954,6 @@ void CommandProcessor::Run(uint32_t* data, uint32_t num_dw)
}
}
HardwareContext* CommandProcessor::GetCtx()
{
Core::LockGuard lock(m_mutex);
// EXIT_NOT_IMPLEMENTED(m_drawing);
return &m_ctx;
}
UserConfig* CommandProcessor::GetUcfg()
{
Core::LockGuard lock(m_mutex);
return &m_ucfg;
}
void CommandProcessor::SetIndexType(uint32_t index_type_and_size)
{
Core::LockGuard lock(m_mutex);
@ -1141,10 +1125,15 @@ void CommandProcessor::TriggerEvent(uint32_t event_type, uint32_t event_index)
if ((event_type == 0x00000016 || event_type == 0x00000031) && event_index == 0x00000007)
{
// CacheFlushAndInvEvent
// FlushAndInvalidateCbPixelData
MemoryBarrier();
} else if ((event_type == 0x0000002c) && event_index == 0x00000007)
{
// FlushAndInvalidateDbMeta
} else
{
EXIT("unknown event type\n");
EXIT("unknown event type: 0x%08" PRIx32 ", 0x%08" PRIx32 "\n", event_type, event_index);
}
}
@ -1275,7 +1264,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_depth_render_target)
if (cmd_id == 0xC0016900)
{
DepthRenderTargetZInfo r;
HW::DepthRenderTargetZInfo r;
r.expclear_enabled = (buffer[0] & 0x08000000u) != 0;
@ -1294,7 +1283,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_depth_render_target)
{
count = 22;
DepthRenderTarget z;
HW::DepthRenderTarget z;
z.z_info.expclear_enabled = (buffer[0] & 0x08000000u) != 0;
z.z_info.format = (buffer[0] >> Pm4::DB_Z_INFO_FORMAT_SHIFT) & Pm4::DB_Z_INFO_FORMAT_MASK;
@ -1403,7 +1392,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_stencil_info)
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0016900);
EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::DB_STENCIL_INFO);
DepthRenderTargetStencilInfo r;
HW::DepthRenderTargetStencilInfo r;
r.expclear_enabled = (buffer[0] & 0x08000000u) != 0;
r.tile_split = (buffer[0] >> 13u) & 0x7u;
@ -1423,7 +1412,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_render_control)
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0016900);
EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::DB_RENDER_CONTROL);
RenderControl r;
HW::RenderControl r;
r.depth_clear_enable = KYTY_PM4_GET(buffer[0], DB_RENDER_CONTROL, DEPTH_CLEAR_ENABLE) != 0;
r.stencil_clear_enable = KYTY_PM4_GET(buffer[0], DB_RENDER_CONTROL, STENCIL_CLEAR_ENABLE) != 0;
@ -1443,7 +1432,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_mode_control)
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0016900);
EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::PA_SU_SC_MODE_CNTL);
ModeControl r;
HW::ModeControl r;
r.cull_front = KYTY_PM4_GET(buffer[0], PA_SU_SC_MODE_CNTL, CULL_FRONT) != 0;
r.cull_back = KYTY_PM4_GET(buffer[0], PA_SU_SC_MODE_CNTL, CULL_BACK) != 0;
@ -1462,12 +1451,30 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_mode_control)
return 1;
}
// Parses a single-register PA_SU_LINE_CNTL write and stores the resulting line width
// in the hardware context. Consumes one dword.
KYTY_HW_CTX_PARSER(hw_ctx_set_line_control)
{
	EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0016900);
	EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::PA_SU_LINE_CNTL);

	const auto raw_width = KYTY_PM4_GET(buffer[0], PA_SU_LINE_CNTL, WIDTH);

	// The register field is scaled down by 8; a raw value of 8 therefore maps to a width of 1.0.
	const float width = (raw_width == 8) ? 1.0f : static_cast<float>(raw_width) / 8.0f;

	cp->GetCtx()->SetLineWidth(width);

	return 1;
}
KYTY_HW_CTX_PARSER(hw_ctx_set_depth_control)
{
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0016900);
EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::DB_DEPTH_CONTROL);
DepthControl r;
HW::DepthControl r;
r.stencil_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, STENCIL_ENABLE) != 0;
r.z_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, Z_ENABLE) != 0;
@ -1488,7 +1495,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_stencil_control)
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0016900);
EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::DB_STENCIL_CONTROL);
StencilControl r;
HW::StencilControl r;
r.stencil_fail = KYTY_PM4_GET(buffer[0], DB_STENCIL_CONTROL, STENCILFAIL);
r.stencil_zpass = KYTY_PM4_GET(buffer[0], DB_STENCIL_CONTROL, STENCILZPASS);
@ -1507,7 +1514,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_stencil_mask)
EXIT_NOT_IMPLEMENTED(cmd_id != 0xc0026900);
EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::DB_STENCILREFMASK);
StencilMask r;
HW::StencilMask r;
r.stencil_testval = KYTY_PM4_GET(buffer[0], DB_STENCILREFMASK, STENCILTESTVAL);
r.stencil_mask = KYTY_PM4_GET(buffer[0], DB_STENCILREFMASK, STENCILMASK);
@ -1528,7 +1535,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_eqaa_control)
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0016900);
EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::DB_EQAA);
EqaaControl r;
HW::EqaaControl r;
r.max_anchor_samples = KYTY_PM4_GET(buffer[0], DB_EQAA, MAX_ANCHOR_SAMPLES);
r.ps_iter_samples = KYTY_PM4_GET(buffer[0], DB_EQAA, PS_ITER_SAMPLES);
@ -1544,6 +1551,21 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_eqaa_control)
return 1;
}
// Parses a single-register CB_COLOR_CONTROL write: extracts the MODE and ROP3 fields
// and stores them as the context's color control state. Consumes one dword.
KYTY_HW_CTX_PARSER(hw_ctx_set_color_control)
{
	EXIT_NOT_IMPLEMENTED(cmd_id != 0xc0016900);
	EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::CB_COLOR_CONTROL);

	HW::ColorControl color_control;

	color_control.mode = KYTY_PM4_GET(buffer[0], CB_COLOR_CONTROL, MODE);
	color_control.op   = KYTY_PM4_GET(buffer[0], CB_COLOR_CONTROL, ROP3);

	auto* hw_ctx = cp->GetCtx();
	hw_ctx->SetColorControl(color_control);

	return 1;
}
KYTY_HW_CTX_PARSER(hw_ctx_set_stencil_clear)
{
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0016900);
@ -1580,7 +1602,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_blend_control)
uint32_t param = (cmd_offset - Pm4::CB_BLEND0_CONTROL) / 1;
BlendControl r;
HW::BlendControl r;
r.color_srcblend = (buffer[0] >> Pm4::CB_BLEND0_CONTROL_COLOR_SRCBLEND_SHIFT) & Pm4::CB_BLEND0_CONTROL_COLOR_SRCBLEND_MASK;
r.color_comb_fcn = (buffer[0] >> Pm4::CB_BLEND0_CONTROL_COLOR_COMB_FCN_SHIFT) & Pm4::CB_BLEND0_CONTROL_COLOR_COMB_FCN_MASK;
@ -1599,59 +1621,98 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_blend_control)
KYTY_HW_CTX_PARSER(hw_ctx_set_render_target)
{
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC00E6900);
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC00E6900 && cmd_id != 0xC00B6900);
uint32_t param = (cmd_offset - Pm4::CB_COLOR0_BASE) / 15;
uint32_t count = 14;
uint32_t count = 11;
if (dw >= 16 && buffer[14] == 0xC0001000)
auto* ctx = cp->GetCtx();
HW::ColorBase base;
HW::ColorPitch pitch;
HW::ColorSlice slice;
HW::ColorView view;
HW::ColorInfo info;
HW::ColorAttrib attrib;
HW::ColorDcc dcc;
HW::ColorCmask cmask;
HW::ColorCmaskSlice cmask_slice;
HW::ColorFmask fmask;
HW::ColorFmaskSlice fmask_slice;
base.addr = static_cast<uint64_t>(buffer[0]) << 8u;
pitch.pitch_div8_minus1 = buffer[1] & 0x7ffu;
pitch.fmask_pitch_div8_minus1 = (buffer[1] >> 20u) & 0x7ffu;
slice.slice_div64_minus1 = buffer[2] & 0x3fffffu;
view.base_array_slice_index = buffer[3] & 0x7ffu;
view.last_array_slice_index = (buffer[3] >> 13u) & 0x7ffu;
info.fmask_compression_enable = (buffer[4] & 0x4000u) != 0;
info.fmask_compression_mode = (buffer[4] >> 26u) & 0x3u;
info.cmask_fast_clear_enable = (buffer[4] & 0x2000u) != 0;
info.dcc_compression_enable = (buffer[4] & 0x10000000u) != 0;
info.neo_mode = (buffer[4] & 0x80000000u) != 0;
info.cmask_tile_mode = (buffer[4] >> 19u) & 0x1u;
info.cmask_tile_mode_neo = (buffer[4] >> 29u) & 0x3u;
info.format = (buffer[4] >> 2u) & 0x1fu;
info.channel_type = (buffer[4] >> 8u) & 0x7u;
info.channel_order = (buffer[4] >> 11u) & 0x3u;
attrib.force_dest_alpha_to_one = (buffer[5] & 0x20000u) != 0;
attrib.tile_mode = buffer[5] & 0x1fu;
attrib.fmask_tile_mode = (buffer[5] >> 5u) & 0x1fu;
attrib.num_samples = (buffer[5] >> 12u) & 0x7u;
attrib.num_fragments = (buffer[5] >> 15u) & 0x3u;
dcc.max_uncompressed_block_size = (buffer[6] >> 2u) & 0x3u;
dcc.max_compressed_block_size = (buffer[6] >> 5u) & 0x3u;
dcc.min_compressed_block_size = (buffer[6] >> 4u) & 0x1u;
dcc.color_transform = (buffer[6] >> 7u) & 0x3u;
dcc.enable_overwrite_combiner = (buffer[6] & 0x1u) != 0;
dcc.force_independent_blocks = (buffer[6] & 0x200u) != 0;
cmask.addr = static_cast<uint64_t>(buffer[7]) << 8u;
cmask_slice.slice_minus1 = buffer[8] & 0x3fffu;
fmask.addr = static_cast<uint64_t>(buffer[9]) << 8u;
fmask_slice.slice_minus1 = buffer[10] & 0x3fffffu;
ctx->SetColorBase(param, base);
ctx->SetColorPitch(param, pitch);
ctx->SetColorSlice(param, slice);
ctx->SetColorView(param, view);
ctx->SetColorInfo(param, info);
ctx->SetColorAttrib(param, attrib);
ctx->SetColorDcc(param, dcc);
ctx->SetColorCmask(param, cmask);
ctx->SetColorCmaskSlice(param, cmask_slice);
ctx->SetColorFmask(param, fmask);
ctx->SetColorFmaskSlice(param, fmask_slice);
if (cmd_id == 0xC00E6900)
{
count = 16;
count = 14;
HW::ColorClearWord0 clear_word0;
HW::ColorClearWord1 clear_word1;
HW::ColorDccAddr dcc_addr;
clear_word0.word0 = buffer[11];
clear_word1.word1 = buffer[12];
dcc_addr.addr = static_cast<uint64_t>(buffer[13]) << 8u;
ctx->SetColorClearWord0(param, clear_word0);
ctx->SetColorClearWord1(param, clear_word1);
ctx->SetColorDccAddr(param, dcc_addr);
}
RenderTarget r;
if (dw >= count + 2 && buffer[count] == 0xC0001000)
{
HW::ColorSize size;
r.base_addr = static_cast<uint64_t>(buffer[0]) << 8u;
r.pitch_div8_minus1 = buffer[1] & 0x7ffu;
r.fmask_pitch_div8_minus1 = (buffer[1] >> 20u) & 0x7ffu;
r.slice_div64_minus1 = buffer[2] & 0x3fffffu;
r.base_array_slice_index = buffer[3] & 0x7ffu;
r.last_array_slice_index = (buffer[3] >> 13u) & 0x7ffu;
size.width = (buffer[count + 1] >> 0u) & 0xffffu;
size.height = (buffer[count + 1] >> 16u) & 0xffffu;
r.color_info.fmask_compression_enable = (buffer[4] & 0x4000u) != 0;
r.color_info.fmask_compression_mode = (buffer[4] >> 26u) & 0x3u;
r.color_info.cmask_fast_clear_enable = (buffer[4] & 0x2000u) != 0;
r.color_info.dcc_compression_enable = (buffer[4] & 0x10000000u) != 0;
r.color_info.neo_mode = (buffer[4] & 0x80000000u) != 0;
r.color_info.cmask_tile_mode = (buffer[4] >> 19u) & 0x1u;
r.color_info.cmask_tile_mode_neo = (buffer[4] >> 29u) & 0x3u;
r.color_info.format = (buffer[4] >> 2u) & 0x1fu;
r.color_info.channel_type = (buffer[4] >> 8u) & 0x7u;
r.color_info.channel_order = (buffer[4] >> 11u) & 0x3u;
ctx->SetColorSize(param, size);
r.force_dest_alpha_to_one = (buffer[5] & 0x20000u) != 0;
r.tile_mode = buffer[5] & 0x1fu;
r.fmask_tile_mode = (buffer[5] >> 5u) & 0x1fu;
r.num_samples = (buffer[5] >> 12u) & 0x7u;
r.num_fragments = (buffer[5] >> 15u) & 0x3u;
r.dcc_max_uncompressed_block_size = (buffer[6] >> 2u) & 0x3u;
r.dcc_max_compressed_block_size = (buffer[6] >> 5u) & 0x3u;
r.dcc_min_compressed_block_size = (buffer[6] >> 4u) & 0x1u;
r.dcc_color_transform = (buffer[6] >> 7u) & 0x3u;
r.dcc_enable_overwrite_combiner = (buffer[6] & 0x1u) != 0;
r.dcc_force_independent_blocks = (buffer[6] & 0x200u) != 0;
r.cmask_addr = static_cast<uint64_t>(buffer[7]) << 8u;
r.cmask_slice_minus1 = buffer[8] & 0x3fffu;
r.fmask_addr = static_cast<uint64_t>(buffer[9]) << 8u;
r.fmask_slice_minus1 = buffer[10] & 0x3fffffu;
r.clear_color_word0 = buffer[11];
r.clear_color_word1 = buffer[12];
r.dcc_addr = static_cast<uint64_t>(buffer[13]) << 8u;
r.width = (count == 16 ? (buffer[15] >> 0u) & 0xffffu : 0);
r.height = (count == 16 ? (buffer[15] >> 16u) & 0xffffu : 0);
cp->GetCtx()->SetRenderTarget(param, r);
count += 2;
}
return count;
}
@ -1662,7 +1723,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_color_info)
uint32_t param = (cmd_offset - Pm4::CB_COLOR0_INFO) / 15;
ColorInfo r;
HW::ColorInfo r;
r.fmask_compression_enable = (buffer[4] & 0x4000u) != 0;
r.fmask_compression_mode = (buffer[4] >> 26u) & 0x3u;
@ -1733,7 +1794,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_clip_control)
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0016900);
EXIT_NOT_IMPLEMENTED(cmd_offset != 0x00000204);
ClipControl r;
HW::ClipControl r;
r.user_clip_planes = buffer[0] & 0x3fu;
r.user_clip_plane_mode = (buffer[0] >> 14u) & 0x3u;
@ -1756,18 +1817,35 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_clip_control)
KYTY_HW_CTX_PARSER(hw_ctx_set_screen_scissor)
{
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0026900);
EXIT_NOT_IMPLEMENTED(cmd_offset != 0x0000000C);
EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::PA_SC_SCREEN_SCISSOR_TL);
int left = static_cast<int16_t>(static_cast<uint16_t>(buffer[0] & 0xffffu));
int top = static_cast<int16_t>(static_cast<uint16_t>((buffer[0] >> 16u) & 0xffffu));
int right = static_cast<int16_t>(static_cast<uint16_t>(buffer[1] & 0xffffu));
int bottom = static_cast<int16_t>(static_cast<uint16_t>((buffer[1] >> 16u) & 0xffffu));
int left = static_cast<int16_t>(static_cast<uint16_t>(KYTY_PM4_GET(buffer[0], PA_SC_SCREEN_SCISSOR_TL, TL_X)));
int top = static_cast<int16_t>(static_cast<uint16_t>(KYTY_PM4_GET(buffer[0], PA_SC_SCREEN_SCISSOR_TL, TL_Y)));
int right = static_cast<int16_t>(static_cast<uint16_t>(KYTY_PM4_GET(buffer[1], PA_SC_SCREEN_SCISSOR_BR, BR_X)));
int bottom = static_cast<int16_t>(static_cast<uint16_t>(KYTY_PM4_GET(buffer[1], PA_SC_SCREEN_SCISSOR_BR, BR_Y)));
cp->GetCtx()->SetScreenScissor(left, top, right, bottom);
return 2;
}
// Context-register parser for the PA_SC_GENERIC_SCISSOR_TL / _BR register pair.
//
// Accepts only packet header 0xC0026900 written at offset Pm4::PA_SC_GENERIC_SCISSOR_TL.
// The top-left corner and the WINDOW_OFFSET_DISABLE bit are decoded from buffer[0],
// the bottom-right corner from buffer[1]. The last argument passed to
// SetGenericScissor() is the negation of WINDOW_OFFSET_DISABLE.
//
// Returns 2 (presumably the number of payload dwords consumed - confirm against the
// dispatcher that invokes the g_hw_ctx_func table).
KYTY_HW_CTX_PARSER(hw_ctx_set_generic_scissor)
{
	EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0026900);
	EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::PA_SC_GENERIC_SCISSOR_TL);

	// Narrow a decoded field to 16 bits, then reinterpret those bits as a signed value.
	const auto as_signed16 = [](auto field) -> int { return static_cast<int16_t>(static_cast<uint16_t>(field)); };

	const int x_min = as_signed16(KYTY_PM4_GET(buffer[0], PA_SC_GENERIC_SCISSOR_TL, TL_X));
	const int y_min = as_signed16(KYTY_PM4_GET(buffer[0], PA_SC_GENERIC_SCISSOR_TL, TL_Y));
	const int x_max = as_signed16(KYTY_PM4_GET(buffer[1], PA_SC_GENERIC_SCISSOR_BR, BR_X));
	const int y_max = as_signed16(KYTY_PM4_GET(buffer[1], PA_SC_GENERIC_SCISSOR_BR, BR_Y));

	const bool offset_disabled = KYTY_PM4_GET(buffer[0], PA_SC_GENERIC_SCISSOR_TL, WINDOW_OFFSET_DISABLE) != 0;

	cp->GetCtx()->SetGenericScissor(x_min, y_min, x_max, y_max, !offset_disabled);

	return 2;
}
KYTY_HW_CTX_PARSER(hw_ctx_hardware_screen_offset)
{
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0016900);
@ -1801,7 +1879,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_blend_color)
EXIT_NOT_IMPLEMENTED(cmd_id != 0xc0046900);
EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::CB_BLEND_RED);
BlendColor r;
HW::BlendColor r;
r.red = *reinterpret_cast<const float*>(&buffer[0]);
r.green = *reinterpret_cast<const float*>(&buffer[1]);
@ -1819,7 +1897,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_vs_shader)
auto shader_modifier = buffer[0];
cp->GetCtx()->SetVsShader(reinterpret_cast<const VsStageRegisters*>(buffer + 1), shader_modifier);
cp->GetCtx()->SetVsShader(reinterpret_cast<const HW::VsStageRegisters*>(buffer + 1), shader_modifier);
return 28;
}
@ -1851,7 +1929,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_ps_shader)
{
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0261008);
PsStageRegisters r {};
HW::PsStageRegisters r {};
r.data_addr = (static_cast<uint64_t>(buffer[0]) << 8u) | (static_cast<uint64_t>(buffer[1]) << 40u);
r.vgprs = (buffer[2] >> Pm4::SPI_SHADER_PGM_RSRC1_PS_VGPRS_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC1_PS_VGPRS_MASK;
@ -1889,7 +1967,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_cs_shader)
auto shader_modifier = buffer[0];
CsStageRegisters r {};
HW::CsStageRegisters r {};
r.data_addr = (static_cast<uint64_t>(buffer[1]) << 8u) | (static_cast<uint64_t>(buffer[2]) << 40u);
r.vgprs = (buffer[3] >> Pm4::COMPUTE_PGM_RSRC1_VGPRS_SHIFT) & Pm4::COMPUTE_PGM_RSRC1_VGPRS_MASK;
@ -2033,14 +2111,47 @@ KYTY_CP_OP_PARSER(cp_op_draw_index_auto)
{
KYTY_PROFILER_FUNCTION();
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0051010);
EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0051010 && cmd_id != 0xc0012d00);
uint32_t index_count = buffer[0];
uint32_t flags = buffer[1];
if (cmd_id == 0xC0051010)
{
uint32_t index_count = buffer[0];
uint32_t flags = buffer[1];
cp->DrawIndexAuto(index_count, flags);
cp->DrawIndexAuto(index_count, flags);
return 6;
return 6;
}
if (cmd_id == 0xc0012d00)
{
uint32_t index_count = buffer[0];
uint32_t flags = 0;
EXIT_NOT_IMPLEMENTED(buffer[1] != 2);
cp->DrawIndexAuto(index_count, flags);
EXIT_NOT_IMPLEMENTED(!(dw >= 4));
if (buffer[2] == 0xc0001000)
{
EXIT_NOT_IMPLEMENTED(buffer[3] != 0);
return 4;
}
if (buffer[2] == 0xc0021000)
{
EXIT_NOT_IMPLEMENTED(buffer[3] != 0);
return 6;
}
EXIT("invalid draw_index_auto\n");
}
return 1;
}
KYTY_CP_OP_PARSER(cp_op_dispatch_direct)
@ -2171,7 +2282,7 @@ KYTY_CP_OP_PARSER(cp_op_set_shader_reg)
EXIT("unknown reg: %u\n", reg);
}
cp->SetUserDataMarker(UserSgprType::Unknown);
cp->SetUserDataMarker(HW::UserSgprType::Unknown);
return 1 + reg_num;
}
@ -2299,6 +2410,7 @@ KYTY_CP_OP_PARSER(cp_op_wait_on_address)
return 13;
}
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
KYTY_CP_OP_PARSER(cp_op_acquire_mem)
{
KYTY_PROFILER_FUNCTION();
@ -2313,7 +2425,11 @@ KYTY_CP_OP_PARSER(cp_op_acquire_mem)
uint32_t base_hi = buffer[4];
uint32_t poll = buffer[5];
EXIT_NOT_IMPLEMENTED(stall_mode != 1);
uint32_t target_mask = cache_action & 0x00007FC0u;
uint32_t extended_action = cache_action & 0x2E000000u;
uint32_t action = ((cache_action & 0x00C00000u) >> 0x12u) | ((cache_action & 0x00058000u) >> 0xfu);
// EXIT_NOT_IMPLEMENTED(stall_mode != 1);
EXIT_NOT_IMPLEMENTED(size_hi != 0);
EXIT_NOT_IMPLEMENTED(base_hi != 0);
EXIT_NOT_IMPLEMENTED(poll != 10);
@ -2321,36 +2437,81 @@ KYTY_CP_OP_PARSER(cp_op_acquire_mem)
switch (cache_action)
{
case 0x02c40040:
case 0x02c43fc0:
{
// target_mask: 0x00000040 (rt0), 0x00003fc0 (all rt)
// extended_action: 0x02000000 (FlushAndInvalidateCbCache)
// action: 0x38 (WriteBackAndInvalidateL1andL2)
EXIT_IF(target_mask != 0x00000040 && target_mask != 0x00003FC0);
EXIT_IF(extended_action != 0x02000000);
EXIT_IF(action != 0x38);
EXIT_NOT_IMPLEMENTED(size_lo == 0);
EXIT_NOT_IMPLEMENTED(base_lo == 0);
cp->RenderTextureBarrier(base_lo << 8u, size_lo << 8u);
cp->WriteBack();
}
break;
case 0x02003fc0:
{
// target_mask: 0x00003FC0 (all rt)
// extended_action: 0x02000000 (FlushAndInvalidateCbCache)
// action: 0x00 (none)
EXIT_IF(target_mask != 0x00003FC0);
EXIT_IF(extended_action != 0x02000000);
EXIT_IF(action != 0x00);
EXIT_NOT_IMPLEMENTED(size_lo == 0);
EXIT_NOT_IMPLEMENTED(base_lo == 0);
cp->RenderTextureBarrier(base_lo << 8u, size_lo << 8u);
}
break;
case 0x00C40000:
{
// target_mask: 0x00000000 (none)
// extended_action: 0x00000000 (none)
// action: 0x38 (WriteBackAndInvalidateL1andL2)
EXIT_IF(target_mask != 0x00000000);
EXIT_IF(extended_action != 0x00000000);
EXIT_IF(action != 0x38);
EXIT_NOT_IMPLEMENTED(size_lo != 1);
EXIT_NOT_IMPLEMENTED(base_lo != 0);
cp->MemoryBarrier();
cp->WriteBack();
}
break;
case 0x00400000:
{
// target_mask: 0x00000000 (none)
// extended_action: 0x00000000 (none)
// action: 0x10 (InvalidateL1)
EXIT_IF(target_mask != 0x00000000);
EXIT_IF(extended_action != 0x00000000);
EXIT_IF(action != 0x10);
EXIT_NOT_IMPLEMENTED(size_lo != 1);
EXIT_NOT_IMPLEMENTED(base_lo != 0);
cp->MemoryBarrier();
}
break;
default: EXIT("unknown barrier");
case 0x04c44000:
{
// target_mask: 0x00004000 (Depth Target)
// extended_action: 0x04000000 (FlushAndInvalidateDbCache)
// action: 0x38 (WriteBackAndInvalidateL1andL2)
KYTY_NOT_IMPLEMENTED;
}
break;
default:
EXIT("unknown barrier: 0x%08" PRIx32 ", 0x%08" PRIx32 ", 0x%08" PRIx32 ", 0x%08" PRIx32 "\n", cache_action, target_mask,
extended_action, action);
}
if (stall_mode == 0)
{
cp->BufferFlush();
cp->BufferWait();
}
return 6;
@ -2416,8 +2577,8 @@ KYTY_CP_OP_PARSER(cp_op_marker)
switch (id)
{
case 0x0: cp->SetEmbeddedDataMarker(buffer + 1, len_dw, align); break;
case 0x4: cp->SetUserDataMarker(UserSgprType::Vsharp); break;
case 0xd: cp->SetUserDataMarker(UserSgprType::Region); break;
case 0x4: cp->SetUserDataMarker(HW::UserSgprType::Vsharp); break;
case 0xd: cp->SetUserDataMarker(HW::UserSgprType::Region); break;
case 0x778:
{
auto* addr = reinterpret_cast<void*>(buffer[1] | (static_cast<uint64_t>(buffer[2]) << 32u));
@ -2522,25 +2683,28 @@ static void graphics_init_jmp_tables()
func = nullptr;
}
g_hw_ctx_func[Pm4::DB_RENDER_CONTROL] = hw_ctx_set_render_control;
g_hw_ctx_func[Pm4::DB_STENCIL_CLEAR] = hw_ctx_set_stencil_clear;
g_hw_ctx_func[Pm4::DB_DEPTH_CLEAR] = hw_ctx_set_depth_clear;
g_hw_ctx_func[0x00c] = hw_ctx_set_screen_scissor;
g_hw_ctx_func[Pm4::DB_Z_INFO] = hw_ctx_set_depth_render_target;
g_hw_ctx_func[Pm4::DB_STENCIL_INFO] = hw_ctx_set_stencil_info;
g_hw_ctx_func[0x08d] = hw_ctx_hardware_screen_offset;
g_hw_ctx_func[0x08e] = hw_ctx_set_render_target_mask;
g_hw_ctx_func[Pm4::CB_BLEND_RED] = hw_ctx_set_blend_color;
g_hw_ctx_func[Pm4::DB_STENCIL_CONTROL] = hw_ctx_set_stencil_control;
g_hw_ctx_func[Pm4::DB_STENCILREFMASK] = hw_ctx_set_stencil_mask;
g_hw_ctx_func[Pm4::SPI_PS_INPUT_CNTL_0] = hw_ctx_set_ps_input;
g_hw_ctx_func[Pm4::DB_DEPTH_CONTROL] = hw_ctx_set_depth_control;
g_hw_ctx_func[Pm4::DB_EQAA] = hw_ctx_set_eqaa_control;
g_hw_ctx_func[0x204] = hw_ctx_set_clip_control;
g_hw_ctx_func[Pm4::PA_SU_SC_MODE_CNTL] = hw_ctx_set_mode_control;
g_hw_ctx_func[0x206] = hw_ctx_set_viewport_transform_control;
g_hw_ctx_func[Pm4::VGT_SHADER_STAGES_EN] = hw_ctx_set_shader_stages;
g_hw_ctx_func[0x2fa] = hw_ctx_set_guard_bands;
g_hw_ctx_func[Pm4::DB_RENDER_CONTROL] = hw_ctx_set_render_control;
g_hw_ctx_func[Pm4::DB_STENCIL_CLEAR] = hw_ctx_set_stencil_clear;
g_hw_ctx_func[Pm4::DB_DEPTH_CLEAR] = hw_ctx_set_depth_clear;
g_hw_ctx_func[Pm4::PA_SC_SCREEN_SCISSOR_TL] = hw_ctx_set_screen_scissor;
g_hw_ctx_func[Pm4::DB_Z_INFO] = hw_ctx_set_depth_render_target;
g_hw_ctx_func[Pm4::DB_STENCIL_INFO] = hw_ctx_set_stencil_info;
g_hw_ctx_func[0x08d] = hw_ctx_hardware_screen_offset;
g_hw_ctx_func[0x08e] = hw_ctx_set_render_target_mask;
g_hw_ctx_func[Pm4::PA_SC_GENERIC_SCISSOR_TL] = hw_ctx_set_generic_scissor;
g_hw_ctx_func[Pm4::CB_BLEND_RED] = hw_ctx_set_blend_color;
g_hw_ctx_func[Pm4::DB_STENCIL_CONTROL] = hw_ctx_set_stencil_control;
g_hw_ctx_func[Pm4::DB_STENCILREFMASK] = hw_ctx_set_stencil_mask;
g_hw_ctx_func[Pm4::SPI_PS_INPUT_CNTL_0] = hw_ctx_set_ps_input;
g_hw_ctx_func[Pm4::DB_DEPTH_CONTROL] = hw_ctx_set_depth_control;
g_hw_ctx_func[Pm4::DB_EQAA] = hw_ctx_set_eqaa_control;
g_hw_ctx_func[Pm4::CB_COLOR_CONTROL] = hw_ctx_set_color_control;
g_hw_ctx_func[0x204] = hw_ctx_set_clip_control;
g_hw_ctx_func[Pm4::PA_SU_SC_MODE_CNTL] = hw_ctx_set_mode_control;
g_hw_ctx_func[0x206] = hw_ctx_set_viewport_transform_control;
g_hw_ctx_func[Pm4::PA_SU_LINE_CNTL] = hw_ctx_set_line_control;
g_hw_ctx_func[Pm4::VGT_SHADER_STAGES_EN] = hw_ctx_set_shader_stages;
g_hw_ctx_func[0x2fa] = hw_ctx_set_guard_bands;
for (uint32_t slot = 0; slot < 8; slot++)
{
@ -2563,6 +2727,7 @@ static void graphics_init_jmp_tables()
g_cp_op_func[Pm4::IT_NOP] = cp_op_nop;
g_cp_op_func[Pm4::IT_INDEX_TYPE] = cp_op_index_type;
g_cp_op_func[Pm4::IT_DRAW_INDEX_AUTO] = cp_op_draw_index_auto;
g_cp_op_func[Pm4::IT_WAIT_REG_MEM] = cp_op_wait_reg_mem;
g_cp_op_func[Pm4::IT_WRITE_DATA] = cp_op_write_data;
g_cp_op_func[Pm4::IT_INDIRECT_BUFFER] = cp_op_indirect_buffer;

View file

@ -31,6 +31,7 @@ static void* create_func(GraphicContext* ctx, const uint64_t* params, const uint
auto pixel_format = static_cast<VkFormat>(params[DepthStencilBufferObject::PARAM_FORMAT]);
auto width = params[DepthStencilBufferObject::PARAM_WIDTH];
auto height = params[DepthStencilBufferObject::PARAM_HEIGHT];
// Treat PARAM_HTILE as a boolean flag: any non-zero value marks the image as compressed
// (the value is stored in vk_obj->compressed below). Testing it with "!= 1" would invert
// a 0/1 flag, reporting compression exactly when HTILE is off.
// NOTE(review): assumes the producer stores PARAM_HTILE as 0/1 - confirm against DepthStencilBufferObject.
bool htile = params[DepthStencilBufferObject::PARAM_HTILE] != 0;
EXIT_NOT_IMPLEMENTED(pixel_format == VK_FORMAT_UNDEFINED);
EXIT_NOT_IMPLEMENTED(width == 0);
@ -45,6 +46,8 @@ static void* create_func(GraphicContext* ctx, const uint64_t* params, const uint
vk_obj->image_view = nullptr;
vk_obj->layout = VK_IMAGE_LAYOUT_UNDEFINED;
vk_obj->compressed = htile;
VkImageCreateInfo image_info {};
image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
image_info.pNext = nullptr;
@ -58,9 +61,10 @@ static void* create_func(GraphicContext* ctx, const uint64_t* params, const uint
image_info.format = vk_obj->format;
image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
image_info.initialLayout = vk_obj->layout;
image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
image_info.samples = VK_SAMPLE_COUNT_1_BIT;
image_info.usage = static_cast<VkImageUsageFlags>(VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) |
static_cast<VkImageUsageFlags>(VK_IMAGE_USAGE_SAMPLED_BIT);
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
image_info.samples = VK_SAMPLE_COUNT_1_BIT;
vkCreateImage(ctx->device, &image_info, nullptr, &vk_obj->image);
@ -101,7 +105,27 @@ static void* create_func(GraphicContext* ctx, const uint64_t* params, const uint
vkCreateImageView(ctx->device, &create_info, nullptr, &vk_obj->image_view);
VkImageViewCreateInfo create_info2 {};
create_info2.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
create_info2.pNext = nullptr;
create_info2.flags = 0;
create_info2.image = vk_obj->image;
create_info2.viewType = VK_IMAGE_VIEW_TYPE_2D;
create_info2.format = vk_obj->format;
create_info2.components.r = VK_COMPONENT_SWIZZLE_R;
create_info2.components.g = VK_COMPONENT_SWIZZLE_R;
create_info2.components.b = VK_COMPONENT_SWIZZLE_R;
create_info2.components.a = VK_COMPONENT_SWIZZLE_R;
create_info2.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
create_info2.subresourceRange.baseArrayLayer = 0;
create_info2.subresourceRange.baseMipLevel = 0;
create_info2.subresourceRange.layerCount = 1;
create_info2.subresourceRange.levelCount = 1;
vkCreateImageView(ctx->device, &create_info2, nullptr, &vk_obj->texture_view);
EXIT_NOT_IMPLEMENTED(vk_obj->image_view == nullptr);
EXIT_NOT_IMPLEMENTED(vk_obj->texture_view == nullptr);
UtilSetDepthLayoutOptimal(vk_obj);
@ -119,6 +143,7 @@ static void delete_func(GraphicContext* ctx, void* obj, VulkanMemory* mem)
DeleteFramebuffer(vk_obj);
vkDestroyImageView(ctx->device, vk_obj->texture_view, nullptr);
vkDestroyImageView(ctx->device, vk_obj->image_view, nullptr);
vkDestroyImage(ctx->device, vk_obj->image, nullptr);

View file

@ -154,7 +154,7 @@ public:
void ResetHash(GraphicContext* ctx, const uint64_t* vaddr, const uint64_t* size, int vaddr_num, GpuMemoryObjectType type);
void FrameDone();
Vector<GpuMemoryObject> FindObjects(const uint64_t* vaddr, const uint64_t* size, int vaddr_num, bool exact);
Vector<GpuMemoryObject> FindObjects(const uint64_t* vaddr, const uint64_t* size, int vaddr_num, bool exact, bool only_first);
// Sync: GPU -> CPU
void WriteBack(GraphicContext* ctx);
@ -217,7 +217,7 @@ private:
void Free(GraphicContext* ctx, int object_id);
Vector<OverlappedBlock> FindBlocks(const uint64_t* vaddr, const uint64_t* size, int vaddr_num);
Vector<OverlappedBlock> FindBlocks(const uint64_t* vaddr, const uint64_t* size, int vaddr_num, bool only_first = false);
Block CreateBlock(const uint64_t* vaddr, const uint64_t* size, int vaddr_num);
void DeleteBlock(Block* b);
void Link(int id1, int id2, OverlapType rel, GpuMemoryScenario scenario);
@ -227,7 +227,9 @@ private:
static void WatchCallback(void* a0, void* a1);
bool create_existing(const Vector<OverlappedBlock>& others, const GpuObject& info, int* id);
bool create_generate_mips(const Vector<OverlappedBlock>& others, GpuMemoryObjectType type);
bool create_texture_triplet(const Vector<OverlappedBlock>& others, GpuMemoryObjectType type);
bool create_all_the_same(const Vector<OverlappedBlock>& others);
void create_dbg_exit(const uint64_t* vaddr, const uint64_t* size, int vaddr_num, const Vector<OverlappedBlock>& others,
GpuMemoryObjectType type);
@ -817,43 +819,96 @@ void GpuMemory::WatchCallback(void* a0, void* a1)
m->m_mutex.Unlock();
}
bool GpuMemory::create_existing(const Vector<OverlappedBlock>& others, const GpuObject& info, int* id)
{
EXIT_IF(id == nullptr);
uint64_t max_gpu_update_time = 0;
const OverlappedBlock* latest_block = nullptr;
for (const auto& obj: others)
{
auto& h = m_objects[obj.object_id];
EXIT_IF(h.free);
auto& o = h.info;
if (h.scenario == GpuMemoryScenario::Common && obj.relation == OverlapType::Equals && o.object.type == info.type &&
info.Equal(o.params))
{
*id = obj.object_id;
return true;
}
if (o.gpu_update_time > max_gpu_update_time)
{
max_gpu_update_time = o.gpu_update_time;
latest_block = &obj;
}
}
if (latest_block != nullptr)
{
auto& h = m_objects[latest_block->object_id];
auto& o = h.info;
if (h.scenario == GpuMemoryScenario::GenerateMips && latest_block->relation == OverlapType::Equals && o.object.type == info.type &&
info.Equal(o.params))
{
*id = latest_block->object_id;
return true;
}
}
return false;
}
bool GpuMemory::create_generate_mips(const Vector<OverlappedBlock>& others, GpuMemoryObjectType type)
{
if (others.Size() == 2 && type == GpuMemoryObjectType::RenderTexture)
if (others.Size() == 3 && type == GpuMemoryObjectType::RenderTexture)
{
const auto& b0 = others.At(0);
const auto& b1 = others.At(1);
const auto& b2 = others.At(2);
OverlapType rel0 = b0.relation;
OverlapType rel1 = b1.relation;
OverlapType rel2 = b2.relation;
const auto& o0 = m_objects[b0.object_id];
const auto& o1 = m_objects[b1.object_id];
const auto& o2 = m_objects[b2.object_id];
GpuMemoryObjectType type0 = o0.info.object.type;
GpuMemoryObjectType type1 = o1.info.object.type;
GpuMemoryObjectType type2 = o2.info.object.type;
if (rel0 == OverlapType::Contains && rel1 == OverlapType::Contains && type0 == GpuMemoryObjectType::StorageBuffer &&
type1 == GpuMemoryObjectType::StorageTexture &&
((o0.others.Size() == 1 && o0.scenario == GpuMemoryScenario::Common && o1.others.Size() == 1 &&
o1.scenario == GpuMemoryScenario::Common) ||
(o0.others.Size() >= 2 && o0.scenario == GpuMemoryScenario::GenerateMips && o1.others.Size() >= 2 &&
o1.scenario == GpuMemoryScenario::GenerateMips)))
if (rel0 == OverlapType::Contains && rel1 == OverlapType::Contains && rel2 == OverlapType::Contains &&
type0 == GpuMemoryObjectType::StorageBuffer && type1 == GpuMemoryObjectType::Texture &&
type2 == GpuMemoryObjectType::StorageTexture &&
((o0.others.Size() == 2 && o0.scenario == GpuMemoryScenario::TextureTriplet && o1.others.Size() == 2 &&
o1.scenario == GpuMemoryScenario::TextureTriplet && o2.others.Size() == 2 &&
o2.scenario == GpuMemoryScenario::TextureTriplet) ||
(o0.others.Size() >= 3 && o0.scenario == GpuMemoryScenario::GenerateMips && o1.others.Size() >= 3 &&
o1.scenario == GpuMemoryScenario::GenerateMips && o2.others.Size() >= 3 && o2.scenario == GpuMemoryScenario::GenerateMips)))
{
return true;
}
} else if (others.Size() >= 2 && type == GpuMemoryObjectType::Texture)
} else if (others.Size() >= 3 && type == GpuMemoryObjectType::Texture)
{
const auto& b0 = others.At(0);
const auto& b1 = others.At(1);
const auto& b2 = others.At(2);
OverlapType rel0 = b0.relation;
OverlapType rel1 = b1.relation;
OverlapType rel2 = b2.relation;
const auto& o0 = m_objects[b0.object_id];
const auto& o1 = m_objects[b1.object_id];
const auto& o2 = m_objects[b2.object_id];
GpuMemoryObjectType type0 = o0.info.object.type;
GpuMemoryObjectType type1 = o1.info.object.type;
GpuMemoryObjectType type2 = o2.info.object.type;
if (((rel0 == OverlapType::Contains && rel1 == OverlapType::Contains) ||
(rel0 == OverlapType::Equals && rel1 == OverlapType::Equals)) &&
type0 == GpuMemoryObjectType::StorageBuffer && type1 == GpuMemoryObjectType::StorageTexture &&
o0.scenario == GpuMemoryScenario::GenerateMips && o1.scenario == GpuMemoryScenario::GenerateMips)
if (((rel0 == OverlapType::Contains && rel1 == OverlapType::Contains && rel2 == OverlapType::Contains) ||
(rel0 == OverlapType::Equals && rel1 == OverlapType::Equals && rel2 == OverlapType::Equals)) &&
type0 == GpuMemoryObjectType::StorageBuffer && type1 == GpuMemoryObjectType::Texture &&
type2 == GpuMemoryObjectType::StorageTexture && o0.scenario == GpuMemoryScenario::GenerateMips &&
o1.scenario == GpuMemoryScenario::GenerateMips && o2.scenario == GpuMemoryScenario::GenerateMips)
{
return true;
}
@ -862,6 +917,30 @@ bool GpuMemory::create_generate_mips(const Vector<OverlappedBlock>& others, GpuM
return false;
}
bool GpuMemory::create_texture_triplet(const Vector<OverlappedBlock>& others, GpuMemoryObjectType type)
{
if (others.Size() == 2 && type == GpuMemoryObjectType::StorageTexture)
{
const auto& b0 = others.At(0);
const auto& b1 = others.At(1);
OverlapType rel0 = b0.relation;
OverlapType rel1 = b1.relation;
const auto& o0 = m_objects[b0.object_id];
const auto& o1 = m_objects[b1.object_id];
GpuMemoryObjectType type0 = o0.info.object.type;
GpuMemoryObjectType type1 = o1.info.object.type;
if (rel0 == OverlapType::Equals && rel1 == OverlapType::Equals && type0 == GpuMemoryObjectType::StorageBuffer &&
type1 == GpuMemoryObjectType::Texture &&
(o0.others.Size() == 1 && o0.scenario == GpuMemoryScenario::Common && o1.others.Size() == 1 &&
o1.scenario == GpuMemoryScenario::Common))
{
return true;
}
}
return false;
}
bool GpuMemory::create_all_the_same(const Vector<OverlappedBlock>& others)
{
OverlapType rel = others.At(0).relation;
@ -912,22 +991,22 @@ void* GpuMemory::CreateObject(GraphicContext* ctx, CommandBuffer* buffer, const
if (!others.IsEmpty())
{
for (const auto& obj: others)
int existing_id = -1;
if (create_existing(others, info, &existing_id))
{
auto& h = m_objects[obj.object_id];
auto& h = m_objects[existing_id];
EXIT_IF(h.free);
auto& o = h.info;
if (obj.relation == OverlapType::Equals && o.object.type == info.type && info.Equal(o.params))
{
Update(ctx, obj.object_id);
o.use_num++;
o.use_last_frame = m_current_frame;
o.in_use = true;
o.read_only = info.read_only;
o.check_hash = info.check_hash;
return o.object.obj;
}
Update(ctx, existing_id);
o.use_num++;
o.use_last_frame = m_current_frame;
o.in_use = true;
o.read_only = info.read_only;
o.check_hash = info.check_hash;
return o.object.obj;
}
if (others.Size() == 1)
@ -941,6 +1020,7 @@ void* GpuMemory::CreateObject(GraphicContext* ctx, CommandBuffer* buffer, const
{
case ObjectsRelation(GpuMemoryObjectType::StorageBuffer, OverlapType::Equals, GpuMemoryObjectType::RenderTexture):
case ObjectsRelation(GpuMemoryObjectType::StorageBuffer, OverlapType::Equals, GpuMemoryObjectType::StorageTexture):
case ObjectsRelation(GpuMemoryObjectType::StorageBuffer, OverlapType::Equals, GpuMemoryObjectType::Texture):
case ObjectsRelation(GpuMemoryObjectType::VideoOutBuffer, OverlapType::Equals, GpuMemoryObjectType::StorageBuffer):
{
overlap = true;
@ -970,6 +1050,10 @@ void* GpuMemory::CreateObject(GraphicContext* ctx, CommandBuffer* buffer, const
overlap = true;
create_from_objects = true;
scenario = GpuMemoryScenario::GenerateMips;
} else if (create_texture_triplet(others, info.type))
{
overlap = true;
scenario = GpuMemoryScenario::TextureTriplet;
} else
{
if (!create_all_the_same(others))
@ -1108,7 +1192,7 @@ void* GpuMemory::CreateObject(GraphicContext* ctx, CommandBuffer* buffer, const
return o.object.obj;
}
Vector<GpuMemoryObject> GpuMemory::FindObjects(const uint64_t* vaddr, const uint64_t* size, int vaddr_num, bool exact)
Vector<GpuMemoryObject> GpuMemory::FindObjects(const uint64_t* vaddr, const uint64_t* size, int vaddr_num, bool exact, bool only_first)
{
KYTY_PROFILER_BLOCK("GpuMemory::FindObjects", profiler::colors::Green200);
@ -1116,7 +1200,7 @@ Vector<GpuMemoryObject> GpuMemory::FindObjects(const uint64_t* vaddr, const uint
Core::LockGuard lock(m_mutex);
auto objects = FindBlocks(vaddr, size, vaddr_num);
auto objects = FindBlocks(vaddr, size, vaddr_num, only_first);
Vector<GpuMemoryObject> ret;
@ -1244,12 +1328,13 @@ void GpuMemory::Free(GraphicContext* ctx, int object_id)
}
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
Vector<GpuMemory::OverlappedBlock> GpuMemory::FindBlocks(const uint64_t* vaddr, const uint64_t* size, int vaddr_num)
Vector<GpuMemory::OverlappedBlock> GpuMemory::FindBlocks(const uint64_t* vaddr, const uint64_t* size, int vaddr_num, bool only_first)
{
KYTY_PROFILER_BLOCK("GpuMemory::FindBlocks", profiler::colors::Green100);
EXIT_IF(vaddr_num <= 0 || vaddr_num > VADDR_BLOCKS_MAX);
EXIT_IF(vaddr == nullptr || size == nullptr);
EXIT_IF(only_first && vaddr_num != 1);
Vector<GpuMemory::OverlappedBlock> ret;
@ -1305,7 +1390,7 @@ Vector<GpuMemory::OverlappedBlock> GpuMemory::FindBlocks(const uint64_t* vaddr,
{
if (!b.free)
{
if (b.block.vaddr_num == 1)
if (b.block.vaddr_num == 1 || only_first)
{
auto type = GetOverlapType(b.block.vaddr[0], b.block.size[0], vaddr[0], size[0]);
if (type != OverlapType::None)
@ -1691,11 +1776,11 @@ void* GpuMemoryCreateObject(GraphicContext* ctx, CommandBuffer* buffer, const ui
return g_gpu_memory->CreateObject(ctx, buffer, vaddr, size, vaddr_num, info);
}
Vector<GpuMemoryObject> GpuMemoryFindObjects(uint64_t vaddr, uint64_t size, bool exact)
Vector<GpuMemoryObject> GpuMemoryFindObjects(uint64_t vaddr, uint64_t size, bool exact, bool only_first)
{
EXIT_IF(g_gpu_memory == nullptr);
return g_gpu_memory->FindObjects(&vaddr, &size, 1, exact);
return g_gpu_memory->FindObjects(&vaddr, &size, 1, exact, only_first);
}
void GpuMemoryResetHash(GraphicContext* ctx, const uint64_t* vaddr, const uint64_t* size, int vaddr_num, GpuMemoryObjectType type)
@ -1746,6 +1831,11 @@ bool GpuMemoryCheckAccessViolation(uint64_t vaddr, uint64_t size)
return g_gpu_memory_watcher->Check(vaddr, size);
}
// Reports the result of MemoryWatcher::Enabled() through the free-function GpuMemory API.
bool GpuMemoryWatcherEnabled()
{
	const bool watcher_enabled = MemoryWatcher::Enabled();
	return watcher_enabled;
}
bool VulkanAllocate(GraphicContext* ctx, VulkanMemory* mem)
{
static std::atomic<uint64_t> seq = 0;

View file

@ -86,10 +86,15 @@ static void update2_func(GraphicContext* ctx, CommandBuffer* buffer, const uint6
vk_obj->layout = VK_IMAGE_LAYOUT_UNDEFINED;
if (objects.Size() == 2 && objects.At(0).type == GpuMemoryObjectType::StorageBuffer &&
objects.At(1).type == GpuMemoryObjectType::StorageTexture && scenario == GpuMemoryScenario::GenerateMips)
// if (objects.Size() == 2 && objects.At(0).type == GpuMemoryObjectType::StorageBuffer &&
// objects.At(1).type == GpuMemoryObjectType::StorageTexture && scenario == GpuMemoryScenario::GenerateMips)
// {
// auto* src_obj = static_cast<StorageTextureVulkanImage*>(objects.At(1).obj);
if (objects.Size() == 3 && objects.At(0).type == GpuMemoryObjectType::StorageBuffer &&
objects.At(1).type == GpuMemoryObjectType::Texture && objects.At(2).type == GpuMemoryObjectType::StorageTexture &&
scenario == GpuMemoryScenario::GenerateMips)
{
auto* src_obj = static_cast<StorageTextureVulkanImage*>(objects.At(1).obj);
auto* src_obj = static_cast<StorageTextureVulkanImage*>(objects.At(2).obj);
uint32_t mip_width = src_obj->extent.width;
uint32_t mip_height = src_obj->extent.height;

View file

@ -278,10 +278,12 @@ static void update2_func(GraphicContext* ctx, CommandBuffer* buffer, const uint6
mip_height /= 2;
}
}
} else if (objects.Size() >= 2 && objects.At(0).type == GpuMemoryObjectType::StorageBuffer &&
objects.At(1).type == GpuMemoryObjectType::StorageTexture && scenario == GpuMemoryScenario::GenerateMips)
// } else if (objects.Size() >= 2 && objects.At(0).type == GpuMemoryObjectType::StorageBuffer &&
// objects.At(1).type == GpuMemoryObjectType::StorageTexture && scenario == GpuMemoryScenario::GenerateMips)
} else if (objects.Size() >= 3 && objects.At(0).type == GpuMemoryObjectType::StorageBuffer &&
objects.At(1).type == GpuMemoryObjectType::Texture && objects.At(2).type == GpuMemoryObjectType::StorageTexture &&
scenario == GpuMemoryScenario::GenerateMips)
{
for (uint32_t i = 0; i < levels; i++)
{
VulkanImage* src_image = nullptr;

View file

@ -1777,7 +1777,7 @@ KYTY_SHADER_PARSER(shader_parse)
return ptr - src;
}
static void vs_print(const char* func, const VsStageRegisters& vs)
static void vs_print(const char* func, const HW::VsStageRegisters& vs)
{
printf("%s\n", func);
@ -1792,7 +1792,7 @@ static void vs_print(const char* func, const VsStageRegisters& vs)
printf("\t m_paClVsOutCntl = 0x%08" PRIx32 "\n", vs.m_paClVsOutCntl);
}
static void ps_print(const char* func, const PsStageRegisters& ps)
static void ps_print(const char* func, const HW::PsStageRegisters& ps)
{
printf("%s\n", func);
@ -1820,7 +1820,7 @@ static void ps_print(const char* func, const PsStageRegisters& ps)
printf("\t m_cbShaderMask = 0x%08" PRIx32 "\n", ps.m_cbShaderMask);
}
static void cs_print(const char* func, const CsStageRegisters& cs)
static void cs_print(const char* func, const HW::CsStageRegisters& cs)
{
printf("%s\n", func);
@ -1872,7 +1872,7 @@ static void bi_print(const char* func, const ShaderBinaryInfo& bi)
printf("\t crc32 = 0x%08" PRIx32 "\n", bi.crc32);
}
static void vs_check(const VsStageRegisters& vs)
static void vs_check(const HW::VsStageRegisters& vs)
{
EXIT_NOT_IMPLEMENTED(vs.GetStreamoutEnabled() != false);
// EXIT_NOT_IMPLEMENTED(vs.GetSgprCount() != 0x00000000);
@ -1884,7 +1884,7 @@ static void vs_check(const VsStageRegisters& vs)
EXIT_NOT_IMPLEMENTED(vs.m_paClVsOutCntl != 0x00000000);
}
static void ps_check(const PsStageRegisters& ps)
static void ps_check(const HW::PsStageRegisters& ps)
{
EXIT_NOT_IMPLEMENTED(ps.target_output_mode[0] != 4 && ps.target_output_mode[0] != 9);
EXIT_NOT_IMPLEMENTED(ps.conservative_z_export_value != 0x00000000);
@ -1907,7 +1907,7 @@ static void ps_check(const PsStageRegisters& ps)
EXIT_NOT_IMPLEMENTED(ps.m_cbShaderMask != 0x0000000f);
}
static void cs_check(const CsStageRegisters& cs)
static void cs_check(const HW::CsStageRegisters& cs)
{
// EXIT_NOT_IMPLEMENTED(cs.num_thread_x != 0x00000040);
// EXIT_NOT_IMPLEMENTED(cs.num_thread_y != 0x00000001);
@ -2207,7 +2207,7 @@ static void ShaderParseFetch(ShaderVertexInputInfo* info, const uint32_t* fetch,
}
static void ShaderGetStorageBuffer(ShaderStorageResources* info, int start_index, int slot, ShaderStorageUsage usage,
const UserSgprInfo& user_sgpr, const uint32_t* extended_buffer)
const HW::UserSgprInfo& user_sgpr, const uint32_t* extended_buffer)
{
EXIT_IF(info == nullptr);
@ -2230,7 +2230,7 @@ static void ShaderGetStorageBuffer(ShaderStorageResources* info, int start_index
for (int j = 0; j < 4; j++)
{
auto type = user_sgpr.type[start_index + j];
EXIT_NOT_IMPLEMENTED(type != UserSgprType::Vsharp && type != UserSgprType::Region);
EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Vsharp && type != HW::UserSgprType::Region);
}
}
@ -2243,7 +2243,7 @@ static void ShaderGetStorageBuffer(ShaderStorageResources* info, int start_index
}
static void ShaderGetTextureBuffer(ShaderTextureResources* info, int start_index, int slot, ShaderTextureUsage usage,
const UserSgprInfo& user_sgpr, const uint32_t* extended_buffer)
const HW::UserSgprInfo& user_sgpr, const uint32_t* extended_buffer)
{
EXIT_IF(info == nullptr);
@ -2256,33 +2256,45 @@ static void ShaderGetTextureBuffer(ShaderTextureResources* info, int start_index
EXIT_NOT_IMPLEMENTED(!extended && start_index >= 16);
EXIT_NOT_IMPLEMENTED(extended && !(start_index >= 16));
info->start_register[index] = start_index;
info->extended[index] = extended;
info->slots[index] = slot;
info->usages[index] = usage;
info->desc[index].start_register = start_index;
info->desc[index].extended = extended;
info->desc[index].slot = slot;
info->desc[index].usage = usage;
EXIT_IF(usage == ShaderTextureUsage::Unknown);
if (usage == ShaderTextureUsage::ReadWrite)
{
info->textures2d_storage_num++;
info->desc[index].textures2d_without_sampler = true;
} else
{
info->textures2d_sampled_num++;
info->desc[index].textures2d_without_sampler = false;
}
if (!extended)
{
for (int j = 0; j < 8; j++)
{
auto type = user_sgpr.type[start_index + j];
EXIT_NOT_IMPLEMENTED(type != UserSgprType::Vsharp && type != UserSgprType::Region);
EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Vsharp && type != HW::UserSgprType::Region);
}
}
info->textures[index].fields[0] = (extended ? extended_buffer[start_index - 16 + 0] : user_sgpr.value[start_index + 0]);
info->textures[index].fields[1] = (extended ? extended_buffer[start_index - 16 + 1] : user_sgpr.value[start_index + 1]);
info->textures[index].fields[2] = (extended ? extended_buffer[start_index - 16 + 2] : user_sgpr.value[start_index + 2]);
info->textures[index].fields[3] = (extended ? extended_buffer[start_index - 16 + 3] : user_sgpr.value[start_index + 3]);
info->textures[index].fields[4] = (extended ? extended_buffer[start_index - 16 + 4] : user_sgpr.value[start_index + 4]);
info->textures[index].fields[5] = (extended ? extended_buffer[start_index - 16 + 5] : user_sgpr.value[start_index + 5]);
info->textures[index].fields[6] = (extended ? extended_buffer[start_index - 16 + 6] : user_sgpr.value[start_index + 6]);
info->textures[index].fields[7] = (extended ? extended_buffer[start_index - 16 + 7] : user_sgpr.value[start_index + 7]);
info->desc[index].texture.fields[0] = (extended ? extended_buffer[start_index - 16 + 0] : user_sgpr.value[start_index + 0]);
info->desc[index].texture.fields[1] = (extended ? extended_buffer[start_index - 16 + 1] : user_sgpr.value[start_index + 1]);
info->desc[index].texture.fields[2] = (extended ? extended_buffer[start_index - 16 + 2] : user_sgpr.value[start_index + 2]);
info->desc[index].texture.fields[3] = (extended ? extended_buffer[start_index - 16 + 3] : user_sgpr.value[start_index + 3]);
info->desc[index].texture.fields[4] = (extended ? extended_buffer[start_index - 16 + 4] : user_sgpr.value[start_index + 4]);
info->desc[index].texture.fields[5] = (extended ? extended_buffer[start_index - 16 + 5] : user_sgpr.value[start_index + 5]);
info->desc[index].texture.fields[6] = (extended ? extended_buffer[start_index - 16 + 6] : user_sgpr.value[start_index + 6]);
info->desc[index].texture.fields[7] = (extended ? extended_buffer[start_index - 16 + 7] : user_sgpr.value[start_index + 7]);
info->textures_num++;
}
static void ShaderGetSampler(ShaderSamplerResources* info, int start_index, int slot, const UserSgprInfo& user_sgpr,
static void ShaderGetSampler(ShaderSamplerResources* info, int start_index, int slot, const HW::UserSgprInfo& user_sgpr,
const uint32_t* extended_buffer)
{
EXIT_IF(info == nullptr);
@ -2305,7 +2317,7 @@ static void ShaderGetSampler(ShaderSamplerResources* info, int start_index, int
for (int j = 0; j < 4; j++)
{
auto type = user_sgpr.type[start_index + j];
EXIT_NOT_IMPLEMENTED(type != UserSgprType::Vsharp && type != UserSgprType::Region);
EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Vsharp && type != HW::UserSgprType::Region);
}
}
@ -2317,7 +2329,7 @@ static void ShaderGetSampler(ShaderSamplerResources* info, int start_index, int
info->samplers_num++;
}
static void ShaderGetGdsPointer(ShaderGdsResources* info, int start_index, int slot, const UserSgprInfo& user_sgpr,
static void ShaderGetGdsPointer(ShaderGdsResources* info, int start_index, int slot, const HW::UserSgprInfo& user_sgpr,
const uint32_t* extended_buffer)
{
EXIT_IF(info == nullptr);
@ -2338,7 +2350,7 @@ static void ShaderGetGdsPointer(ShaderGdsResources* info, int start_index, int s
if (!extended)
{
auto type = user_sgpr.type[start_index];
EXIT_NOT_IMPLEMENTED(type != UserSgprType::Unknown);
EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Unknown);
}
info->pointers[index].field = (extended ? extended_buffer[start_index - 16] : user_sgpr.value[start_index]);
@ -2381,7 +2393,7 @@ void ShaderCalcBindingIndices(ShaderBindResources* bind)
EXIT_IF((bind->push_constant_size % 16) != 0);
}
void ShaderGetInputInfoVS(const VertexShaderInfo* regs, ShaderVertexInputInfo* info)
void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, ShaderVertexInputInfo* info)
{
KYTY_PROFILER_FUNCTION();
@ -2468,7 +2480,7 @@ void ShaderGetInputInfoVS(const VertexShaderInfo* regs, ShaderVertexInputInfo* i
}
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
void ShaderGetInputInfoPS(const PixelShaderInfo* regs, const ShaderVertexInputInfo* vs_info, ShaderPixelInputInfo* ps_info)
void ShaderGetInputInfoPS(const HW::PixelShaderInfo* regs, const ShaderVertexInputInfo* vs_info, ShaderPixelInputInfo* ps_info)
{
KYTY_PROFILER_FUNCTION();
@ -2522,7 +2534,7 @@ void ShaderGetInputInfoPS(const PixelShaderInfo* regs, const ShaderVertexInputIn
{
ShaderGetTextureBuffer(&ps_info->bind.textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadOnly,
regs->ps_user_sgpr, extended_buffer);
EXIT_NOT_IMPLEMENTED(ps_info->bind.textures2D.textures[ps_info->bind.textures2D.textures_num - 1].Type() != 9);
EXIT_NOT_IMPLEMENTED(ps_info->bind.textures2D.desc[ps_info->bind.textures2D.textures_num - 1].texture.Type() != 9);
}
break;
case 0x01:
@ -2540,7 +2552,7 @@ void ShaderGetInputInfoPS(const PixelShaderInfo* regs, const ShaderVertexInputIn
{
ShaderGetTextureBuffer(&ps_info->bind.textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadWrite,
regs->ps_user_sgpr, extended_buffer);
EXIT_NOT_IMPLEMENTED(ps_info->bind.textures2D.textures[ps_info->bind.textures2D.textures_num - 1].Type() != 9);
EXIT_NOT_IMPLEMENTED(ps_info->bind.textures2D.desc[ps_info->bind.textures2D.textures_num - 1].texture.Type() != 9);
}
break;
case 0x1b:
@ -2562,7 +2574,7 @@ void ShaderGetInputInfoPS(const PixelShaderInfo* regs, const ShaderVertexInputIn
}
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
void ShaderGetInputInfoCS(const ComputeShaderInfo* regs, ShaderComputeInputInfo* info)
void ShaderGetInputInfoCS(const HW::ComputeShaderInfo* regs, ShaderComputeInputInfo* info)
{
EXIT_IF(info == nullptr);
EXIT_IF(regs == nullptr);
@ -2602,7 +2614,7 @@ void ShaderGetInputInfoCS(const ComputeShaderInfo* regs, ShaderComputeInputInfo*
{
ShaderGetTextureBuffer(&info->bind.textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadOnly,
regs->cs_user_sgpr, extended_buffer);
EXIT_NOT_IMPLEMENTED(info->bind.textures2D.textures[info->bind.textures2D.textures_num - 1].Type() != 9);
EXIT_NOT_IMPLEMENTED(info->bind.textures2D.desc[info->bind.textures2D.textures_num - 1].texture.Type() != 9);
}
break;
case 0x02:
@ -2620,7 +2632,7 @@ void ShaderGetInputInfoCS(const ComputeShaderInfo* regs, ShaderComputeInputInfo*
{
ShaderGetTextureBuffer(&info->bind.textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadWrite,
regs->cs_user_sgpr, extended_buffer);
EXIT_NOT_IMPLEMENTED(info->bind.textures2D.textures[info->bind.textures2D.textures_num - 1].Type() != 9);
EXIT_NOT_IMPLEMENTED(info->bind.textures2D.desc[info->bind.textures2D.textures_num - 1].texture.Type() != 9);
}
break;
case 0x07:
@ -2692,7 +2704,7 @@ static void ShaderDbgDumpResources(const ShaderBindResources& bind)
for (int i = 0; i < bind.textures2D.textures_num; i++)
{
const auto& r = bind.textures2D.textures[i];
const auto& r = bind.textures2D.desc[i].texture;
printf("\t Texture %d\n", i);
@ -2723,10 +2735,10 @@ static void ShaderDbgDumpResources(const ShaderBindResources& bind)
printf("\t\t MinLodWarn() = %" PRIu16 "\n", r.MinLodWarn());
printf("\t\t CounterBankId() = %" PRIu8 "\n", r.CounterBankId());
printf("\t\t LodHdwCntEn() = %s\n", r.LodHdwCntEn() ? "true" : "false");
printf("\t\t slot = %d\n", bind.textures2D.slots[i]);
printf("\t\t start_register = %d\n", bind.textures2D.start_register[i]);
printf("\t\t extended = %s\n", (bind.textures2D.extended[i] ? "true" : "false"));
printf("\t\t usage = %s\n", Core::EnumName(bind.textures2D.usages[i]).C_Str());
printf("\t\t slot = %d\n", bind.textures2D.desc[i].slot);
printf("\t\t start_register = %d\n", bind.textures2D.desc[i].start_register);
printf("\t\t extended = %s\n", (bind.textures2D.desc[i].extended ? "true" : "false"));
printf("\t\t usage = %s\n", Core::EnumName(bind.textures2D.desc[i].usage).C_Str());
}
for (int i = 0; i < bind.samplers.samplers_num; i++)
@ -3017,7 +3029,7 @@ private:
String m_file_name;
};
ShaderCode ShaderParseVS(const VertexShaderInfo* regs)
ShaderCode ShaderParseVS(const HW::VertexShaderInfo* regs)
{
KYTY_PROFILER_FUNCTION(profiler::colors::Amber300);
@ -3094,7 +3106,7 @@ Vector<uint32_t> ShaderRecompileVS(const ShaderCode& code, const ShaderVertexInp
return ret;
}
ShaderCode ShaderParsePS(const PixelShaderInfo* regs)
ShaderCode ShaderParsePS(const HW::PixelShaderInfo* regs)
{
KYTY_PROFILER_FUNCTION(profiler::colors::Blue300);
@ -3178,7 +3190,7 @@ Vector<uint32_t> ShaderRecompilePS(const ShaderCode& code, const ShaderPixelInpu
return ret;
}
ShaderCode ShaderParseCS(const ComputeShaderInfo* regs)
ShaderCode ShaderParseCS(const HW::ComputeShaderInfo* regs)
{
KYTY_PROFILER_FUNCTION(profiler::colors::CyanA700);
@ -3246,111 +3258,110 @@ Vector<uint32_t> ShaderRecompileCS(const ShaderCode& code, const ShaderComputeIn
return ret;
}
// Classifies every 2D texture listed in bind->textures2D by how the shader code uses it.
//
// For texture `ti` the instruction stream of `code` is scanned for image instructions whose
// src[1] register matches the register holding that texture:
//   - ImageStore / ImageStoreMip / ImageLoad  -> p.textures2d_without_sampler[ti] = true
//   - ImageSample                             -> p.textures2d_without_sampler[ti] = false
// The result also carries the totals textures2d_storage_num (without sampler) and
// textures2d_sampled_num (with sampler).
//
// code: parsed shader whose instructions are inspected.
// bind: resource bindings; dereferenced without a null check, so callers must pass a valid pointer.
// Returns the filled ShaderBindParameters.
//
// NOTE(review): EXIT_NOT_IMPLEMENTED is assumed to abort when its condition is true
// (that is how it is used elsewhere in this file) - confirm against the macro definition.
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
static ShaderBindParameters ShaderUpdateBindInfo(const ShaderCode& code, const ShaderBindResources* bind)
{
ShaderBindParameters p {};
// Scans instructions [index, end) for image operations that reference register `s` via src[1].
// `found` and `without_sampler` are in/out: they accumulate across calls for the same texture.
auto find_image_op = [&](int index, int s, bool& found, bool& without_sampler)
{
const auto& insts = code.GetInstructions();
int size = static_cast<int>(insts.Size());
for (int i = index; i < size; i++)
{
const auto& inst = insts.At(i);
// Stop once an instruction writes an SGPR range covering `s` (through dst or dst2) or the
// program ends: past that point the register no longer holds the value being tracked.
if ((inst.dst.type == ShaderOperandType::Sgpr && s >= inst.dst.register_id && s < inst.dst.register_id + inst.dst.size) ||
(inst.dst2.type == ShaderOperandType::Sgpr && s >= inst.dst2.register_id && s < inst.dst2.register_id + inst.dst2.size) ||
inst.type == ShaderInstructionType::SEndpgm)
{
break;
}
if (inst.type == ShaderInstructionType::ImageStore || inst.type == ShaderInstructionType::ImageStoreMip ||
inst.type == ShaderInstructionType::ImageLoad)
{
if (inst.src[1].register_id == s)
{
// A texture already seen through ImageSample must not also be used without a sampler.
EXIT_NOT_IMPLEMENTED(found && !without_sampler);
without_sampler = true;
found = true;
}
} else if (inst.type == ShaderInstructionType::ImageSample)
{
if (inst.src[1].register_id == s)
{
// Mirror of the check above: mixing sampler-less and sampled use is rejected.
EXIT_NOT_IMPLEMENTED(found && without_sampler);
without_sampler = false;
found = true;
}
}
}
};
if (bind->textures2D.textures_num > 0)
{
const auto& insts = code.GetInstructions();
for (int ti = 0; ti < bind->textures2D.textures_num; ti++)
{
bool found = false;
if (bind->textures2D.extended[ti])
{
// Extended textures are not in a fixed register: find the SLoadDwordx8 that loads the
// descriptor through the extended-buffer pointer register, then track its destination.
int s = bind->extended.start_register;
int index = 0;
for (const auto& inst: insts)
{
// Same stop rule as in find_image_op, applied to the extended-buffer pointer register.
if ((inst.dst.type == ShaderOperandType::Sgpr && s >= inst.dst.register_id &&
s < inst.dst.register_id + inst.dst.size) ||
(inst.dst2.type == ShaderOperandType::Sgpr && s >= inst.dst2.register_id &&
s < inst.dst2.register_id + inst.dst2.size) ||
inst.type == ShaderInstructionType::SEndpgm)
{
break;
}
// The load's constant shifted right by 2, plus 16, must equal the texture's start register.
// NOTE(review): presumably the constant is a byte offset turned into a dword index and
// 16 is the first extended register index - confirm.
if (inst.type == ShaderInstructionType::SLoadDwordx8 && inst.src[0].register_id == s &&
static_cast<int>(inst.src[1].constant.u >> 2u) + 16 == bind->textures2D.start_register[ti])
{
// Continue from the instruction after the load, tracking the register it wrote.
find_image_op(index + 1, inst.dst.register_id, found, p.textures2d_without_sampler[ti]);
}
index++;
}
} else
{
// Non-extended textures are tracked from the first instruction by their start register.
find_image_op(0, bind->textures2D.start_register[ti], found, p.textures2d_without_sampler[ti]);
}
// Every bound texture has to be referenced by at least one recognised image instruction.
EXIT_NOT_IMPLEMENTED(!found);
if (p.textures2d_without_sampler[ti])
{
p.textures2d_storage_num++;
} else
{
p.textures2d_sampled_num++;
}
}
}
return p;
}
// Vertex-shader entry point: computes the bind parameters from the `bind` member of
// `input_info`. `input_info` is dereferenced unconditionally and must not be null.
ShaderBindParameters ShaderGetBindParametersVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info)
{
	const auto& vs_resources = input_info->bind;
	return ShaderUpdateBindInfo(code, &vs_resources);
}
// Pixel-shader entry point: computes the bind parameters from the `bind` member of
// `input_info`. `input_info` is dereferenced unconditionally and must not be null.
ShaderBindParameters ShaderGetBindParametersPS(const ShaderCode& code, const ShaderPixelInputInfo* input_info)
{
	const auto& ps_resources = input_info->bind;
	return ShaderUpdateBindInfo(code, &ps_resources);
}
// Compute-shader entry point: computes the bind parameters from the `bind` member of
// `input_info`. `input_info` is dereferenced unconditionally and must not be null.
ShaderBindParameters ShaderGetBindParametersCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info)
{
	const auto& cs_resources = input_info->bind;
	return ShaderUpdateBindInfo(code, &cs_resources);
}
//// NOLINTNEXTLINE(readability-function-cognitive-complexity)
// static ShaderBindParameters ShaderUpdateBindInfo(const ShaderCode& code, const ShaderBindResources* bind)
//{
// ShaderBindParameters p {};
//
// auto find_image_op = [&](int index, int s, bool& found, bool& without_sampler)
// {
// const auto& insts = code.GetInstructions();
// int size = static_cast<int>(insts.Size());
// for (int i = index; i < size; i++)
// {
// const auto& inst = insts.At(i);
//
// if ((inst.dst.type == ShaderOperandType::Sgpr && s >= inst.dst.register_id && s < inst.dst.register_id + inst.dst.size) ||
// (inst.dst2.type == ShaderOperandType::Sgpr && s >= inst.dst2.register_id && s < inst.dst2.register_id + inst.dst2.size) ||
// inst.type == ShaderInstructionType::SEndpgm)
// {
// break;
// }
//
// if (inst.type == ShaderInstructionType::ImageStore || inst.type == ShaderInstructionType::ImageStoreMip)
// {
// if (inst.src[1].register_id == s)
// {
// EXIT_NOT_IMPLEMENTED(found && !without_sampler);
// without_sampler = true;
// found = true;
// }
// } else if (inst.type == ShaderInstructionType::ImageSample || inst.type == ShaderInstructionType::ImageLoad)
// {
// if (inst.src[1].register_id == s)
// {
// EXIT_NOT_IMPLEMENTED(found && without_sampler);
// without_sampler = false;
// found = true;
// }
// }
// }
// };
//
// if (bind->textures2D.textures_num > 0)
// {
// const auto& insts = code.GetInstructions();
//
// for (int ti = 0; ti < bind->textures2D.textures_num; ti++)
// {
// bool found = false;
// if (bind->textures2D.desc[ti].extended)
// {
// int s = bind->extended.start_register;
//
// int index = 0;
// for (const auto& inst: insts)
// {
// if ((inst.dst.type == ShaderOperandType::Sgpr && s >= inst.dst.register_id &&
// s < inst.dst.register_id + inst.dst.size) ||
// (inst.dst2.type == ShaderOperandType::Sgpr && s >= inst.dst2.register_id &&
// s < inst.dst2.register_id + inst.dst2.size) ||
// inst.type == ShaderInstructionType::SEndpgm)
// {
// break;
// }
//
// if (inst.type == ShaderInstructionType::SLoadDwordx8 && inst.src[0].register_id == s &&
// static_cast<int>(inst.src[1].constant.u >> 2u) + 16 == bind->textures2D.desc[ti].start_register)
// {
// find_image_op(index + 1, inst.dst.register_id, found, p.textures2d_without_sampler[ti]);
// }
//
// index++;
// }
// } else
// {
// find_image_op(0, bind->textures2D.desc[ti].start_register, found, p.textures2d_without_sampler[ti]);
// }
//
// EXIT_NOT_IMPLEMENTED(!found);
//
// if (p.textures2d_without_sampler[ti])
// {
// p.textures2d_storage_num++;
// } else
// {
// p.textures2d_sampled_num++;
// }
// }
// }
// return p;
//}
//
// ShaderBindParameters ShaderGetBindParametersVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info)
//{
// return ShaderUpdateBindInfo(code, &input_info->bind);
//}
//
// ShaderBindParameters ShaderGetBindParametersPS(const ShaderCode& code, const ShaderPixelInputInfo* input_info)
//{
// return ShaderUpdateBindInfo(code, &input_info->bind);
//}
//
// ShaderBindParameters ShaderGetBindParametersCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info)
//{
// return ShaderUpdateBindInfo(code, &input_info->bind);
//}
static void ShaderGetBindIds(ShaderId* ret, const ShaderBindResources& bind)
{
@ -3402,10 +3413,10 @@ static void ShaderGetBindIds(ShaderId* ret, const ShaderBindResources& bind)
// ret->ids.Add(r.MinLodWarn());
// ret->ids.Add(r.CounterBankId());
// ret->ids.Add(static_cast<uint32_t>(r.LodHdwCntEn()));
ret->ids.Add(bind.textures2D.slots[i]);
ret->ids.Add(bind.textures2D.start_register[i]);
ret->ids.Add(static_cast<uint32_t>(bind.textures2D.extended[i]));
ret->ids.Add(static_cast<uint32_t>(bind.textures2D.usages[i]));
ret->ids.Add(bind.textures2D.desc[i].slot);
ret->ids.Add(bind.textures2D.desc[i].start_register);
ret->ids.Add(static_cast<uint32_t>(bind.textures2D.desc[i].extended));
ret->ids.Add(static_cast<uint32_t>(bind.textures2D.desc[i].usage));
}
ret->ids.Add(bind.samplers.samplers_num);
@ -3460,7 +3471,7 @@ static void ShaderGetBindIds(ShaderId* ret, const ShaderBindResources& bind)
ret->ids.Add(bind.extended.start_register);
}
ShaderId ShaderGetIdVS(const VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info)
ShaderId ShaderGetIdVS(const HW::VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info)
{
KYTY_PROFILER_FUNCTION();
@ -3526,7 +3537,7 @@ ShaderId ShaderGetIdVS(const VertexShaderInfo* regs, const ShaderVertexInputInfo
return ret;
}
ShaderId ShaderGetIdPS(const PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info)
ShaderId ShaderGetIdPS(const HW::PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info)
{
KYTY_PROFILER_FUNCTION();
@ -3567,7 +3578,7 @@ ShaderId ShaderGetIdPS(const PixelShaderInfo* regs, const ShaderPixelInputInfo*
return ret;
}
ShaderId ShaderGetIdCS(const ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info)
ShaderId ShaderGetIdCS(const HW::ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info)
{
const auto* src = reinterpret_cast<const uint32_t*>(regs->cs_regs.data_addr);

View file

@ -1113,6 +1113,42 @@ constexpr char32_t EMBEDDED_SHADER_VS_0[] = UR"(
OpFunctionEnd
)";
// SPIR-V assembly text of embedded pixel (fragment) shader 0.
// The listing declares a Fragment entry point "main" with OriginUpperLeft and stores a
// constant all-zero v4float into the output variable decorated with Location 0, matching
// the GLSL reference kept in the leading ';' comment lines of the listing.
constexpr char32_t EMBEDDED_SHADER_PS_0[] = UR"(
; #version 450
;
; layout(location = 0) out vec4 outColor;
;
; void main() {
; outColor = vec4(0);
; }
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %4 "main" %9
OpExecutionMode %4 OriginUpperLeft
; Annotations
OpDecorate %9 Location 0
; Types, variables and constants
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_ptr_Output_v4float = OpTypePointer Output %v4float
%9 = OpVariable %_ptr_Output_v4float Output
%float_0 = OpConstant %float 0
%11 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
; Function 4
%4 = OpFunction %void None %3
%5 = OpLabel
OpStore %9 %11
OpReturn
OpFunctionEnd
)";
constexpr char32_t EXECZ[] = UR"(
%z191_<index> = OpLoad %uint %exec_lo
%z192_<index> = OpIEqual %bool %z191_<index> %uint_0
@ -1219,8 +1255,8 @@ public:
void SetPsInputInfo(const ShaderPixelInputInfo* input_info) { m_ps_input_info = input_info; }
[[nodiscard]] const ShaderPixelInputInfo* GetPsInputInfo() const { return m_ps_input_info; }
[[nodiscard]] const ShaderBindResources* GetBindInfo() const { return m_bind; }
[[nodiscard]] const ShaderBindParameters& GetBindParams() const { return m_bind_params; }
[[nodiscard]] const ShaderBindResources* GetBindInfo() const { return m_bind; }
//[[nodiscard]] const ShaderBindParameters& GetBindParams() const { return m_bind_params; }
void AddConstantUint(uint32_t u);
void AddConstantInt(int i);
@ -1280,7 +1316,7 @@ private:
const ShaderComputeInputInfo* m_cs_input_info = nullptr;
const ShaderPixelInputInfo* m_ps_input_info = nullptr;
const ShaderBindResources* m_bind = nullptr;
ShaderBindParameters m_bind_params;
// ShaderBindParameters m_bind_params;
Core::Array2<int, 64, 2> m_extended_mapping {};
};
@ -2183,11 +2219,11 @@ KYTY_RECOMPILER_FUNC(Recompile_Exp_Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done)
KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata3Vaddr3StSsDmask7)
{
const auto& inst = code.GetInstructions().At(index);
const auto* bind_info = spirv->GetBindInfo();
const auto& bind_params = spirv->GetBindParams();
const auto& inst = code.GetInstructions().At(index);
const auto* bind_info = spirv->GetBindInfo();
// const auto& bind_params = spirv->GetBindParams();
if (bind_info != nullptr && bind_params.textures2d_sampled_num > 0 && bind_info->samplers.samplers_num > 0)
if (bind_info != nullptr && bind_info->textures2D.textures2d_sampled_num > 0 && bind_info->samplers.samplers_num > 0)
{
auto dst_value0 = operand_variable_to_str(inst.dst, 0);
auto dst_value1 = operand_variable_to_str(inst.dst, 1);
@ -2248,11 +2284,11 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata3Vaddr3StSsDmask7)
KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata4Vaddr3StSsDmaskF)
{
const auto& inst = code.GetInstructions().At(index);
const auto* bind_info = spirv->GetBindInfo();
const auto& bind_params = spirv->GetBindParams();
const auto& inst = code.GetInstructions().At(index);
const auto* bind_info = spirv->GetBindInfo();
// const auto& bind_params = spirv->GetBindParams();
if (bind_info != nullptr && bind_params.textures2d_sampled_num > 0 && bind_info->samplers.samplers_num > 0)
if (bind_info != nullptr && bind_info->textures2D.textures2d_sampled_num > 0 && bind_info->samplers.samplers_num > 0)
{
auto dst_value0 = operand_variable_to_str(inst.dst, 0);
auto dst_value1 = operand_variable_to_str(inst.dst, 1);
@ -2318,11 +2354,11 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata4Vaddr3StSsDmaskF)
KYTY_RECOMPILER_FUNC(Recompile_ImageLoad_Vdata4Vaddr3StDmaskF)
{
const auto& inst = code.GetInstructions().At(index);
const auto* bind_info = spirv->GetBindInfo();
const auto& bind_params = spirv->GetBindParams();
const auto& inst = code.GetInstructions().At(index);
const auto* bind_info = spirv->GetBindInfo();
// const auto& bind_params = spirv->GetBindParams();
if (bind_info != nullptr && bind_params.textures2d_storage_num > 0)
if (bind_info != nullptr && bind_info->textures2D.textures2d_sampled_num > 0)
{
auto dst_value0 = operand_variable_to_str(inst.dst, 0);
auto dst_value1 = operand_variable_to_str(inst.dst, 1);
@ -2344,14 +2380,14 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageLoad_Vdata4Vaddr3StDmaskF)
static const char32_t* text = UR"(
%t24_<index> = OpLoad %uint %<src1_value0>
%t26_<index> = OpAccessChain %_ptr_UniformConstant_ImageL %textures2D_L %t24_<index>
%t27_<index> = OpLoad %ImageL %t26_<index>
%t26_<index> = OpAccessChain %_ptr_UniformConstant_ImageS %textures2D_S %t24_<index>
%t27_<index> = OpLoad %ImageS %t26_<index>
%t67_<index> = OpLoad %float %<src0_value0>
%t69_<index> = OpBitcast %uint %t67_<index>
%t70_<index> = OpLoad %float %<src0_value1>
%t71_<index> = OpBitcast %uint %t70_<index>
%t73_<index> = OpCompositeConstruct %v2uint %t69_<index> %t71_<index>
%t74_<index> = OpImageRead %v4float %t27_<index> %t73_<index>
%t74_<index> = OpImageFetch %v4float %t27_<index> %t73_<index>
OpStore %temp_v4float %t74_<index>
%t46_<index> = OpAccessChain %_ptr_Function_float %temp_v4float %uint_0
%t47_<index> = OpLoad %float %t46_<index>
@ -2385,11 +2421,11 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageLoad_Vdata4Vaddr3StDmaskF)
KYTY_RECOMPILER_FUNC(Recompile_ImageStore_Vdata4Vaddr3StDmaskF)
{
const auto& inst = code.GetInstructions().At(index);
const auto* bind_info = spirv->GetBindInfo();
const auto& bind_params = spirv->GetBindParams();
const auto& inst = code.GetInstructions().At(index);
const auto* bind_info = spirv->GetBindInfo();
// const auto& bind_params = spirv->GetBindParams();
if (bind_info != nullptr && bind_params.textures2d_storage_num > 0)
if (bind_info != nullptr && bind_info->textures2D.textures2d_storage_num > 0)
{
auto dst_value0 = operand_variable_to_str(inst.dst, 0);
auto dst_value1 = operand_variable_to_str(inst.dst, 1);
@ -2453,11 +2489,11 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageStore_Vdata4Vaddr3StDmaskF)
KYTY_RECOMPILER_FUNC(Recompile_ImageStoreMip_Vdata4Vaddr4StDmaskF)
{
const auto& inst = code.GetInstructions().At(index);
const auto* bind_info = spirv->GetBindInfo();
const auto& bind_params = spirv->GetBindParams();
const auto& inst = code.GetInstructions().At(index);
const auto* bind_info = spirv->GetBindInfo();
// const auto& bind_params = spirv->GetBindParams();
if (bind_info != nullptr && bind_params.textures2d_storage_num > 0)
if (bind_info != nullptr && bind_info->textures2D.textures2d_storage_num > 0)
{
auto dst_value0 = operand_variable_to_str(inst.dst, 0);
auto dst_value1 = operand_variable_to_str(inst.dst, 1);
@ -5206,20 +5242,20 @@ void Spirv::GenerateSource()
switch (m_code.GetType())
{
case ShaderType::Pixel:
m_bind = (m_ps_input_info != nullptr ? &m_ps_input_info->bind : nullptr);
m_bind_params = (m_ps_input_info != nullptr ? ShaderGetBindParametersPS(m_code, m_ps_input_info) : ShaderBindParameters());
m_bind = (m_ps_input_info != nullptr ? &m_ps_input_info->bind : nullptr);
// m_bind_params = (m_ps_input_info != nullptr ? ShaderGetBindParametersPS(m_code, m_ps_input_info) : ShaderBindParameters());
break;
case ShaderType::Vertex:
m_bind = (m_vs_input_info != nullptr ? &m_vs_input_info->bind : nullptr);
m_bind_params = (m_vs_input_info != nullptr ? ShaderGetBindParametersVS(m_code, m_vs_input_info) : ShaderBindParameters());
m_bind = (m_vs_input_info != nullptr ? &m_vs_input_info->bind : nullptr);
// m_bind_params = (m_vs_input_info != nullptr ? ShaderGetBindParametersVS(m_code, m_vs_input_info) : ShaderBindParameters());
break;
case ShaderType::Compute:
m_bind = (m_cs_input_info != nullptr ? &m_cs_input_info->bind : nullptr);
m_bind_params = (m_cs_input_info != nullptr ? ShaderGetBindParametersCS(m_code, m_cs_input_info) : ShaderBindParameters());
m_bind = (m_cs_input_info != nullptr ? &m_cs_input_info->bind : nullptr);
// m_bind_params = (m_cs_input_info != nullptr ? ShaderGetBindParametersCS(m_code, m_cs_input_info) : ShaderBindParameters());
break;
default:
m_bind = nullptr;
m_bind_params = ShaderBindParameters();
m_bind = nullptr;
// m_bind_params = ShaderBindParameters();
break;
}
@ -5269,11 +5305,11 @@ void Spirv::WriteHeader()
{
vars.Add(U"%buf");
}
if (m_bind_params.textures2d_sampled_num > 0)
if (m_bind->textures2D.textures2d_sampled_num > 0)
{
vars.Add(U"%textures2D_S");
}
if (m_bind_params.textures2d_storage_num > 0)
if (m_bind->textures2D.textures2d_storage_num > 0)
{
vars.Add(U"%textures2D_L");
}
@ -5473,13 +5509,13 @@ void Spirv::WriteAnnotations()
.ReplaceStr(U"<DescriptorSet>", String::FromPrintf("%u", m_bind->descriptor_set_slot))
.ReplaceStr(U"<BindingIndex>", String::FromPrintf("%d", m_bind->storage_buffers.binding_index));
}
if (m_bind_params.textures2d_sampled_num > 0)
if (m_bind->textures2D.textures2d_sampled_num > 0)
{
m_source += String(textures_annotations_s)
.ReplaceStr(U"<DescriptorSet>", String::FromPrintf("%u", m_bind->descriptor_set_slot))
.ReplaceStr(U"<BindingIndex>", String::FromPrintf("%d", m_bind->textures2D.binding_sampled_index));
}
if (m_bind_params.textures2d_storage_num > 0)
if (m_bind->textures2D.textures2d_storage_num > 0)
{
m_source += String(textures_annotations_l)
.ReplaceStr(U"<DescriptorSet>", String::FromPrintf("%u", m_bind->descriptor_set_slot))
@ -5645,15 +5681,15 @@ void Spirv::WriteTypes()
m_source +=
String(storage_buffers_types).ReplaceStr(U"<buffers_num>", String::FromPrintf("%d", m_bind->storage_buffers.buffers_num));
}
if (m_bind_params.textures2d_sampled_num > 0)
if (m_bind->textures2D.textures2d_sampled_num > 0)
{
m_source +=
String(textures_sampled_types).ReplaceStr(U"<buffers_num>", String::FromPrintf("%d", m_bind_params.textures2d_sampled_num));
m_source += String(textures_sampled_types)
.ReplaceStr(U"<buffers_num>", String::FromPrintf("%d", m_bind->textures2D.textures2d_sampled_num));
}
if (m_bind_params.textures2d_storage_num > 0)
if (m_bind->textures2D.textures2d_storage_num > 0)
{
m_source +=
String(textures_loaded_types).ReplaceStr(U"<buffers_num>", String::FromPrintf("%d", m_bind_params.textures2d_storage_num));
m_source += String(textures_loaded_types)
.ReplaceStr(U"<buffers_num>", String::FromPrintf("%d", m_bind->textures2D.textures2d_storage_num));
}
if (m_bind->samplers.samplers_num > 0)
{
@ -5719,15 +5755,15 @@ void Spirv::WriteGlobalVariables()
vars.Add(String::FromPrintf("%%buf = OpVariable %%_ptr_StorageBuffer__arr_BufferObject_uint_%d StorageBuffer",
m_bind->storage_buffers.buffers_num));
}
if (m_bind_params.textures2d_sampled_num > 0)
if (m_bind->textures2D.textures2d_sampled_num > 0)
{
vars.Add(String::FromPrintf("%%textures2D_S = OpVariable %%_ptr_UniformConstant__arr_ImageS_uint_%d UniformConstant",
m_bind_params.textures2d_sampled_num));
m_bind->textures2D.textures2d_sampled_num));
}
if (m_bind_params.textures2d_storage_num > 0)
if (m_bind->textures2D.textures2d_storage_num > 0)
{
vars.Add(String::FromPrintf("%%textures2D_L = OpVariable %%_ptr_UniformConstant__arr_ImageL_uint_%d UniformConstant",
m_bind_params.textures2d_storage_num));
m_bind->textures2D.textures2d_storage_num));
}
if (m_bind->samplers.samplers_num > 0)
{
@ -5950,8 +5986,8 @@ void Spirv::WriteLocalVariables()
for (int i = 0; i < m_bind->textures2D.textures_num; i++)
{
int start_reg = m_bind->textures2D.start_register[i];
bool extended = m_bind->textures2D.extended[i];
int start_reg = m_bind->textures2D.desc[i].start_register;
bool extended = m_bind->textures2D.desc[i].extended;
for (int ti = 0; ti < 2; ti++)
{
@ -6304,7 +6340,7 @@ void Spirv::FindVariables()
}
for (int i = 0; i < m_bind->textures2D.textures_num; i++)
{
int storage_start = m_bind->textures2D.start_register[i];
int storage_start = m_bind->textures2D.desc[i].start_register;
AddVariable(ShaderOperandType::Sgpr, storage_start, 8);
}
for (int i = 0; i < m_bind->samplers.samplers_num; i++)
@ -6339,9 +6375,7 @@ String SpirvGetEmbeddedPs(uint32_t id)
{
EXIT_NOT_IMPLEMENTED(id != 0);
KYTY_NOT_IMPLEMENTED;
return U"";
return EMBEDDED_SHADER_PS_0;
}
} // namespace Kyty::Libs::Graphics

View file

@ -673,6 +673,11 @@ void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t h
{ {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, } },
{ 10, 0, 256, 256, 9, 14, true, {262144, 65536, 16384, 4096, 1024, 256, 256, 256, 256, },
{ {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, } },
// kDataFormatR32Float, 1920, 1080, kTileModeDepth_2dThin_256
{ 4, 7, 1920, 1080, 11, 2, false, {8355840, 12615680, 1048576, 262144, 65536, 16384, 2048, 1024, 1024, 1024, 1024, },
{ {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, } },
{ 4, 7, 1920, 1080, 11, 2, true, {8847360, 12124160, 1048576, 262144, 65536, 16384, 2048, 1024, 1024, 1024, 1024, },
{ {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, } },
// clang-format on
};

View file

@ -1298,10 +1298,17 @@ static VkPhysicalDevice VulkanFindPhysicalDevice(VkInstance instance, VkSurfaceK
bool skip_device = true;
VkPhysicalDeviceProperties device_properties {};
VkPhysicalDeviceFeatures device_features {};
VkPhysicalDeviceFeatures2 device_features2 {};
VkPhysicalDeviceColorWriteEnableFeaturesEXT color_write_ext {};
color_write_ext.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT;
color_write_ext.pNext = nullptr;
device_features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
device_features2.pNext = &color_write_ext;
vkGetPhysicalDeviceProperties(device, &device_properties);
vkGetPhysicalDeviceFeatures(device, &device_features);
vkGetPhysicalDeviceFeatures2(device, &device_features2);
printf("Vulkan device: %s\n", device_properties.deviceName);
@ -1331,7 +1338,14 @@ static VkPhysicalDevice VulkanFindPhysicalDevice(VkInstance instance, VkSurfaceK
}
}
if (device_features.fragmentStoresAndAtomics != VK_TRUE)
if (color_write_ext.colorWriteEnable != VK_TRUE)
{
printf("colorWriteEnable is not supported\n");
skip_device = true;
}
if (device_features2.features.fragmentStoresAndAtomics != VK_TRUE)
{
printf("fragmentStoresAndAtomics is not supported\n");
skip_device = true;
@ -1529,9 +1543,14 @@ static VkDevice VulkanCreateDevice(VkPhysicalDevice physical_device, VkSurfaceKH
VkPhysicalDeviceFeatures device_features {};
device_features.fragmentStoresAndAtomics = VK_TRUE;
VkPhysicalDeviceColorWriteEnableFeaturesEXT color_write_ext {};
color_write_ext.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT;
color_write_ext.pNext = nullptr;
color_write_ext.colorWriteEnable = VK_TRUE;
VkDeviceCreateInfo create_info {};
create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
create_info.pNext = nullptr;
create_info.pNext = &color_write_ext;
create_info.flags = 0;
create_info.pQueueCreateInfos = &queue_create_info;
create_info.queueCreateInfoCount = 1;
@ -1965,7 +1984,7 @@ static void VulkanCreate(WindowContext* ctx)
}
Vector<const char*> device_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME,
"VK_KHR_maintenance1"};
VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME, "VK_KHR_maintenance1"};
#ifdef KYTY_ENABLE_DEBUG_PRINTF
if (Config::SpirvDebugPrintfEnabled())

View file

@ -1057,7 +1057,10 @@ void RuntimeLinker::LoadProgramToMemory(Program* program)
program->base_vaddr + program->base_size_aligned + exception_handler_size + tls_handler_size,
kyty_exception_handler);
VirtualMemory::ExceptionHandler::InstallVectored(kyty_exception_handler);
if (Libs::Graphics::GpuMemoryWatcherEnabled())
{
VirtualMemory::ExceptionHandler::InstallVectored(kyty_exception_handler);
}
}
// program->elf->SetBaseVAddr(program->base_vaddr);