PS5 graphics (#42)

This commit is contained in:
InoriRus 2022-08-18 12:29:59 +03:00
parent 90acd3d364
commit a753022096
10 changed files with 469 additions and 324 deletions

View file

@ -1,4 +1,4 @@
version: 0.1.12.build-{build}
version: 0.1.13.build-{build}
image: Visual Studio 2019
environment:
matrix:

View file

@ -82,7 +82,7 @@ if (KYTY_LINKER STREQUAL LD)
set(KYTY_LD_OPTIONS "-Wl,--image-base=0x100000000000")
endif()
project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.1.12)
project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.1.13)
include(src_script.cmake)

View file

@ -585,9 +585,11 @@ enum class UserSgprType
struct UserSgprInfo
{
uint32_t value[16] = {0};
UserSgprType type[16] = {};
uint32_t count = 0;
static constexpr int SGPRS_MAX = 16;
uint32_t value[SGPRS_MAX] = {0};
UserSgprType type[SGPRS_MAX] = {};
uint32_t count = 0;
};
struct VertexShaderInfo

View file

@ -101,7 +101,9 @@ enum class ShaderInstructionType : uint32_t
SMovB32,
SMovB64,
SMovkI32,
SMulHiU32,
SMulI32,
SMulkI32,
SNandB64,
SNorB64,
SOrB32,
@ -181,6 +183,7 @@ enum class ShaderInstructionType : uint32_t
VFloorF32,
VFmaF32,
VFractF32,
VInterpMovF32,
VInterpP1F32,
VInterpP2F32,
VLogF32,
@ -716,6 +719,11 @@ struct ShaderGdsResource
[[nodiscard]] uint16_t Size() const { return field & 0xFFFFu; }
};
// A single user SGPR that is handed to the shader as a plain 32-bit value.
// ShaderGetDirectSgpr() fills `field` from HW::UserSgprInfo::value[] and
// PrepareDirectSgprs() copies it into the dword stream that is later pushed
// with vkCmdPushConstants().
struct ShaderDirectSgprResource
{
	// Raw register contents; zero until assigned.
	uint32_t field {0};
};
struct ShaderExtendedResource
{
uint32_t fields[2] = {0};
@ -823,6 +831,15 @@ struct ShaderGdsResources
int binding_index = 0;
};
// Set of user SGPRs that no resource usage slot claimed and that are therefore
// forwarded to the shader as raw values (collected by ShaderParseUsage() /
// ShaderParseUsage2() through ShaderGetDirectSgpr()).
struct ShaderDirectSgprsResources
{
	// Capacity of the two arrays below.
	static constexpr int SGPRS_MAX = 4;

	// Raw register values; only the first `sgprs_num` entries are filled in.
	ShaderDirectSgprResource sgprs[SGPRS_MAX];
	// start_register[i] is the user SGPR index that sgprs[i] was read from.
	int start_register[SGPRS_MAX] = {};
	// Number of valid entries in `sgprs` / `start_register`.
	int sgprs_num {0};
};
struct ShaderExtendedResources
{
bool used = false;
@ -833,14 +850,15 @@ struct ShaderExtendedResources
struct ShaderBindResources
{
uint32_t push_constant_offset = 0;
uint32_t push_constant_size = 0;
uint32_t descriptor_set_slot = 0;
ShaderStorageResources storage_buffers;
ShaderTextureResources textures2D;
ShaderSamplerResources samplers;
ShaderGdsResources gds_pointers;
ShaderExtendedResources extended;
uint32_t push_constant_offset = 0;
uint32_t push_constant_size = 0;
uint32_t descriptor_set_slot = 0;
ShaderStorageResources storage_buffers;
ShaderTextureResources textures2D;
ShaderSamplerResources samplers;
ShaderGdsResources gds_pointers;
ShaderDirectSgprsResources direct_sgprs;
ShaderExtendedResources extended;
};
struct ShaderVertexInputInfo

View file

@ -1949,12 +1949,12 @@ static void CreateLayout(VkDescriptorSetLayout* set_layouts, uint32_t* set_layou
EXIT_IF(push_constant_info == nullptr);
EXIT_IF(push_constant_info_num == nullptr);
bool need_bind = (bind.storage_buffers.buffers_num > 0 || bind.textures2D.textures_num > 0 || bind.samplers.samplers_num > 0 ||
bind.gds_pointers.pointers_num > 0);
bool need_descriptor = (bind.storage_buffers.buffers_num > 0 || bind.textures2D.textures_num > 0 || bind.samplers.samplers_num > 0 ||
bind.gds_pointers.pointers_num > 0);
EXIT_IF(need_bind && bind.push_constant_size == 0);
EXIT_IF(need_descriptor && bind.push_constant_size == 0);
if (need_bind)
if (bind.push_constant_size != 0)
{
auto index = *push_constant_info_num;
@ -1964,7 +1964,7 @@ static void CreateLayout(VkDescriptorSetLayout* set_layouts, uint32_t* set_layou
(*push_constant_info_num)++;
}
if (need_bind)
if (need_descriptor)
{
EXIT_IF(bind.descriptor_set_slot != *set_layouts_num);
@ -4488,6 +4488,26 @@ static void PrepareGdsPointers(const ShaderGdsResources& gds_pointers, uint32_t*
}
}
// Writes the directly-passed user SGPR values into the dword stream at *sgprs
// and moves *sgprs past them.
//
// direct_sgprs  values to emit; the first `sgprs_num` entries are used
// sgprs         in/out write cursor; neither it nor *sgprs may be null
//
// The cursor always advances by a multiple of 4 dwords, which mirrors the
// 16-byte steps ShaderCalcBindingIndices() adds to push_constant_size for this
// group. The padding dwords themselves are not written here.
static void PrepareDirectSgprs(const ShaderDirectSgprsResources& direct_sgprs, uint32_t** sgprs)
{
	KYTY_PROFILER_FUNCTION();

	EXIT_IF(sgprs == nullptr);
	EXIT_IF(*sgprs == nullptr);

	const int count = direct_sgprs.sgprs_num;

	if (count <= 0)
	{
		// Nothing to emit: the cursor is left untouched.
		return;
	}

	uint32_t* const out = *sgprs;

	for (int slot = 0; slot < count; slot++)
	{
		out[slot] = direct_sgprs.sgprs[slot].field;
	}

	// Round the number of consumed dwords up to the next multiple of 4.
	const int padded_dwords = 4 * ((count - 1) / 4 + 1);

	*sgprs = out + static_cast<ptrdiff_t>(padded_dwords);
}
static void BindDescriptors(uint64_t submit_id, CommandBuffer* buffer, VkPipelineBindPoint pipeline_bind_point, VkPipelineLayout layout,
const ShaderBindResources& bind, VkShaderStageFlags vk_stage, DescriptorCache::Stage stage)
{
@ -4503,6 +4523,8 @@ static void BindDescriptors(uint64_t submit_id, CommandBuffer* buffer, VkPipelin
bind.textures2D.textures_num);
EXIT_NOT_IMPLEMENTED(bind.samplers.samplers_num > DescriptorCache::SAMPLERS_MAX);
bool need_descriptor = false;
VulkanBuffer* storage_buffers[DescriptorCache::BUFFERS_MAX];
VulkanImage* textures2d_sampled[DescriptorCache::TEXTURES_SAMPLED_MAX];
int textures2d_sampled_view[DescriptorCache::TEXTURES_SAMPLED_MAX];
@ -4517,29 +4539,32 @@ static void BindDescriptors(uint64_t submit_id, CommandBuffer* buffer, VkPipelin
if (bind.storage_buffers.buffers_num > 0)
{
PrepareStorageBuffers(submit_id, buffer, bind.storage_buffers, storage_buffers, &sgprs_ptr);
need_descriptor = true;
}
if (bind.textures2D.textures_num > 0)
{
PrepareTextures(submit_id, buffer, bind.textures2D, textures2d_sampled, textures2d_storage, textures2d_sampled_view,
&sgprs_ptr);
need_descriptor = true;
}
if (bind.samplers.samplers_num > 0)
{
PrepareSamplers(bind.samplers, samplers, &sgprs_ptr);
need_descriptor = true;
}
if (bind.gds_pointers.pointers_num > 0)
{
PrepareGdsPointers(bind.gds_pointers, &sgprs_ptr);
gds_buffer = g_render_ctx->GetGdsBuffer()->GetBuffer(g_render_ctx->GetGraphicCtx());
gds_buffer = g_render_ctx->GetGdsBuffer()->GetBuffer(g_render_ctx->GetGraphicCtx());
need_descriptor = true;
}
if (bind.direct_sgprs.sgprs_num > 0)
{
PrepareDirectSgprs(bind.direct_sgprs, &sgprs_ptr);
}
EXIT_IF(bind.push_constant_size != (sgprs_ptr - sgprs) * 4);
auto* descriptor_set = g_render_ctx->GetDescriptorCache()->GetDescriptor(
stage, storage_buffers, textures2d_sampled, textures2d_sampled_view, textures2d_storage, samplers, &gds_buffer, bind);
EXIT_IF(descriptor_set == nullptr);
auto* vk_buffer = buffer->GetPool()->buffers[buffer->GetIndex()];
if (bind.textures2D.textures_num > 0)
@ -4556,7 +4581,15 @@ static void BindDescriptors(uint64_t submit_id, CommandBuffer* buffer, VkPipelin
}
}
vkCmdBindDescriptorSets(vk_buffer, pipeline_bind_point, layout, bind.descriptor_set_slot, 1, &descriptor_set->set, 0, nullptr);
if (need_descriptor)
{
auto* descriptor_set = g_render_ctx->GetDescriptorCache()->GetDescriptor(
stage, storage_buffers, textures2d_sampled, textures2d_sampled_view, textures2d_storage, samplers, &gds_buffer, bind);
EXIT_IF(descriptor_set == nullptr);
vkCmdBindDescriptorSets(vk_buffer, pipeline_bind_point, layout, bind.descriptor_set_slot, 1, &descriptor_set->set, 0, nullptr);
}
vkCmdPushConstants(vk_buffer, layout, vk_stage, bind.push_constant_offset, bind.push_constant_size, sgprs);
}
}

View file

@ -1176,7 +1176,8 @@ void CommandProcessor::WriteAtEndOfPipe64(uint32_t cache_policy, uint32_t event_
{
GraphicsRenderWriteAtEndOfPipe32(m_sumbit_id, m_buffer[m_current_buffer], static_cast<uint32_t*>(dst_gpu_addr), value);
} else if (((eop_event_type == 0x04 && event_index == 0x05) || (eop_event_type == 0x28 && event_index == 0x05) ||
(eop_event_type == 0x2f && event_index == 0x06) || (eop_event_type == 0x14 && event_index == 0x00)) &&
(eop_event_type == 0x2f && event_index == 0x06) || (eop_event_type == 0x14 && event_index == 0x00) ||
(eop_event_type == 0x28 && event_index == 0x00)) &&
cache_action == 0x38 && source64 && !with_interrupt)
{
GraphicsRenderWriteAtEndOfPipeWithWriteBack64(m_sumbit_id, m_buffer[m_current_buffer], static_cast<uint64_t*>(dst_gpu_addr), value);
@ -2249,6 +2250,8 @@ KYTY_HW_SH_PARSER(hw_sh_set_cs_user_sgpr)
auto reg_num = (cmd_id >> 16u) & 0x3fffu;
EXIT_NOT_IMPLEMENTED(reg_num >= HW::UserSgprInfo::SGPRS_MAX);
for (uint32_t i = 0; i < reg_num; i++)
{
cp->GetShCtx()->SetCsUserSgpr(slot + i, buffer[i], cp->GetUserDataMarker());
@ -2350,6 +2353,8 @@ KYTY_HW_SH_PARSER(hw_sh_set_ps_user_sgpr)
auto reg_num = (cmd_id >> 16u) & 0x3fffu;
EXIT_NOT_IMPLEMENTED(reg_num >= HW::UserSgprInfo::SGPRS_MAX);
for (uint32_t i = 0; i < reg_num; i++)
{
cp->GetShCtx()->SetPsUserSgpr(slot + i, buffer[i], cp->GetUserDataMarker());
@ -2432,6 +2437,8 @@ KYTY_HW_SH_PARSER(hw_sh_set_vs_user_sgpr)
auto reg_num = (cmd_id >> 16u) & 0x3fffu;
EXIT_NOT_IMPLEMENTED(reg_num >= HW::UserSgprInfo::SGPRS_MAX);
for (uint32_t i = 0; i < reg_num; i++)
{
cp->GetShCtx()->SetVsUserSgpr(slot + i, buffer[i], cp->GetUserDataMarker());
@ -2449,6 +2456,8 @@ KYTY_HW_SH_PARSER(hw_sh_set_gs_user_sgpr)
auto reg_num = (cmd_id >> 16u) & 0x3fffu;
EXIT_NOT_IMPLEMENTED(reg_num >= HW::UserSgprInfo::SGPRS_MAX);
for (uint32_t i = 0; i < reg_num; i++)
{
cp->GetShCtx()->SetGsUserSgpr(slot + i, buffer[i], cp->GetUserDataMarker());

View file

@ -87,6 +87,7 @@ struct ShaderParsedUsage
bool extended_buffer = false;
int samplers = 0;
int gds_pointers = 0;
int direct_sgprs = 0;
};
struct ShaderDebugPrintfCmds
@ -1137,7 +1138,7 @@ static void ShaderParseAttrib(ShaderVertexInputInfo* info, const ShaderSemantic*
}
}
static void ShaderGetStorageBuffer(ShaderStorageResources* info, int start_index, int slot, ShaderStorageUsage usage,
static void ShaderGetStorageBuffer(ShaderStorageResources* info, bool* direct_sgprs, int start_index, int slot, ShaderStorageUsage usage,
const HW::UserSgprInfo& user_sgpr, const uint32_t* extended_buffer)
{
EXIT_IF(info == nullptr);
@ -1162,6 +1163,8 @@ static void ShaderGetStorageBuffer(ShaderStorageResources* info, int start_index
{
auto type = user_sgpr.type[start_index + j];
EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Vsharp && type != HW::UserSgprType::Region);
direct_sgprs[start_index + j] = false;
}
}
@ -1173,7 +1176,7 @@ static void ShaderGetStorageBuffer(ShaderStorageResources* info, int start_index
info->buffers_num++;
}
static void ShaderGetTextureBuffer(ShaderTextureResources* info, int start_index, int slot, ShaderTextureUsage usage,
static void ShaderGetTextureBuffer(ShaderTextureResources* info, bool* direct_sgprs, int start_index, int slot, ShaderTextureUsage usage,
const HW::UserSgprInfo& user_sgpr, const uint32_t* extended_buffer)
{
EXIT_IF(info == nullptr);
@ -1210,6 +1213,8 @@ static void ShaderGetTextureBuffer(ShaderTextureResources* info, int start_index
{
auto type = user_sgpr.type[start_index + j];
EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Vsharp && type != HW::UserSgprType::Region);
direct_sgprs[start_index + j] = false;
}
}
@ -1225,7 +1230,7 @@ static void ShaderGetTextureBuffer(ShaderTextureResources* info, int start_index
info->textures_num++;
}
static void ShaderGetSampler(ShaderSamplerResources* info, int start_index, int slot, const HW::UserSgprInfo& user_sgpr,
static void ShaderGetSampler(ShaderSamplerResources* info, bool* direct_sgprs, int start_index, int slot, const HW::UserSgprInfo& user_sgpr,
const uint32_t* extended_buffer)
{
EXIT_IF(info == nullptr);
@ -1249,6 +1254,8 @@ static void ShaderGetSampler(ShaderSamplerResources* info, int start_index, int
{
auto type = user_sgpr.type[start_index + j];
EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Vsharp && type != HW::UserSgprType::Region);
direct_sgprs[start_index + j] = false;
}
}
@ -1260,7 +1267,7 @@ static void ShaderGetSampler(ShaderSamplerResources* info, int start_index, int
info->samplers_num++;
}
static void ShaderGetGdsPointer(ShaderGdsResources* info, int start_index, int slot, const HW::UserSgprInfo& user_sgpr,
static void ShaderGetGdsPointer(ShaderGdsResources* info, bool* direct_sgprs, int start_index, int slot, const HW::UserSgprInfo& user_sgpr,
const uint32_t* extended_buffer)
{
EXIT_IF(info == nullptr);
@ -1282,6 +1289,8 @@ static void ShaderGetGdsPointer(ShaderGdsResources* info, int start_index, int s
{
auto type = user_sgpr.type[start_index];
EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Unknown);
direct_sgprs[start_index] = false;
}
info->pointers[index].field = (extended ? extended_buffer[start_index - 16] : user_sgpr.value[start_index]);
@ -1289,6 +1298,26 @@ static void ShaderGetGdsPointer(ShaderGdsResources* info, int start_index, int s
info->pointers_num++;
}
// Appends user SGPR number `start_index` to `info` as a directly-passed value.
//
// info         destination list; must not be null and must have a free entry
//              (fewer than ShaderDirectSgprsResources::SGPRS_MAX already stored)
// start_index  index into user_sgpr.type[] / user_sgpr.value[]
// user_sgpr    user SGPR state the value is read from
//
// Only registers whose type is HW::UserSgprType::Unknown are accepted; anything
// else is reported through EXIT_NOT_IMPLEMENTED.
static void ShaderGetDirectSgpr(ShaderDirectSgprsResources* info, int start_index, const HW::UserSgprInfo& user_sgpr)
{
	EXIT_IF(info == nullptr);

	EXIT_NOT_IMPLEMENTED(info->sgprs_num < 0 || info->sgprs_num >= ShaderDirectSgprsResources::SGPRS_MAX);

	// start_index is used to index user_sgpr.type[] and user_sgpr.value[], so it is
	// bounded by the size of those arrays rather than by a literal, and negative
	// indices are rejected as well.
	EXIT_NOT_IMPLEMENTED(start_index < 0 || start_index >= HW::UserSgprInfo::SGPRS_MAX);

	const int index = info->sgprs_num;

	info->start_register[index] = start_index;

	EXIT_NOT_IMPLEMENTED(user_sgpr.type[start_index] != HW::UserSgprType::Unknown);

	info->sgprs[index].field = user_sgpr.value[start_index];

	info->sgprs_num++;
}
void ShaderCalcBindingIndices(ShaderBindResources* bind)
{
KYTY_PROFILER_FUNCTION();
@ -1323,11 +1352,17 @@ void ShaderCalcBindingIndices(ShaderBindResources* bind)
bind->push_constant_size += (((bind->gds_pointers.pointers_num - 1) / 4) + 1) * 16;
}
if (bind->direct_sgprs.sgprs_num > 0)
{
bind->push_constant_size += (((bind->direct_sgprs.sgprs_num - 1) / 4) + 1) * 16;
}
EXIT_IF((bind->push_constant_size % 16) != 0);
}
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr)
void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr,
int user_sgpr_num)
{
KYTY_PROFILER_FUNCTION();
@ -1352,9 +1387,16 @@ void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResource
info->extended_buffer = false;
info->samplers = 0;
info->gds_pointers = 0;
info->direct_sgprs = 0;
uint32_t* extended_buffer = nullptr;
bool direct_sgprs[HW::UserSgprInfo::SGPRS_MAX];
for (int i = 0; i < HW::UserSgprInfo::SGPRS_MAX; i++)
{
direct_sgprs[i] = (i < user_sgpr_num);
}
for (int i = 0; i < usages.slots_num; i++)
{
const auto& usage = usages.slots[i];
@ -1364,13 +1406,13 @@ void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResource
EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3);
if (usage.flags == 0)
{
ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::ReadOnly,
user_sgpr, extended_buffer);
ShaderGetStorageBuffer(&bind->storage_buffers, direct_sgprs, usage.start_register, usage.slot,
ShaderStorageUsage::ReadOnly, user_sgpr, extended_buffer);
info->storage_buffers_readonly++;
} else if (usage.flags == 3)
{
ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadOnly, user_sgpr,
extended_buffer);
ShaderGetTextureBuffer(&bind->textures2D, direct_sgprs, usage.start_register, usage.slot, ShaderTextureUsage::ReadOnly,
user_sgpr, extended_buffer);
info->textures2D_readonly++;
EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() != 9);
}
@ -1378,14 +1420,14 @@ void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResource
case 0x01:
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
ShaderGetSampler(&bind->samplers, usage.start_register, usage.slot, user_sgpr, extended_buffer);
ShaderGetSampler(&bind->samplers, direct_sgprs, usage.start_register, usage.slot, user_sgpr, extended_buffer);
info->samplers++;
break;
case 0x02:
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::Constant, user_sgpr,
extended_buffer);
ShaderGetStorageBuffer(&bind->storage_buffers, direct_sgprs, usage.start_register, usage.slot, ShaderStorageUsage::Constant,
user_sgpr, extended_buffer);
info->storage_buffers_constant++;
break;
@ -1393,13 +1435,13 @@ void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResource
EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3);
if (usage.flags == 0)
{
ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::ReadWrite,
user_sgpr, extended_buffer);
ShaderGetStorageBuffer(&bind->storage_buffers, direct_sgprs, usage.start_register, usage.slot,
ShaderStorageUsage::ReadWrite, user_sgpr, extended_buffer);
info->storage_buffers_readwrite++;
} else if (usage.flags == 3)
{
ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadWrite, user_sgpr,
extended_buffer);
ShaderGetTextureBuffer(&bind->textures2D, direct_sgprs, usage.start_register, usage.slot, ShaderTextureUsage::ReadWrite,
user_sgpr, extended_buffer);
info->textures2D_readwrite++;
EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() != 9);
}
@ -1407,45 +1449,61 @@ void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResource
case 0x07:
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
ShaderGetGdsPointer(&bind->gds_pointers, usage.start_register, usage.slot, user_sgpr, extended_buffer);
ShaderGetGdsPointer(&bind->gds_pointers, direct_sgprs, usage.start_register, usage.slot, user_sgpr, extended_buffer);
info->gds_pointers++;
break;
case 0x12:
EXIT_NOT_IMPLEMENTED(usage.slot != 0);
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
info->fetch = true;
info->fetch_reg = usage.start_register;
info->fetch = true;
info->fetch_reg = usage.start_register;
direct_sgprs[usage.start_register] = false;
direct_sgprs[usage.start_register + 1] = false;
break;
case 0x17:
EXIT_NOT_IMPLEMENTED(usage.slot != 0);
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
info->vertex_buffer = true;
info->vertex_buffer_reg = usage.start_register;
info->vertex_buffer = true;
info->vertex_buffer_reg = usage.start_register;
direct_sgprs[usage.start_register] = false;
direct_sgprs[usage.start_register + 1] = false;
break;
case 0x1b:
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
EXIT_NOT_IMPLEMENTED(usage.slot != 1);
EXIT_NOT_IMPLEMENTED(bind->extended.used);
bind->extended.used = true;
bind->extended.slot = usage.slot;
bind->extended.start_register = usage.start_register;
bind->extended.data.fields[0] = user_sgpr.value[usage.start_register];
bind->extended.data.fields[1] = user_sgpr.value[usage.start_register + 1];
extended_buffer = reinterpret_cast<uint32_t*>(bind->extended.data.Base());
info->extended_buffer = true;
EXIT_NOT_IMPLEMENTED(usage.start_register + 1 >= HW::UserSgprInfo::SGPRS_MAX);
bind->extended.used = true;
bind->extended.slot = usage.slot;
bind->extended.start_register = usage.start_register;
bind->extended.data.fields[0] = user_sgpr.value[usage.start_register];
bind->extended.data.fields[1] = user_sgpr.value[usage.start_register + 1];
extended_buffer = reinterpret_cast<uint32_t*>(bind->extended.data.Base());
info->extended_buffer = true;
direct_sgprs[usage.start_register] = false;
direct_sgprs[usage.start_register + 1] = false;
break;
default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type);
}
}
for (int i = 0; i < HW::UserSgprInfo::SGPRS_MAX; i++)
{
if (direct_sgprs[i])
{
ShaderGetDirectSgpr(&bind->direct_sgprs, i, user_sgpr);
info->direct_sgprs++;
}
}
}
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info, ShaderBindResources* bind,
const HW::UserSgprInfo& user_sgpr)
const HW::UserSgprInfo& user_sgpr, int user_sgpr_num)
{
KYTY_PROFILER_FUNCTION();
@ -1464,6 +1522,7 @@ void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info,
info->extended_buffer = false;
info->samplers = 0;
info->gds_pointers = 0;
info->direct_sgprs = 0;
EXIT_NOT_IMPLEMENTED(user_data == nullptr);
EXIT_NOT_IMPLEMENTED(user_data->eud_size_dw != 0);
@ -1471,6 +1530,12 @@ void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info,
uint32_t* extended_buffer = nullptr;
bool direct_sgprs[HW::UserSgprInfo::SGPRS_MAX];
for (int i = 0; i < HW::UserSgprInfo::SGPRS_MAX; i++)
{
direct_sgprs[i] = (i < user_sgpr_num);
}
for (uint16_t type = 0; type < user_data->direct_resource_count; type++)
{
if (user_data->direct_resource_offset[type] == 0xffff)
@ -1483,13 +1548,17 @@ void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info,
switch (type)
{
case 8:
info->vertex_buffer = true;
info->vertex_buffer_reg = reg;
info->vertex_buffer = true;
info->vertex_buffer_reg = reg;
direct_sgprs[info->vertex_buffer_reg] = false;
direct_sgprs[info->vertex_buffer_reg + 1] = false;
break;
case 10:
info->vertex_attrib = true;
info->vertex_attrib_reg = reg;
info->vertex_attrib = true;
info->vertex_attrib_reg = reg;
direct_sgprs[info->vertex_attrib_reg] = false;
direct_sgprs[info->vertex_attrib_reg + 1] = false;
break;
default: EXIT("unknown usage type: 0x%04" PRIx16 "\n", type);
@ -1506,7 +1575,7 @@ void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info,
}
EXIT_NOT_IMPLEMENTED(user_data->sharp_resource_offset[0][slot].size != 0);
ShaderGetTextureBuffer(&bind->textures2D, user_data->sharp_resource_offset[0][slot].offset_dw, slot,
ShaderGetTextureBuffer(&bind->textures2D, direct_sgprs, user_data->sharp_resource_offset[0][slot].offset_dw, slot,
ShaderTextureUsage::ReadOnly, user_sgpr, extended_buffer);
info->textures2D_readonly++;
EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() != 9);
@ -1525,7 +1594,8 @@ void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info,
}
EXIT_NOT_IMPLEMENTED(user_data->sharp_resource_offset[2][slot].size != 1);
ShaderGetSampler(&bind->samplers, user_data->sharp_resource_offset[2][slot].offset_dw, slot, user_sgpr, extended_buffer);
ShaderGetSampler(&bind->samplers, direct_sgprs, user_data->sharp_resource_offset[2][slot].offset_dw, slot, user_sgpr,
extended_buffer);
info->samplers++;
}
}
@ -1540,208 +1610,22 @@ void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info,
}
EXIT_NOT_IMPLEMENTED(user_data->sharp_resource_offset[3][slot].size != 1);
ShaderGetStorageBuffer(&bind->storage_buffers, user_data->sharp_resource_offset[3][slot].offset_dw, slot,
ShaderGetStorageBuffer(&bind->storage_buffers, direct_sgprs, user_data->sharp_resource_offset[3][slot].offset_dw, slot,
ShaderStorageUsage::Constant, user_sgpr, extended_buffer);
info->storage_buffers_constant++;
}
}
// KYTY_NOT_IMPLEMENTED;
for (int i = 0; i < HW::UserSgprInfo::SGPRS_MAX; i++)
{
if (direct_sgprs[i])
{
ShaderGetDirectSgpr(&bind->direct_sgprs, i, user_sgpr);
info->direct_sgprs++;
}
}
}
//// NOLINTNEXTLINE(readability-function-cognitive-complexity)
// void ShaderParseUsageCS(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr)
//{
// KYTY_PROFILER_FUNCTION();
//
// EXIT_IF(bind == nullptr);
// EXIT_IF(info == nullptr);
//
// const auto* src = reinterpret_cast<const uint32_t*>(addr);
//
// auto usages = GetUsageSlots(src);
//
// info->fetch = false;
// info->fetch_reg = 0;
// info->vertex_buffer = false;
// info->vertex_buffer_reg = 0;
//
// uint32_t* extended_buffer = nullptr;
//
// for (int i = 0; i < usages.slots_num; i++)
// {
// const auto& usage = usages.slots[i];
// switch (usage.type)
// {
// // case 0x00:
// // EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3);
// // if (usage.flags == 0)
// // {
// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot,
// // ShaderStorageUsage::ReadOnly, user_sgpr, extended_buffer); }
// // else if (usage.flags
// // == 3)
// // {
// // ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot,
// // ShaderTextureUsage::ReadOnly, user_sgpr, extended_buffer);
// // EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type()
// //!= 9);
// // }
// // break;
// // case 0x02:
// // EXIT_NOT_IMPLEMENTED(usage.flags != 0);
// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot,
// // ShaderStorageUsage::Constant, user_sgpr, extended_buffer); break;
// // case 0x04:
// // EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3);
// // if (usage.flags == 0)
// // {
// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot,
// // ShaderStorageUsage::ReadWrite, user_sgpr, extended_buffer); }
// // else if (usage.flags
// // == 3)
// // {
// // ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot,
// // ShaderTextureUsage::ReadWrite, user_sgpr, extended_buffer);
// // EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type()
// //!= 9);
// // }
// // break;
// case 0x07:
// EXIT_NOT_IMPLEMENTED(usage.flags != 0);
// ShaderGetGdsPointer(&bind->gds_pointers, usage.start_register, usage.slot, user_sgpr, extended_buffer);
// break;
// // case 0x1b:
// // EXIT_NOT_IMPLEMENTED(usage.flags != 0);
// // EXIT_NOT_IMPLEMENTED(usage.slot != 1);
// // EXIT_NOT_IMPLEMENTED(bind->extended.used);
// // bind->extended.used = true;
// // bind->extended.slot = usage.slot;
// // bind->extended.start_register = usage.start_register;
// // bind->extended.data.fields[0] = user_sgpr.value[usage.start_register];
// // bind->extended.data.fields[1] = user_sgpr.value[usage.start_register + 1];
// // extended_buffer = reinterpret_cast<uint32_t*>(bind->extended.data.Base());
// // break;
// default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type);
// }
// }
// }
//
//// NOLINTNEXTLINE(readability-function-cognitive-complexity)
// void ShaderParseUsagePS(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr)
//{
// KYTY_PROFILER_FUNCTION();
//
// EXIT_IF(bind == nullptr);
// EXIT_IF(info == nullptr);
//
// const auto* src = reinterpret_cast<const uint32_t*>(addr);
//
// auto usages = GetUsageSlots(src);
//
// info->fetch = false;
// info->fetch_reg = 0;
// info->vertex_buffer = false;
// info->vertex_buffer_reg = 0;
//
// uint32_t* extended_buffer = nullptr;
//
// for (int i = 0; i < usages.slots_num; i++)
// {
// const auto& usage = usages.slots[i];
// switch (usage.type)
// {
// // case 0x00:
// // EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3);
// // if (usage.flags == 0)
// // {
// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot,
// // ShaderStorageUsage::ReadOnly, user_sgpr, extended_buffer); }
// // else if (usage.flags
// // == 3)
// // {
// // ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot,
// // ShaderTextureUsage::ReadOnly, user_sgpr, extended_buffer);
// // EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type()
// //!= 9);
// // }
// // break;
// // case 0x01:
// // EXIT_NOT_IMPLEMENTED(usage.flags != 0);
// // ShaderGetSampler(&bind->samplers, usage.start_register, usage.slot, user_sgpr, extended_buffer);
// // break;
// // case 0x02:
// // EXIT_NOT_IMPLEMENTED(usage.flags != 0);
// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot,
// // ShaderStorageUsage::Constant, user_sgpr, extended_buffer); break;
// // case 0x04:
// // EXIT_NOT_IMPLEMENTED(usage.flags != 3);
// // if (usage.flags == 3)
// // {
// // ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot,
// // ShaderTextureUsage::ReadWrite, user_sgpr, extended_buffer);
// // EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type()
// //!= 9);
// // }
// // break;
// // case 0x1b:
// // EXIT_NOT_IMPLEMENTED(usage.flags != 0);
// // EXIT_NOT_IMPLEMENTED(usage.slot != 1);
// // EXIT_NOT_IMPLEMENTED(bind->extended.used);
// // bind->extended.used = true;
// // bind->extended.slot = usage.slot;
// // bind->extended.start_register = usage.start_register;
// // bind->extended.data.fields[0] = user_sgpr.value[usage.start_register];
// // bind->extended.data.fields[1] = user_sgpr.value[usage.start_register + 1];
// // extended_buffer = reinterpret_cast<uint32_t*>(bind->extended.data.Base());
// // break;
// default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type);
// }
// }
// }
//
// void ShaderParseUsageVS(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr)
//{
// KYTY_PROFILER_FUNCTION();
//
// EXIT_IF(bind == nullptr);
// EXIT_IF(info == nullptr);
//
// const auto* src = reinterpret_cast<const uint32_t*>(addr);
//
// auto usages = GetUsageSlots(src);
//
// info->fetch = false;
// info->fetch_reg = 0;
// info->vertex_buffer = false;
// info->vertex_buffer_reg = 0;
//
// for (int i = 0; i < usages.slots_num; i++)
// {
// const auto& usage = usages.slots[i];
// switch (usage.type)
// {
// // case 0x02:
// // EXIT_NOT_IMPLEMENTED(usage.flags != 0);
// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot,
// // ShaderStorageUsage::Constant, user_sgpr, nullptr); break;
// // case 0x12:
// // EXIT_NOT_IMPLEMENTED(usage.slot != 0);
// // EXIT_NOT_IMPLEMENTED(usage.flags != 0);
// // info->fetch = true;
// // info->fetch_reg = usage.start_register;
// // break;
// // case 0x17:
// // EXIT_NOT_IMPLEMENTED(usage.slot != 0);
// // EXIT_NOT_IMPLEMENTED(usage.flags != 0);
// // info->vertex_buffer = true;
// // info->vertex_buffer_reg = usage.start_register;
// // break;
// default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type);
// }
// }
// }
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegisters* sh, ShaderVertexInputInfo* info)
{
@ -1764,8 +1648,9 @@ void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegi
bool gs_instead_of_vs =
(regs->vs_regs.data_addr == 0 && regs->gs_regs.data_addr == 0 && regs->es_regs.data_addr != 0 && regs->gs_regs.chksum != 0);
uint64_t shader_addr = (gs_instead_of_vs ? regs->es_regs.data_addr : regs->vs_regs.data_addr);
const HW::UserSgprInfo& user_sgpr = (gs_instead_of_vs ? regs->gs_user_sgpr : regs->vs_user_sgpr);
uint64_t shader_addr = (gs_instead_of_vs ? regs->es_regs.data_addr : regs->vs_regs.data_addr);
const HW::UserSgprInfo& user_sgpr = (gs_instead_of_vs ? regs->gs_user_sgpr : regs->vs_user_sgpr);
auto user_sgpr_num = (gs_instead_of_vs ? regs->gs_regs.rsrc2.user_sgpr : regs->vs_regs.rsrc2.user_sgpr);
bool ps5 = Config::IsNextGen();
@ -1786,14 +1671,14 @@ void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegi
info->gs_prolog = true;
ShaderParseUsage2(data.user_data, &usage, &info->bind, user_sgpr);
ShaderParseUsage2(data.user_data, &usage, &info->bind, user_sgpr, static_cast<int>(user_sgpr_num));
} else
{
EXIT_NOT_IMPLEMENTED(gs_instead_of_vs);
info->gs_prolog = false;
ShaderParseUsage(shader_addr, &usage, &info->bind, user_sgpr);
ShaderParseUsage(shader_addr, &usage, &info->bind, user_sgpr, user_sgpr_num);
}
EXIT_NOT_IMPLEMENTED(usage.extended_buffer);
@ -1812,6 +1697,9 @@ void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegi
info->fetch_attrib_reg = usage.vertex_attrib_reg;
info->fetch_buffer_reg = usage.vertex_buffer_reg;
EXIT_NOT_IMPLEMENTED(usage.vertex_attrib_reg + 1 >= HW::UserSgprInfo::SGPRS_MAX);
EXIT_NOT_IMPLEMENTED(usage.vertex_buffer_reg + 1 >= HW::UserSgprInfo::SGPRS_MAX);
const auto* attrib =
reinterpret_cast<const uint32_t*>(static_cast<uint64_t>(user_sgpr.value[usage.vertex_attrib_reg]) |
(static_cast<uint64_t>(user_sgpr.value[usage.vertex_attrib_reg + 1]) << 32u));
@ -1835,14 +1723,14 @@ void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegi
info->fetch_shader_reg = usage.fetch_reg;
info->fetch_buffer_reg = usage.vertex_buffer_reg;
EXIT_NOT_IMPLEMENTED(usage.fetch_reg >= 16 || usage.vertex_buffer_reg >= 16);
EXIT_NOT_IMPLEMENTED(usage.fetch_reg + 1 >= HW::UserSgprInfo::SGPRS_MAX);
EXIT_NOT_IMPLEMENTED(usage.vertex_buffer_reg + 1 >= HW::UserSgprInfo::SGPRS_MAX);
const auto* fetch =
reinterpret_cast<const uint32_t*>(static_cast<uint64_t>(regs->vs_user_sgpr.value[usage.fetch_reg]) |
(static_cast<uint64_t>(regs->vs_user_sgpr.value[usage.fetch_reg + 1]) << 32u));
const auto* fetch = reinterpret_cast<const uint32_t*>(static_cast<uint64_t>(user_sgpr.value[usage.fetch_reg]) |
(static_cast<uint64_t>(user_sgpr.value[usage.fetch_reg + 1]) << 32u));
const auto* buffer =
reinterpret_cast<const uint32_t*>(static_cast<uint64_t>(regs->vs_user_sgpr.value[usage.vertex_buffer_reg]) |
(static_cast<uint64_t>(regs->vs_user_sgpr.value[usage.vertex_buffer_reg + 1]) << 32u));
reinterpret_cast<const uint32_t*>(static_cast<uint64_t>(user_sgpr.value[usage.vertex_buffer_reg]) |
(static_cast<uint64_t>(user_sgpr.value[usage.vertex_buffer_reg + 1]) << 32u));
EXIT_NOT_IMPLEMENTED(fetch == nullptr || buffer == nullptr);
@ -1906,15 +1794,16 @@ void ShaderGetInputInfoPS(const HW::PixelShaderInfo* regs, const HW::ShaderRegis
{
EXIT_NOT_IMPLEMENTED(data.user_data == nullptr);
ShaderParseUsage2(data.user_data, &usage, &ps_info->bind, regs->ps_user_sgpr);
ShaderParseUsage2(data.user_data, &usage, &ps_info->bind, regs->ps_user_sgpr, regs->ps_regs.rsrc2.user_sgpr);
} else
{
ShaderParseUsage(regs->ps_regs.data_addr, &usage, &ps_info->bind, regs->ps_user_sgpr);
ShaderParseUsage(regs->ps_regs.data_addr, &usage, &ps_info->bind, regs->ps_user_sgpr, regs->ps_regs.rsrc2.user_sgpr);
}
EXIT_NOT_IMPLEMENTED(usage.fetch || usage.vertex_buffer || usage.vertex_attrib);
EXIT_NOT_IMPLEMENTED(usage.storage_buffers_readwrite > 0);
EXIT_NOT_IMPLEMENTED(usage.gds_pointers > 0);
EXIT_NOT_IMPLEMENTED(usage.direct_sgprs > 0);
ShaderCalcBindingIndices(&ps_info->bind);
}
@ -1940,10 +1829,11 @@ void ShaderGetInputInfoCS(const HW::ComputeShaderInfo* regs, const HW::ShaderReg
ShaderParsedUsage usage;
ShaderParseUsage(regs->cs_regs.data_addr, &usage, &info->bind, regs->cs_user_sgpr);
ShaderParseUsage(regs->cs_regs.data_addr, &usage, &info->bind, regs->cs_user_sgpr, regs->cs_regs.user_sgpr);
EXIT_NOT_IMPLEMENTED(usage.samplers > 0);
EXIT_NOT_IMPLEMENTED(usage.fetch || usage.vertex_buffer || usage.vertex_attrib);
EXIT_NOT_IMPLEMENTED(usage.direct_sgprs > 0);
ShaderCalcBindingIndices(&info->bind);
}
@ -1963,6 +1853,7 @@ static void ShaderDbgDumpResources(const ShaderBindResources& bind)
printf("\t samplers.binding_index = %d\n", bind.samplers.binding_index);
printf("\t gds_pointers.pointers_num = %d\n", bind.gds_pointers.pointers_num);
printf("\t gds_pointers.binding_index = %d\n", bind.gds_pointers.binding_index);
printf("\t direct_sgprs.sgprs_num = %d\n", bind.direct_sgprs.sgprs_num);
printf("\t extended.used = %s\n", (bind.extended.used ? "true" : "false"));
printf("\t extended.slot = %d\n", bind.extended.slot);
printf("\t extended.start_register = %d\n", bind.extended.start_register);
@ -2129,6 +2020,17 @@ static void ShaderDbgDumpResources(const ShaderBindResources& bind)
printf("\t\t start_register = %d\n", bind.gds_pointers.start_register[i]);
printf("\t\t extended = %s\n", (bind.gds_pointers.extended[i] ? "true" : "false"));
}
for (int i = 0; i < bind.direct_sgprs.sgprs_num; i++)
{
const auto& r = bind.direct_sgprs.sgprs[i];
printf("\t Direct Sgprs %d\n", i);
printf("\t\t field = %08" PRIx32 "\n", r.field);
printf("\t\t start_register = %d\n", bind.direct_sgprs.start_register[i]);
}
}
void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info)
@ -2412,7 +2314,13 @@ ShaderCode ShaderParseVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegis
vs_print("ShaderParseVS()", *regs, *sh);
vs_check(*regs, *sh);
EXIT_NOT_IMPLEMENTED(regs->vs_regs.rsrc2.user_sgpr > regs->vs_user_sgpr.count);
if (gs_instead_of_vs)
{
EXIT_NOT_IMPLEMENTED(regs->gs_regs.rsrc2.user_sgpr > regs->gs_user_sgpr.count);
} else
{
EXIT_NOT_IMPLEMENTED(regs->vs_regs.rsrc2.user_sgpr > regs->vs_user_sgpr.count);
}
if (Config::IsNextGen())
{
@ -2871,6 +2779,13 @@ static void ShaderGetBindIds(ShaderId* ret, const ShaderBindResources& bind)
ret->ids.Add(static_cast<uint32_t>(bind.gds_pointers.extended[i]));
}
ret->ids.Add(bind.direct_sgprs.sgprs_num);
for (int i = 0; i < bind.direct_sgprs.sgprs_num; i++)
{
ret->ids.Add(bind.direct_sgprs.start_register[i]);
}
ret->ids.Add(static_cast<uint32_t>(bind.extended.used));
ret->ids.Add(bind.extended.slot);
ret->ids.Add(bind.extended.start_register);

View file

@ -160,31 +160,30 @@ KYTY_SHADER_PARSER(shader_parse_sopk)
inst.pc = pc;
inst.dst = operand_parse(sdst);
inst.format = ShaderInstructionFormat::SVdstSVsrc0;
inst.src[0].type = ShaderOperandType::IntegerInlineConstant;
inst.src[0].constant.i = imm;
inst.src_num = 1;
switch (opcode)
{
case 0x00:
inst.type = ShaderInstructionType::SMovkI32;
inst.format = ShaderInstructionFormat::SVdstSVsrc0;
inst.src[0].type = ShaderOperandType::IntegerInlineConstant;
inst.src[0].constant.i = imm;
inst.src_num = 1;
break;
case 0x00: inst.type = ShaderInstructionType::SMovkI32; break;
case 0x2: KYTY_NI("s_cmovk_i32"); break;
case 0x3: KYTY_NI("s_cmpk_eq_i32"); break;
case 0x4: KYTY_NI("s_cmpk_lg_i32"); break;
case 0x5: KYTY_NI("s_cmpk_gt_i32"); break;
case 0x6: KYTY_NI("s_cmpk_ge_i32"); break;
case 0x7: KYTY_NI("s_cmpk_lt_i32"); break;
case 0x8: KYTY_NI("s_cmpk_le_i32"); break;
case 0x9: KYTY_NI("s_cmpk_eq_u32"); break;
case 0xA: KYTY_NI("s_cmpk_lg_u32"); break;
case 0xB: KYTY_NI("s_cmpk_gt_u32"); break;
case 0xC: KYTY_NI("s_cmpk_ge_u32"); break;
case 0xD: KYTY_NI("s_cmpk_lt_u32"); break;
case 0xE: KYTY_NI("s_cmpk_le_u32"); break;
case 0xF: KYTY_NI("s_addk_i32"); break;
case 0x10: KYTY_NI("s_mulk_i32"); break;
case 0x02: KYTY_NI("s_cmovk_i32"); break;
case 0x03: KYTY_NI("s_cmpk_eq_i32"); break;
case 0x04: KYTY_NI("s_cmpk_lg_i32"); break;
case 0x05: KYTY_NI("s_cmpk_gt_i32"); break;
case 0x06: KYTY_NI("s_cmpk_ge_i32"); break;
case 0x07: KYTY_NI("s_cmpk_lt_i32"); break;
case 0x08: KYTY_NI("s_cmpk_le_i32"); break;
case 0x09: KYTY_NI("s_cmpk_eq_u32"); break;
case 0x0A: KYTY_NI("s_cmpk_lg_u32"); break;
case 0x0B: KYTY_NI("s_cmpk_gt_u32"); break;
case 0x0C: KYTY_NI("s_cmpk_ge_u32"); break;
case 0x0D: KYTY_NI("s_cmpk_lt_u32"); break;
case 0x0E: KYTY_NI("s_cmpk_le_u32"); break;
case 0x0F: KYTY_NI("s_addk_i32"); break;
case 0x10: inst.type = ShaderInstructionType::SMulkI32; break;
case 0x11: KYTY_NI("s_cbranch_i_fork"); break;
case 0x12: KYTY_NI("s_getreg_b32"); break;
case 0x13: KYTY_NI("s_setreg_b32"); break;
@ -579,6 +578,7 @@ KYTY_SHADER_PARSER(shader_parse_sop2)
case 0x32: KYTY_NI("s_pack_ll_b32_b16"); break;
case 0x33: KYTY_NI("s_pack_lh_b32_b16"); break;
case 0x34: KYTY_NI("s_pack_hh_b32_b16"); break;
case 0x35: inst.type = ShaderInstructionType::SMulHiU32; break;
default: KYTY_UNKNOWN_OP();
}
@ -601,22 +601,53 @@ KYTY_SHADER_PARSER(shader_parse_vopc)
uint32_t src0 = (buffer[0] >> 0u) & 0x1ffu;
uint32_t vsrc1 = (buffer[0] >> 9u) & 0xffu;
bool sdwa = (src0 == 249);
uint32_t size = (sdwa ? 2 : 1);
src0 = (sdwa ? (buffer[1] >> 0u) & 0xffu : src0);
uint32_t sdst = (sdwa ? (buffer[1] >> 8u) & 0x7fu : 0);
uint32_t sd = (sdwa ? (buffer[1] >> 15u) & 0x1u : 0);
uint32_t src0_sel = (sdwa ? (buffer[1] >> 16u) & 0x7u : 6);
uint32_t src0_sext = (sdwa ? (buffer[1] >> 19u) & 0x1u : 0);
uint32_t src0_neg = (sdwa ? (buffer[1] >> 20u) & 0x1u : 0);
uint32_t src0_abs = (sdwa ? (buffer[1] >> 21u) & 0x1u : 0);
uint32_t s0 = (sdwa ? (buffer[1] >> 23u) & 0x1u : 1);
uint32_t src1_sel = (sdwa ? (buffer[1] >> 24u) & 0x7u : 6);
uint32_t src1_sext = (sdwa ? (buffer[1] >> 27u) & 0x1u : 0);
uint32_t src1_neg = (sdwa ? (buffer[1] >> 28u) & 0x1u : 0);
uint32_t src1_abs = (sdwa ? (buffer[1] >> 29u) & 0x1u : 0);
uint32_t s1 = (sdwa ? (buffer[1] >> 31u) & 0x1u : 0);
EXIT_NOT_IMPLEMENTED(src0_sel != 6);
EXIT_NOT_IMPLEMENTED(src0_sext != 0);
EXIT_NOT_IMPLEMENTED(src0_neg != 0);
EXIT_NOT_IMPLEMENTED(src0_abs != 0);
EXIT_NOT_IMPLEMENTED(src1_sel != 6);
EXIT_NOT_IMPLEMENTED(src1_sext != 0);
EXIT_NOT_IMPLEMENTED(src1_neg != 0);
EXIT_NOT_IMPLEMENTED(src1_abs != 0);
ShaderInstruction inst;
inst.pc = pc;
inst.src[0] = operand_parse(src0);
inst.src[1] = operand_parse(vsrc1 + 256);
inst.src[0] = operand_parse(src0 + (s0 == 0 ? 256 : 0));
inst.src[1] = operand_parse(vsrc1 + (s1 == 0 ? 256 : 0));
inst.src_num = 2;
uint32_t size = 1;
if (inst.src[0].type == ShaderOperandType::LiteralConstant)
{
inst.src[0].constant.u = buffer[size];
size++;
}
inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1;
inst.dst.type = ShaderOperandType::VccLo;
inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1;
if (sd == 0)
{
inst.dst.type = ShaderOperandType::VccLo;
} else
{
inst.dst = operand_parse(sdst);
}
inst.dst.size = 2;
switch (opcode)
@ -1034,14 +1065,51 @@ KYTY_SHADER_PARSER(shader_parse_vop2)
uint32_t src0 = (buffer[0] >> 0u) & 0x1ffu;
uint32_t vsrc1 = (buffer[0] >> 9u) & 0xffu;
bool sdwa = (src0 == 249);
uint32_t size = (sdwa ? 2 : 1);
src0 = (sdwa ? (buffer[1] >> 0u) & 0xffu : src0);
uint32_t dst_sel = (sdwa ? (buffer[1] >> 8u) & 0x7u : 6);
uint32_t dst_u = (sdwa ? (buffer[1] >> 11u) & 0x3u : 2);
uint32_t clmp = (sdwa ? (buffer[1] >> 13u) & 0x1u : 0);
uint32_t omod = (sdwa ? (buffer[1] >> 14u) & 0x3u : 0);
uint32_t src0_sel = (sdwa ? (buffer[1] >> 16u) & 0x7u : 6);
uint32_t src0_sext = (sdwa ? (buffer[1] >> 19u) & 0x1u : 0);
uint32_t src0_neg = (sdwa ? (buffer[1] >> 20u) & 0x1u : 0);
uint32_t src0_abs = (sdwa ? (buffer[1] >> 21u) & 0x1u : 0);
uint32_t s0 = (sdwa ? (buffer[1] >> 23u) & 0x1u : 1);
uint32_t src1_sel = (sdwa ? (buffer[1] >> 24u) & 0x7u : 6);
uint32_t src1_sext = (sdwa ? (buffer[1] >> 27u) & 0x1u : 0);
uint32_t src1_neg = (sdwa ? (buffer[1] >> 28u) & 0x1u : 0);
uint32_t src1_abs = (sdwa ? (buffer[1] >> 29u) & 0x1u : 0);
uint32_t s1 = (sdwa ? (buffer[1] >> 31u) & 0x1u : 0);
EXIT_NOT_IMPLEMENTED(dst_sel != 6);
EXIT_NOT_IMPLEMENTED(sdwa && dst_sel == 6 && dst_u != 0);
EXIT_NOT_IMPLEMENTED(omod != 0);
EXIT_NOT_IMPLEMENTED(src0_sel != 6);
EXIT_NOT_IMPLEMENTED(src0_sext != 0);
EXIT_NOT_IMPLEMENTED(src0_neg != 0);
EXIT_NOT_IMPLEMENTED(src1_sel != 6);
EXIT_NOT_IMPLEMENTED(src1_sext != 0);
EXIT_NOT_IMPLEMENTED(src1_neg != 0);
ShaderInstruction inst;
inst.pc = pc;
inst.src[0] = operand_parse(src0);
inst.src[1] = operand_parse(vsrc1 + 256);
inst.src[0] = operand_parse(src0 + (s0 == 0 ? 256 : 0));
inst.src[1] = operand_parse(vsrc1 + (s1 == 0 ? 256 : 0));
inst.dst = operand_parse(vdst + 256);
inst.src_num = 2;
uint32_t size = 1;
switch (omod)
{
case 0: inst.dst.multiplier = 1.0f; break;
case 1: inst.dst.multiplier = 2.0f; break;
case 2: inst.dst.multiplier = 4.0f; break;
case 3: inst.dst.multiplier = 0.5f; break;
default: break;
}
if (inst.src[0].type == ShaderOperandType::LiteralConstant)
{
@ -1049,6 +1117,11 @@ KYTY_SHADER_PARSER(shader_parse_vop2)
size++;
}
inst.src[0].absolute = (src0_abs != 0);
inst.src[1].absolute = (src1_abs != 0);
inst.dst.clamp = (clmp != 0);
inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1;
switch (opcode)
@ -3252,17 +3325,18 @@ KYTY_SHADER_PARSER(shader_parse_vintrp)
inst.src[2].constant.u = chan;
inst.src_num = 3;
inst.format = ShaderInstructionFormat::VdstVsrcAttrChan;
switch (opcode)
{
case 0x00:
inst.type = ShaderInstructionType::VInterpP1F32;
inst.format = ShaderInstructionFormat::VdstVsrcAttrChan;
case 0x00: inst.type = ShaderInstructionType::VInterpP1F32; break;
case 0x01: inst.type = ShaderInstructionType::VInterpP2F32; break;
case 0x02:
inst.type = ShaderInstructionType::VInterpMovF32;
inst.src[0].type = ShaderOperandType::IntegerInlineConstant;
inst.src[0].constant.u = vsrc & 0x3u;
inst.src[0].size = 0;
break;
case 0x01:
inst.type = ShaderInstructionType::VInterpP2F32;
inst.format = ShaderInstructionFormat::VdstVsrcAttrChan;
break;
case 0x02: KYTY_NI("v_interp_mov_f32"); break;
default: KYTY_UNKNOWN_OP();
}

View file

@ -1792,8 +1792,6 @@ static bool operand_load_float(Spirv* spirv, ShaderOperand op, const String8& re
{
EXIT_IF(load == nullptr);
// EXIT_NOT_IMPLEMENTED(op.negate);
String8 l;
if (operand_is_constant(op))
@ -1824,8 +1822,12 @@ static bool operand_load_float(Spirv* spirv, ShaderOperand op, const String8& re
if (op.negate && op.absolute)
{
// TODO(): negated absolute value
return false;
l += String8(' ', 10) + String8("%abs_<index> = OpExtInst %float %GLSL_std_450 FAbs %<result_id>\n") + String8(' ', 10) +
String8("%<result> = OpFNegate %float %abs_<index>\n");
*load = l.ReplaceStr("<index>", index).ReplaceStr("<result_id>", "a" + result_id).ReplaceStr("<result>", result_id);
return true;
}
if (op.absolute)
@ -3615,7 +3617,7 @@ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_I32_SVdstSVsrc0SVsrc1)
return true;
}
/* XXX: Add, Addc, Bfe, Lshl4Add */
/* XXX: Add, Addc, Bfe, Lshl4Add, MulHi */
KYTY_RECOMPILER_FUNC(Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1)
{
const auto& inst = code.GetInstructions().At(index);
@ -4432,6 +4434,49 @@ KYTY_RECOMPILER_FUNC(Recompile_SLoadDwordx8_Sdst8SbaseSoffset)
return false;
}
/*
 * SMulkI32: emits SPIR-V that multiplies the current value of the destination register
 * by src[0] as signed integers and writes the product back to the destination as uint.
 * Returns false when either operand cannot be loaded as an int, true once the code is appended.
 */
KYTY_RECOMPILER_FUNC(Recompile_SMulkI32_SVdstSVsrc0)
{
	const auto& inst = code.GetInstructions().At(index);

	// The destination must be a plain uint variable; EXEC is not handled here.
	EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst));
	auto dst_value = operand_variable_to_str(inst.dst);
	EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Uint);
	EXIT_NOT_IMPLEMENTED(operand_is_exec(inst.dst));

	String8 suffix = String8::FromPrintf("%u", index);

	// The multiplier is loaded first; the destination (read as an input) is only attempted if that succeeds.
	String8 multiplier_load;
	String8 current_load;
	if (!operand_load_int(spirv, inst.src[0], "t0_<index>", suffix, &multiplier_load) ||
	    !operand_load_int(spirv, inst.dst, "tdst_<index>", suffix, &current_load))
	{
		return false;
	}

	static const char* spirv_template = R"(
<load0>
<load_dst>
%t_<index> = OpIMul %int %tdst_<index> %t0_<index>
%tu_<index> = OpBitcast %uint %t_<index>
OpStore %<dst> %tu_<index>
)";

	// <index> is substituted last so that it is also expanded inside the inserted load snippets.
	String8 emitted = String8(spirv_template)
	                      .ReplaceStr("<dst>", dst_value.value)
	                      .ReplaceStr("<load0>", multiplier_load)
	                      .ReplaceStr("<load_dst>", current_load)
	                      .ReplaceStr("<index>", suffix);

	*dst_source += emitted;

	return true;
}
KYTY_RECOMPILER_FUNC(Recompile_SMovB32_SVdstSVsrc0)
{
const auto& inst = code.GetInstructions().At(index);
@ -5301,6 +5346,37 @@ KYTY_RECOMPILER_FUNC(Recompile_VInterpP2F32_VdstVsrcAttrChan)
return true;
}
/*
 * VInterpMovF32: emits SPIR-V that reads one float component of an %attr<N> input variable
 * and stores it into the destination register.
 * src[1] supplies the attribute number and src[2] the component index; src[0] must be the constant 2
 * (NOTE(review): presumably the only parameter selector handled so far - confirm against the ISA docs).
 */
KYTY_RECOMPILER_FUNC(Recompile_VInterpMovF32_VdstVsrcAttrChan)
{
	const auto& inst = code.GetInstructions().At(index);

	// Only a variable destination with three constant sources is supported.
	EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst));
	EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[0]));
	EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[1]));
	EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[2]));
	EXIT_NOT_IMPLEMENTED(inst.src[0].constant.u != 2);

	String8 suffix    = String8::FromPrintf("%u", index);
	auto    dst_value = operand_variable_to_str(inst.dst);

	const auto attr_number = inst.src[1].constant.u;
	const auto component   = inst.src[2].constant.u;

	// Pointer to the requested component of the input attribute.
	String8 attr_ptr =
	    String8::FromPrintf("%%t0_<index> = OpAccessChain %%_ptr_Input_float %%attr%u %%uint_%u", attr_number, component);

	// TODO() check VSKIP
	// TODO() check EXEC

	static const char* spirv_template = R"(
<load0>
%t1_<index> = OpLoad %float %t0_<index>
OpStore %<dst> %t1_<index>
)";

	// <index> is substituted last so that it is also expanded inside the inserted access-chain line.
	String8 emitted =
	    String8(spirv_template).ReplaceStr("<dst>", dst_value.value).ReplaceStr("<load0>", attr_ptr).ReplaceStr("<index>", suffix);

	*dst_source += emitted;

	return true;
}
/* XXX: Mad, Madak, Madmk, Max3, Min3, Med3, Fma */
KYTY_RECOMPILER_FUNC(Recompile_V_XXX_F32_VdstVsrc0Vsrc1Vsrc2)
{
@ -6213,6 +6289,7 @@ const RecompilerFunc* RecompFunc(ShaderInstructionType type, ShaderInstructionFo
{Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SAddU32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%ts_<index> = OpIAddCarry %ResTypeU %t0_<index> %t1_<index>", "%t_<index> = OpCompositeExtract %uint %ts_<index> 0", "%carry_<index> = OpCompositeExtract %uint %ts_<index> 1"}, SccCheck::CarryOut},
{Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SBfeU32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%to_<index> = OpBitFieldUExtract %uint %t1_<index> %uint_0 %uint_5", "%ts_<index> = OpBitFieldUExtract %uint %t1_<index> %uint_16 %uint_7", "%t_<index> = OpBitFieldUExtract %uint %t0_<index> %to_<index> %ts_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SLshl4AddU32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%ts_<index> = OpFunctionCall %v2uint %lshl_add %t0_<index> %t1_<index> %uint_4", "%t_<index> = OpCompositeExtract %uint %ts_<index> 0", "%carry_<index> = OpCompositeExtract %uint %ts_<index> 1"}, SccCheck::CarryOut},
{Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SMulHiU32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%t_<index> = OpFunctionCall %uint %mul_hi_uint %t0_<index> %t1_<index>"}, SccCheck::None},
{Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VAndB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%t_<index> = OpBitwiseAnd %uint %t0_<index> %t1_<index>"}},
{Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VBcntU32B32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%tb_<index> = OpBitCount %int %t0_<index>", "%tbu_<index> = OpBitcast %uint %tb_<index>", "%t_<index> = OpIAdd %uint %tbu_<index> %t1_<index>"}},
{Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VBfmB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%tcount_<index> = OpBitwiseAnd %uint %t0_<index> %uint_31", "%toffset_<index> = OpBitwiseAnd %uint %t1_<index> %uint_31", "%t_<index> = OpBitFieldInsert %uint %uint_0 %uint_0xffffffff %toffset_<index> %tcount_<index>"}},
@ -6242,6 +6319,7 @@ const RecompilerFunc* RecompFunc(ShaderInstructionType type, ShaderInstructionFo
{Recompile_SMovB32_SVdstSVsrc0, ShaderInstructionType::SMovB32, ShaderInstructionFormat::SVdstSVsrc0, {""}},
{Recompile_SMovB32_SVdstSVsrc0, ShaderInstructionType::SMovkI32, ShaderInstructionFormat::SVdstSVsrc0, {""}},
{Recompile_SMulkI32_SVdstSVsrc0, ShaderInstructionType::SMulkI32, ShaderInstructionFormat::SVdstSVsrc0, {""}},
{Recompile_V_XXX_B32_SVdstSVsrc0, ShaderInstructionType::VBfrevB32, ShaderInstructionFormat::SVdstSVsrc0, {"%t_<index> = OpBitReverse %uint %t0_<index>"}},
{Recompile_V_XXX_B32_SVdstSVsrc0, ShaderInstructionType::VNotB32, ShaderInstructionFormat::SVdstSVsrc0, {"%t_<index> = OpNot %uint %t0_<index>"}},
{Recompile_V_XXX_F32_SVdstSVsrc0, ShaderInstructionType::VCeilF32, ShaderInstructionFormat::SVdstSVsrc0, {"%t_<index> = OpExtInst %float %GLSL_std_450 Ceil %t0_<index>"}},
@ -6338,6 +6416,7 @@ const RecompilerFunc* RecompFunc(ShaderInstructionType type, ShaderInstructionFo
{Recompile_VCndmaskB32_VdstVsrc0Vsrc1Smask2, ShaderInstructionType::VCndmaskB32, ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2, {""}},
{Recompile_VInterpMovF32_VdstVsrcAttrChan, ShaderInstructionType::VInterpMovF32, ShaderInstructionFormat::VdstVsrcAttrChan, {""}},
{Recompile_VInterpP1F32_VdstVsrcAttrChan, ShaderInstructionType::VInterpP1F32, ShaderInstructionFormat::VdstVsrcAttrChan, {""}},
{Recompile_VInterpP2F32_VdstVsrcAttrChan, ShaderInstructionType::VInterpP2F32, ShaderInstructionFormat::VdstVsrcAttrChan, {""}},
@ -6768,9 +6847,16 @@ void Spirv::WriteAnnotations()
{
for (uint32_t i = 0; i < m_ps_input_info->input_num; i++)
{
EXIT_NOT_IMPLEMENTED((m_ps_input_info->interpolator_settings[i] & ~static_cast<uint32_t>(0x1f)) != 0);
EXIT_NOT_IMPLEMENTED((m_ps_input_info->interpolator_settings[i] & ~static_cast<uint32_t>(0x41fu)) != 0);
vars.Add(String8::FromPrintf("OpDecorate %%attr%d Location %d", i, m_ps_input_info->interpolator_settings[i]));
bool flat = (m_ps_input_info->interpolator_settings[i] & 0x400u) != 0;
uint32_t location = m_ps_input_info->interpolator_settings[i] & 0x1fu;
if (flat)
{
vars.Add(String8::FromPrintf("OpDecorate %%attr%u Flat", i));
}
vars.Add(String8::FromPrintf("OpDecorate %%attr%u Location %u", i, location));
}
if (m_ps_input_info->ps_pos_xy)
{
@ -7424,7 +7510,21 @@ void Spirv::WriteLocalVariables()
}
}
/* buffer_index += (m_bind->gds_pointers.pointers_num > 0 ? (m_bind->gds_pointers.pointers_num - 1) / 4 + 1 : 0); */
buffer_index += (m_bind->gds_pointers.pointers_num > 0 ? (m_bind->gds_pointers.pointers_num - 1) / 4 + 1 : 0);
for (int i = 0; i < m_bind->direct_sgprs.sgprs_num; i++)
{
int start_reg = m_bind->direct_sgprs.start_register[i];
EXIT_IF(buffer_index + i / 4 >= static_cast<int>(m_bind->push_constant_size) / 16);
String8 buffer = String8::FromPrintf("%d", buffer_index + i / 4);
String8 reg = String8::FromPrintf("s%d", start_reg + shift_regs);
String8 field = String8::FromPrintf("%d", i % 4);
m_source += String8(text).ReplaceStr("<reg>", reg).ReplaceStr("<buffer>", buffer).ReplaceStr("<field>", field);
}
/* buffer_index += (m_bind->direct_sgprs.sgprs_num > 0 ? (m_bind->direct_sgprs.sgprs_num - 1) / 4 + 1 : 0); */
if (m_bind->extended.used)
{
@ -7781,7 +7881,7 @@ void Spirv::WriteFunctions()
}
if (m_code.HasAnyOf({ShaderInstructionType::VMulLoI32, ShaderInstructionType::VMulLoU32, ShaderInstructionType::VMulHiU32,
ShaderInstructionType::VMadU32U24, ShaderInstructionType::VMulU32U24}))
ShaderInstructionType::VMadU32U24, ShaderInstructionType::VMulU32U24, ShaderInstructionType::SMulHiU32}))
{
m_source += FUNC_MUL_EXTENDED;
}

View file

@ -17,12 +17,6 @@
#define KYTY_CFG_GET(n) n = s->value(#n).value<decltype(n)>();
#define KYTY_CFG_GETL(n) n = s->value(#n).toStringList();
//#define KYTY_LIBS \
// { \
// "libc_internal_1", "libkernel_1", "libVideoOut_1", "libSysmodule_1", "libDiscMap_1", "libDebug_1", "libGraphicsDriver_1", \
// "libUserService_1", "libSystemService_1", "libPad_1", "libNet_1", "libDialog_1", "libAudio_1", "libPlayGo_1", "libSaveData_1", \
// "libAppContent_1" \
// }
template <class T>
inline QStringList EnumToList()