Improve shader recompiler

This commit is contained in:
InoriRus 2021-12-13 15:42:20 +10:00
parent ba00d71a33
commit d0c1c8b1c8
6 changed files with 622 additions and 349 deletions

View file

@ -79,7 +79,7 @@ if (KYTY_LINKER STREQUAL LD)
set(KYTY_LD_OPTIONS "-Wl,--image-base=0x100000000000")
endif()
project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.0.2)
project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.0.3)
include(src_script.cmake)

View file

@ -36,6 +36,7 @@ enum class ShaderInstructionType
BufferLoadFormatXyzw,
BufferStoreDword,
BufferStoreFormatX,
BufferStoreFormatXy,
DsAppend,
DsConsume,
Exp,
@ -44,6 +45,7 @@ enum class ShaderInstructionType
SAndB32,
SAndn2B64,
SAndSaveexecB64,
SBfmB32,
SBufferLoadDword,
SBufferLoadDwordx16,
SBufferLoadDwordx2,
@ -52,6 +54,12 @@ enum class ShaderInstructionType
SCbranchExecz,
SCbranchScc0,
SCmpEqU32,
SCmpGeU32,
SCmpGeI32,
SCmpLeU32,
SCmpLgU32,
SCselectB32,
SCselectB64,
SEndpgm,
SLoadDwordx4,
SLoadDwordx8,
@ -62,7 +70,6 @@ enum class ShaderInstructionType
SMulI32,
SNorB64,
SOrB64,
SCselectB32,
SSetpcB64,
SSwappcB64,
SWaitcnt,
@ -70,13 +77,19 @@ enum class ShaderInstructionType
TBufferLoadFormatXyzw,
VAddI32,
VAndB32,
VAshrI32,
VAshrrevI32,
VBcntU32B32,
VBfeU32,
VCmpEqF32,
VCmpEqI32,
VCmpEqU32,
VCmpGeF32,
VCmpGeU32,
VCmpGtI32,
VCmpLeF32,
VCmpLeU32,
VCmpNeI32,
VCmpNeqF32,
VCmpNeU32,
VCmpxEqU32,
@ -92,12 +105,14 @@ enum class ShaderInstructionType
VCvtU32F32,
VInterpP1F32,
VInterpP2F32,
VLshlB32,
VLshlrevB32,
VLshrB32,
VLshrrevB32,
VMacF32,
VMadakF32,
VMadF32,
VMadmkF32,
VMadU32U24,
VMaxF32,
VMbcntHiU32B32,
@ -105,7 +120,9 @@ enum class ShaderInstructionType
VMinF32,
VMovB32,
VMulF32,
VMulHiU32,
VMulLoI32,
VMulLoU32,
VMulU32U24,
VNotB32,
VRcpF32,
@ -203,7 +220,6 @@ enum Format : uint64_t
SVdstSVsrc0 = FormatDefine({D, S0}),
SVdstSVsrc0SVsrc1 = FormatDefine({D, S0, S1}),
Sdst2Ssrc02 = FormatDefine({DA2, S0A2}),
Sdst2Ssrc0 = FormatDefine({DA2, S0}),
Sdst2Ssrc02Ssrc12 = FormatDefine({DA2, S0A2, S1A2}),
SmaskVsrc0Vsrc1 = FormatDefine({DA2, S0, S1}),
Ssrc0Ssrc1 = FormatDefine({S0, S1}),

View file

@ -2742,6 +2742,8 @@ static void PrepareStorageBuffers(const ShaderStorageResources& storage_buffers,
r.Dfmt() == 4 && r.Nfmt() == 4) ||
(r.Stride() == 4 && r.DstSelX() == 4 && r.DstSelY() == 0 && r.DstSelZ() == 0 && r.DstSelW() == 1 &&
r.Dfmt() == 4 && r.Nfmt() == 7) ||
(r.Stride() == 8 && r.DstSelX() == 4 && r.DstSelY() == 5 && r.DstSelZ() == 0 && r.DstSelW() == 0 &&
r.Dfmt() == 11 && r.Nfmt() == 4) ||
(r.Stride() == 16 && r.DstSelX() == 4 && r.DstSelY() == 5 && r.DstSelZ() == 6 && r.DstSelW() == 7 &&
r.Dfmt() == 14 && r.Nfmt() == 7)));
EXIT_NOT_IMPLEMENTED(!(r.MemoryType() == 0x00 || r.MemoryType() == 0x10 || r.MemoryType() == 0x6d));

View file

@ -916,6 +916,9 @@ void ComputeRing::DingDong(uint32_t offset_dw)
EXIT_IF(!m_active);
WindowWaitForGraphicInitialized();
GraphicsRenderCreateContext();
if (m_done)
{
while (!m_idle)

View file

@ -21,6 +21,9 @@
#include <string>
#include <vector>
//#define SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS
//#include "spirv_cross/spirv_glsl.hpp"
#ifdef KYTY_EMU_ENABLED
#define KYTY_SHADER_PARSER_ARGS \
@ -141,6 +144,12 @@ static String operand_array_to_str(ShaderOperand op, int n)
break;
case ShaderOperandType::Sgpr: ret = String::FromPrintf("s[%d:%d]", op.register_id, op.register_id + n - 1); break;
case ShaderOperandType::Vgpr: ret = String::FromPrintf("v[%d:%d]", op.register_id, op.register_id + n - 1); break;
case ShaderOperandType::IntegerInlineConstant:
if (n == 2)
{
ret = String::FromPrintf("%d", op.constant.i);
}
break;
default: break;
}
@ -179,7 +188,6 @@ static String dbg_fmt_to_str(const ShaderInstruction& inst)
case ShaderInstructionFormat::Sdst16SvSoffset: return U"Sdst16SvSoffset"; break;
case ShaderInstructionFormat::SVdstSVsrc0: return U"SVdstSVsrc0"; break;
case ShaderInstructionFormat::Sdst2Ssrc02: return U"Sdst2Ssrc02"; break;
case ShaderInstructionFormat::Sdst2Ssrc0: return U"Sdst2Ssrc0"; break;
case ShaderInstructionFormat::Sdst2Ssrc02Ssrc12: return U"Sdst2Ssrc02Ssrc12"; break;
case ShaderInstructionFormat::SmaskVsrc0Vsrc1: return U"SmaskVsrc0Vsrc1"; break;
case ShaderInstructionFormat::Ssrc0Ssrc1: return U"Ssrc0Ssrc1"; break;
@ -428,9 +436,13 @@ KYTY_SHADER_PARSER(shader_parse_sopc)
inst.format = ShaderInstructionFormat::Ssrc0Ssrc1;
switch (opcode) // NOLINT
switch (opcode)
{
case 0x03: inst.type = ShaderInstructionType::SCmpGeI32; break;
case 0x06: inst.type = ShaderInstructionType::SCmpEqU32; break;
case 0x07: inst.type = ShaderInstructionType::SCmpLgU32; break;
case 0x09: inst.type = ShaderInstructionType::SCmpGeU32; break;
case 0x0b: inst.type = ShaderInstructionType::SCmpLeU32; break;
default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown sopc opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc);
}
@ -529,19 +541,10 @@ KYTY_SHADER_PARSER(shader_parse_sop1)
inst.format = ShaderInstructionFormat::SVdstSVsrc0;
break;
case 0x04:
inst.type = ShaderInstructionType::SMovB64;
inst.dst.size = 2;
switch (inst.src[0].type)
{
case ShaderOperandType::VccLo:
case ShaderOperandType::ExecLo:
case ShaderOperandType::Sgpr:
inst.format = ShaderInstructionFormat::Sdst2Ssrc02;
inst.src[0].size = 2;
break;
case ShaderOperandType::IntegerInlineConstant: inst.format = ShaderInstructionFormat::Sdst2Ssrc0; break;
default: EXIT("unknown src0 type");
}
inst.type = ShaderInstructionType::SMovB64;
inst.format = ShaderInstructionFormat::Sdst2Ssrc02;
inst.dst.size = 2;
inst.src[0].size = 2;
break;
case 0x0a:
inst.type = ShaderInstructionType::SWqmB64;
@ -626,6 +629,13 @@ KYTY_SHADER_PARSER(shader_parse_sop2)
{
case 0x02: inst.type = ShaderInstructionType::SAddI32; break;
case 0x0a: inst.type = ShaderInstructionType::SCselectB32; break;
case 0x0b:
inst.type = ShaderInstructionType::SCselectB64;
inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12;
inst.dst.size = 2;
inst.src[0].size = 2;
inst.src[1].size = 2;
break;
case 0x0e: inst.type = ShaderInstructionType::SAndB32; break;
case 0x11:
inst.type = ShaderInstructionType::SOrB64;
@ -650,6 +660,7 @@ KYTY_SHADER_PARSER(shader_parse_sop2)
break;
case 0x1e: inst.type = ShaderInstructionType::SLshlB32; break;
case 0x20: inst.type = ShaderInstructionType::SLshrB32; break;
case 0x24: inst.type = ShaderInstructionType::SBfmB32; break;
case 0x26: inst.type = ShaderInstructionType::SMulI32; break;
default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown sop2 opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc);
}
@ -690,10 +701,15 @@ KYTY_SHADER_PARSER(shader_parse_vopc)
switch (opcode)
{
case 0x02: inst.type = ShaderInstructionType::VCmpEqF32; break;
case 0x03: inst.type = ShaderInstructionType::VCmpLeF32; break;
case 0x0d: inst.type = ShaderInstructionType::VCmpNeqF32; break;
case 0x82: inst.type = ShaderInstructionType::VCmpEqI32; break;
case 0x84: inst.type = ShaderInstructionType::VCmpGtI32; break;
case 0x85: inst.type = ShaderInstructionType::VCmpNeI32; break;
case 0xc2: inst.type = ShaderInstructionType::VCmpEqU32; break;
case 0xc3: inst.type = ShaderInstructionType::VCmpLeU32; break;
case 0xc5: inst.type = ShaderInstructionType::VCmpNeU32; break;
case 0xc6: inst.type = ShaderInstructionType::VCmpGeU32; break;
case 0xd2: inst.type = ShaderInstructionType::VCmpxEqU32; break;
case 0xd4: inst.type = ShaderInstructionType::VCmpxGtU32; break;
case 0xd5: inst.type = ShaderInstructionType::VCmpxNeU32; break;
@ -797,10 +813,22 @@ KYTY_SHADER_PARSER(shader_parse_vop2)
case 0x10: inst.type = ShaderInstructionType::VMaxF32; break;
case 0x15: inst.type = ShaderInstructionType::VLshrB32; break;
case 0x16: inst.type = ShaderInstructionType::VLshrrevB32; break;
case 0x17: inst.type = ShaderInstructionType::VAshrI32; break;
case 0x18: inst.type = ShaderInstructionType::VAshrrevI32; break;
case 0x19: inst.type = ShaderInstructionType::VLshlB32; break;
case 0x1a: inst.type = ShaderInstructionType::VLshlrevB32; break;
case 0x1b: inst.type = ShaderInstructionType::VAndB32; break;
case 0x1f: inst.type = ShaderInstructionType::VMacF32; break;
case 0x20:
inst.type = ShaderInstructionType::VMadmkF32;
inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2;
inst.src_num = 3;
inst.src[2] = inst.src[1];
inst.src[1].type = ShaderOperandType::LiteralConstant;
inst.src[1].constant.u = buffer[size];
inst.src[1].size = 0;
size++;
break;
case 0x21:
inst.type = ShaderInstructionType::VMadakF32;
inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2;
@ -810,6 +838,7 @@ KYTY_SHADER_PARSER(shader_parse_vop2)
inst.src[2].size = 0;
size++;
break;
case 0x22: inst.type = ShaderInstructionType::VBcntU32B32; break;
case 0x23: inst.type = ShaderInstructionType::VMbcntLoU32B32; break;
case 0x24: inst.type = ShaderInstructionType::VMbcntHiU32B32; break;
case 0x25:
@ -851,24 +880,23 @@ KYTY_SHADER_PARSER(shader_parse_vop3)
uint32_t clamp = (buffer[0] >> 11u) & 0x1u;
uint32_t abs = (buffer[0] >> 8u) & 0x7u;
uint32_t vdst = (buffer[0] >> 0u) & 0xffu;
// uint32_t sdst = (buffer[0] >> 8u) & 0x7fu;
uint32_t neg = (buffer[1] >> 29u) & 0x7u;
uint32_t omod = (buffer[1] >> 27u) & 0x3u;
uint32_t src0 = (buffer[1] >> 0u) & 0x1ffu;
uint32_t src1 = (buffer[1] >> 9u) & 0x1ffu;
uint32_t src2 = (buffer[1] >> 18u) & 0x1ffu;
uint32_t sdst = (buffer[0] >> 8u) & 0x7fu;
uint32_t neg = (buffer[1] >> 29u) & 0x7u;
uint32_t omod = (buffer[1] >> 27u) & 0x3u;
uint32_t src0 = (buffer[1] >> 0u) & 0x1ffu;
uint32_t src1 = (buffer[1] >> 9u) & 0x1ffu;
uint32_t src2 = (buffer[1] >> 18u) & 0x1ffu;
EXIT_NOT_IMPLEMENTED(abs != 0);
// EXIT_NOT_IMPLEMENTED(abs != 0);
// EXIT_NOT_IMPLEMENTED(sdst != 0);
ShaderInstruction inst;
inst.pc = pc;
inst.src[0] = operand_parse(src0);
inst.src[1] = operand_parse(src1);
inst.src[2] = operand_parse(src2);
inst.src_num = 3;
inst.dst = operand_parse(vdst + 256);
inst.dst.clamp = (clamp != 0);
inst.pc = pc;
inst.src[0] = operand_parse(src0);
inst.src[1] = operand_parse(src1);
inst.src[2] = operand_parse(src2);
inst.src_num = 3;
inst.dst = operand_parse(vdst + 256);
switch (omod)
{
@ -914,84 +942,76 @@ KYTY_SHADER_PARSER(shader_parse_vop3)
inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2;
if (opcode >= 0 && opcode <= 0xff)
{
inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1;
inst.src_num = 2;
inst.dst = operand_parse(vdst);
inst.dst.size = 2;
}
if (opcode >= 0x100 && opcode <= 0x13d)
{
inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1;
inst.src_num = 2;
}
switch (opcode)
{
case 0x02:
inst.type = ShaderInstructionType::VCmpEqF32;
inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1;
inst.src_num = 2;
inst.dst = operand_parse(vdst);
inst.dst.size = 2;
break;
case 0x03:
inst.type = ShaderInstructionType::VCmpLeF32;
inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1;
inst.src_num = 2;
inst.dst = operand_parse(vdst);
inst.dst.size = 2;
break;
case 0x84:
inst.type = ShaderInstructionType::VCmpGtI32;
inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1;
inst.src_num = 2;
inst.dst = operand_parse(vdst);
inst.dst.size = 2;
break;
case 0xc2:
inst.type = ShaderInstructionType::VCmpEqU32;
inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1;
inst.src_num = 2;
inst.dst = operand_parse(vdst);
inst.dst.size = 2;
break;
case 0xc3:
inst.type = ShaderInstructionType::VCmpLeU32;
inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1;
inst.src_num = 2;
inst.dst = operand_parse(vdst);
inst.dst.size = 2;
break;
case 0xd4:
inst.type = ShaderInstructionType::VCmpxGtU32;
inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1;
inst.src_num = 2;
inst.dst = operand_parse(vdst);
inst.dst.size = 2;
break;
case 0x02: inst.type = ShaderInstructionType::VCmpEqF32; break;
case 0x03: inst.type = ShaderInstructionType::VCmpLeF32; break;
case 0x06: inst.type = ShaderInstructionType::VCmpGeF32; break;
case 0x0d: inst.type = ShaderInstructionType::VCmpNeqF32; break;
case 0x84: inst.type = ShaderInstructionType::VCmpGtI32; break;
case 0xc2: inst.type = ShaderInstructionType::VCmpEqU32; break;
case 0xc3: inst.type = ShaderInstructionType::VCmpLeU32; break;
case 0xc5: inst.type = ShaderInstructionType::VCmpNeU32; break;
case 0xc6: inst.type = ShaderInstructionType::VCmpGeU32; break;
case 0xd4: inst.type = ShaderInstructionType::VCmpxGtU32; break;
case 0x100:
inst.type = ShaderInstructionType::VCndmaskB32;
inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2;
inst.src_num = 3;
inst.src[2].size = 2;
break;
case 0x104:
inst.type = ShaderInstructionType::VSubF32;
inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1;
inst.src_num = 2;
break;
case 0x108:
inst.type = ShaderInstructionType::VMulF32;
inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1;
inst.src_num = 2;
break;
case 0x11f:
inst.type = ShaderInstructionType::VMacF32;
inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1;
inst.src_num = 2;
break;
case 0x124:
inst.type = ShaderInstructionType::VMbcntHiU32B32;
inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1;
inst.src_num = 2;
break;
case 0x12f:
inst.type = ShaderInstructionType::VCvtPkrtzF16F32;
inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1;
inst.src_num = 2;
case 0x104: inst.type = ShaderInstructionType::VSubF32; break;
case 0x105: inst.type = ShaderInstructionType::VSubrevF32; break;
case 0x108: inst.type = ShaderInstructionType::VMulF32; break;
case 0x10b: inst.type = ShaderInstructionType::VMulU32U24; break;
case 0x10f: inst.type = ShaderInstructionType::VMinF32; break;
case 0x110: inst.type = ShaderInstructionType::VMaxF32; break;
case 0x115: inst.type = ShaderInstructionType::VLshrB32; break;
case 0x116: inst.type = ShaderInstructionType::VLshrrevB32; break;
case 0x117: inst.type = ShaderInstructionType::VAshrI32; break;
case 0x118: inst.type = ShaderInstructionType::VAshrrevI32; break;
case 0x119: inst.type = ShaderInstructionType::VLshlB32; break;
case 0x11a: inst.type = ShaderInstructionType::VLshlrevB32; break;
case 0x11b: inst.type = ShaderInstructionType::VAndB32; break;
case 0x11f: inst.type = ShaderInstructionType::VMacF32; break;
case 0x122: inst.type = ShaderInstructionType::VBcntU32B32; break;
case 0x123: inst.type = ShaderInstructionType::VMbcntLoU32B32; break;
case 0x124: inst.type = ShaderInstructionType::VMbcntHiU32B32; break;
case 0x125:
inst.type = ShaderInstructionType::VAddI32;
inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1;
inst.dst2 = operand_parse(sdst);
inst.dst2.size = 2;
break;
case 0x12f: inst.type = ShaderInstructionType::VCvtPkrtzF16F32; break;
case 0x141: inst.type = ShaderInstructionType::VMadF32; break;
case 0x143: inst.type = ShaderInstructionType::VMadU32U24; break;
case 0x148: inst.type = ShaderInstructionType::VBfeU32; break;
case 0x15d: inst.type = ShaderInstructionType::VSadU32; break;
case 0x169:
inst.type = ShaderInstructionType::VMulLoU32;
inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1;
inst.src_num = 2;
break;
case 0x16a:
inst.type = ShaderInstructionType::VMulHiU32;
inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1;
inst.src_num = 2;
break;
case 0x16b:
inst.type = ShaderInstructionType::VMulLoI32;
inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1;
@ -1000,6 +1020,13 @@ KYTY_SHADER_PARSER(shader_parse_vop3)
default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown vop3 opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc);
}
if (inst.dst2.type == ShaderOperandType::Unknown)
{
EXIT_NOT_IMPLEMENTED(abs != 0);
inst.dst.clamp = (clamp != 0);
}
dst->GetInstructions().Add(inst);
return size;
@ -1240,6 +1267,12 @@ KYTY_SHADER_PARSER(shader_parse_mubuf)
inst.format = ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen;
inst.src[1].size = 4;
break;
case 0x05:
inst.type = ShaderInstructionType::BufferStoreFormatXy;
inst.format = ShaderInstructionFormat::Vdata2VaddrSvSoffsIdxen;
inst.src[1].size = 4;
inst.dst.size = 2;
break;
case 0x0c:
inst.type = ShaderInstructionType::BufferLoadDword;
inst.format = ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen;
@ -1673,11 +1706,11 @@ static void ps_check(const PsStageRegisters& ps)
static void cs_check(const CsStageRegisters& cs)
{
EXIT_NOT_IMPLEMENTED(cs.num_thread_x != 0x00000040);
// EXIT_NOT_IMPLEMENTED(cs.num_thread_x != 0x00000040);
EXIT_NOT_IMPLEMENTED(cs.num_thread_y != 0x00000001);
EXIT_NOT_IMPLEMENTED(cs.num_thread_z != 0x00000001);
EXIT_NOT_IMPLEMENTED(cs.vgprs != 0x00);
EXIT_NOT_IMPLEMENTED(cs.sgprs != 0x01);
// EXIT_NOT_IMPLEMENTED(cs.vgprs != 0x00 && cs.vgprs != 0x01);
// EXIT_NOT_IMPLEMENTED(cs.sgprs != 0x01 && cs.sgprs != 0x02);
EXIT_NOT_IMPLEMENTED(cs.bulky != 0x00);
EXIT_NOT_IMPLEMENTED(cs.scratch_en != 0x00);
// EXIT_NOT_IMPLEMENTED(cs.user_sgpr != 0x0c);
@ -1706,6 +1739,8 @@ static bool SpirvDisassemble(const uint32_t* src_binary, size_t src_binary_size,
uint32_t(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER) | SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES |
SPV_BINARY_TO_TEXT_OPTION_COMMENT | SPV_BINARY_TO_TEXT_OPTION_INDENT | SPV_BINARY_TO_TEXT_OPTION_COLOR))
{
*dst_disassembly = disassembly.c_str();
printf("Disassemble failed\n");
return false;
}
@ -1715,6 +1750,19 @@ static bool SpirvDisassemble(const uint32_t* src_binary, size_t src_binary_size,
return true;
}
// Placeholder for a SPIR-V -> GLSL translation step.
// The spirv_cross-based implementation below is commented out, so all three
// parameters are currently unused (their names are commented out in the signature),
// nothing is written through the dst_code pointer, and the function
// unconditionally returns true.
static bool SpirvToGlsl(const uint32_t* /*src_binary*/, size_t /*src_binary_size*/, String* /*dst_code*/)
{
// Disabled implementation, kept for reference:
// if (dst_code != nullptr)
// {
// spirv_cross::CompilerGLSL glsl(src_binary, src_binary_size);
//
// std::string source = glsl.compile();
//
// *dst_code = source.c_str();
// }
return true;
}
static bool SpirvRun(const String& src, Vector<uint32_t>* dst)
{
EXIT_IF(dst == nullptr);
@ -2216,9 +2264,7 @@ void ShaderGetInputInfoPS(const PixelShaderInfo* regs, const ShaderVertexInputIn
auto usages = GetUsageSlots(src);
// bool extended = false;
uint32_t* extended_buffer = nullptr;
// int extended_dw_num = 0;
for (int i = 0; i < usages.slots_num; i++)
{
@ -2231,25 +2277,21 @@ void ShaderGetInputInfoPS(const PixelShaderInfo* regs, const ShaderVertexInputIn
{
ShaderGetStorageBuffer(&ps_info->bind.storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::ReadOnly,
regs->ps_user_sgpr, extended_buffer);
// extended_dw_num += (extended ? 4 : 0);
} else if (usage.flags == 3)
{
ShaderGetTextureBuffer(&ps_info->bind.textures2D, usage.start_register, usage.slot, regs->ps_user_sgpr,
extended_buffer);
// extended_dw_num += (extended ? 8 : 0);
EXIT_NOT_IMPLEMENTED(ps_info->bind.textures2D.textures[ps_info->bind.textures2D.textures_num - 1].Type() != 9);
}
break;
case 0x01:
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
ShaderGetSampler(&ps_info->bind.samplers, usage.start_register, usage.slot, regs->ps_user_sgpr, extended_buffer);
// extended_dw_num += (extended ? 4 : 0);
break;
case 0x02:
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
ShaderGetStorageBuffer(&ps_info->bind.storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::Constant,
regs->ps_user_sgpr, extended_buffer);
// extended_dw_num += (extended ? 4 : 0);
break;
case 0x1b:
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
@ -2258,11 +2300,9 @@ void ShaderGetInputInfoPS(const PixelShaderInfo* regs, const ShaderVertexInputIn
ps_info->bind.extended.used = true;
ps_info->bind.extended.slot = usage.slot;
ps_info->bind.extended.start_register = usage.start_register;
// ps_info->bind.extended.dw_num = 0;
ps_info->bind.extended.data.fields[0] = regs->ps_user_sgpr.value[usage.start_register];
ps_info->bind.extended.data.fields[1] = regs->ps_user_sgpr.value[usage.start_register + 1];
// extended = true;
extended_buffer = reinterpret_cast<uint32_t*>(ps_info->bind.extended.data.Base());
extended_buffer = reinterpret_cast<uint32_t*>(ps_info->bind.extended.data.Base());
break;
default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type);
}
@ -2290,6 +2330,8 @@ void ShaderGetInputInfoCS(const ComputeShaderInfo* regs, ShaderComputeInputInfo*
auto usages = GetUsageSlots(src);
uint32_t* extended_buffer = nullptr;
for (int i = 0; i < usages.slots_num; i++)
{
const auto& usage = usages.slots[i];
@ -2298,21 +2340,32 @@ void ShaderGetInputInfoCS(const ComputeShaderInfo* regs, ShaderComputeInputInfo*
case 0x00:
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
ShaderGetStorageBuffer(&info->bind.storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::ReadOnly,
regs->cs_user_sgpr, nullptr);
regs->cs_user_sgpr, extended_buffer);
break;
case 0x02:
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
ShaderGetStorageBuffer(&info->bind.storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::Constant,
regs->cs_user_sgpr, nullptr);
regs->cs_user_sgpr, extended_buffer);
break;
case 0x04:
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
ShaderGetStorageBuffer(&info->bind.storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::ReadWrite,
regs->cs_user_sgpr, nullptr);
regs->cs_user_sgpr, extended_buffer);
break;
case 0x07:
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
ShaderGetGdsPointer(&info->bind.gds_pointers, usage.start_register, usage.slot, regs->cs_user_sgpr, nullptr);
ShaderGetGdsPointer(&info->bind.gds_pointers, usage.start_register, usage.slot, regs->cs_user_sgpr, extended_buffer);
break;
case 0x1b:
EXIT_NOT_IMPLEMENTED(usage.flags != 0);
EXIT_NOT_IMPLEMENTED(usage.slot != 1);
EXIT_NOT_IMPLEMENTED(info->bind.extended.used);
info->bind.extended.used = true;
info->bind.extended.slot = usage.slot;
info->bind.extended.start_register = usage.start_register;
info->bind.extended.data.fields[0] = regs->cs_user_sgpr.value[usage.start_register];
info->bind.extended.data.fields[1] = regs->cs_user_sgpr.value[usage.start_register + 1];
extended_buffer = reinterpret_cast<uint32_t*>(info->bind.extended.data.Base());
break;
default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type);
}
@ -2546,13 +2599,13 @@ public:
break;
case Config::ShaderLogDirection::File:
{
String file_name = Config::GetShaderLogFolder().FixDirectorySlash() +
String::FromPrintf("%04d_%04d_shader_%s.log", GraphicsRunGetFrameNum(), id++, type);
Core::File::CreateDirectories(file_name.DirectoryWithoutFilename());
m_file.Create(file_name);
m_file_name = Config::GetShaderLogFolder().FixDirectorySlash() +
String::FromPrintf("%04d_%04d_shader_%s.log", GraphicsRunGetFrameNum(), id++, type);
Core::File::CreateDirectories(m_file_name.DirectoryWithoutFilename());
m_file.Create(m_file_name);
if (m_file.IsInvalid())
{
printf(FG_BRIGHT_RED "Can't create file: %s\n" FG_DEFAULT, file_name.C_Str());
printf(FG_BRIGHT_RED "Can't create file: %s\n" FG_DEFAULT, m_file_name.C_Str());
m_enabled = false;
}
m_enabled = true;
@ -2631,10 +2684,52 @@ public:
}
}
void DumpGlslShader(const Vector<uint32_t>& bin)
{
if (m_enabled)
{
String text;
if (!SpirvToGlsl(bin.GetDataConst(), bin.Size(), &text))
{
EXIT("SpirvToGlsl() failed\n");
}
if (m_console)
{
printf("--------- Glsl Shader ---------\n");
printf("%s\n", text.C_Str());
printf("---------\n");
} else if (!m_file.IsInvalid())
{
m_file.Printf("--------- Glsl Shader ---------\n");
m_file.Printf("%s\n", Log::RemoveColors(text).C_Str());
m_file.Printf("---------\n");
}
}
}
void DumpBinary(const Vector<uint32_t>& bin)
{
if (m_enabled && !m_console && !m_file.IsInvalid())
{
Core::File file;
String file_name = m_file_name.FilenameWithoutExtension() + U".spv";
file.Create(file_name);
if (file.IsInvalid())
{
printf(FG_BRIGHT_RED "Can't create file: %s\n" FG_DEFAULT, file_name.C_Str());
} else
{
file.Write(bin.GetDataConst(), bin.Size() * 4);
file.Close();
}
}
}
private:
bool m_console = false;
bool m_enabled = false;
Core::File m_file;
String m_file_name;
};
Vector<uint32_t> ShaderRecompileVS(const VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info)
@ -2792,6 +2887,8 @@ Vector<uint32_t> ShaderRecompileCS(const ComputeShaderInfo* regs, const ShaderCo
}
log.DumpOptimizedShader(ret);
// log.DumpGlslShader(ret);
log.DumpBinary(ret);
return ret;
}

View file

@ -300,6 +300,42 @@ constexpr char32_t BUFFER_STORE_FLOAT1[] = UR"(
OpFunctionEnd
)";
// SPIR-V assembly text defining the helper function %buffer_store_float2.
// The function takes six pointer parameters (two floats, four ints), computes
// addr = (offset + index * stride) / 4 and stores p1 to buf[buffer_index].data[addr]
// and p2 to buf[buffer_index].data[addr + 1].
// The text references ids that are not defined in this string (%void, %int, %float,
// %int_0, %int_1, %int_4, %buf, %_ptr_Function_*, %_ptr_StorageBuffer_float,
// %function_buffer_load_store_float2); they are presumably declared elsewhere in the
// generated module - TODO confirm.
constexpr char32_t BUFFER_STORE_FLOAT2[] = UR"(
; void buffer_store_float2(in float p1, in float p2, in int index, in int offset, in int stride, in int buffer_index)
; {
; int addr = (offset + index * stride)/4;
; buf[buffer_index].data[addr+0] = p1;
; buf[buffer_index].data[addr+1] = p2;
; }
%buffer_store_float2 = OpFunction %void None %function_buffer_load_store_float2
%buf_s_f2_51 = OpFunctionParameter %_ptr_Function_float
%buf_s_f2_52 = OpFunctionParameter %_ptr_Function_float
%buf_s_f2_53 = OpFunctionParameter %_ptr_Function_int
%buf_s_f2_54 = OpFunctionParameter %_ptr_Function_int
%buf_s_f2_55 = OpFunctionParameter %_ptr_Function_int
%buf_s_f2_56 = OpFunctionParameter %_ptr_Function_int
%buf_s_f2_58 = OpLabel
%buf_s_f2_143 = OpVariable %_ptr_Function_int Function
%buf_s_f2_144 = OpLoad %int %buf_s_f2_54
%buf_s_f2_145 = OpLoad %int %buf_s_f2_53
%buf_s_f2_146 = OpLoad %int %buf_s_f2_55
%buf_s_f2_147 = OpIMul %int %buf_s_f2_145 %buf_s_f2_146
%buf_s_f2_148 = OpIAdd %int %buf_s_f2_144 %buf_s_f2_147
%buf_s_f2_149 = OpSDiv %int %buf_s_f2_148 %int_4
OpStore %buf_s_f2_143 %buf_s_f2_149
%buf_s_f2_150 = OpLoad %int %buf_s_f2_56
%buf_s_f2_153 = OpLoad %float %buf_s_f2_51
%buf_s_f2_154 = OpAccessChain %_ptr_StorageBuffer_float %buf %buf_s_f2_150 %int_0 %buf_s_f2_149
OpStore %buf_s_f2_154 %buf_s_f2_153
%buf_s_f2_155 = OpLoad %int %buf_s_f2_56
%buf_s_f2_158 = OpIAdd %int %buf_s_f2_149 %int_1
%buf_s_f2_159 = OpLoad %float %buf_s_f2_52
%buf_s_f2_160 = OpAccessChain %_ptr_StorageBuffer_float %buf %buf_s_f2_155 %int_0 %buf_s_f2_158
OpStore %buf_s_f2_160 %buf_s_f2_159
OpReturn
OpFunctionEnd
)";
constexpr char32_t TBUFFER_LOAD_FORMAT_XYZW[] = UR"(
; Function tbuffer_load_format_xyzw
; void tbuffer_load_format_xyzw(out float p1, out float p2, out float p3, out float p4,
@ -360,7 +396,7 @@ constexpr char32_t TBUFFER_LOAD_FORMAT_XYZW[] = UR"(
constexpr char32_t TBUFFER_LOAD_FORMAT_X[] = UR"(
; void tbuffer_load_format_x(out float p1, in int index, in int offset, in int stride, in int buffer_index, in int dfmt_nfmt)
; {
; if (dfmt_nfmt == 36) // dmft = 4, nfmt = 4
; if (dfmt_nfmt == 36 || dfmt_nfmt == 39) // dmft = 4, nfmt = 4 or 7
; {
; buffer_load_float1(p1, index, offset, stride, buffer_index);
; }
@ -380,8 +416,10 @@ constexpr char32_t TBUFFER_LOAD_FORMAT_X[] = UR"(
%tbuf_l_f_x_89 = OpVariable %_ptr_Function_int Function
%tbuf_l_f_x_76 = OpLoad %int %tbuf_l_f_x_31
%tbuf_l_f_x_79 = OpIEqual %bool %tbuf_l_f_x_76 %int_36
%tbuf_l_f_x_79_2 = OpIEqual %bool %tbuf_l_f_x_76 %int_39
%tbuf_l_f_x_79_3 = OpLogicalOr %bool %tbuf_l_f_x_79 %tbuf_l_f_x_79_2
OpSelectionMerge %tbuf_l_f_x_81 None
OpBranchConditional %tbuf_l_f_x_79 %tbuf_l_f_x_80 %tbuf_l_f_x_81
OpBranchConditional %tbuf_l_f_x_79_3 %tbuf_l_f_x_80 %tbuf_l_f_x_81
%tbuf_l_f_x_80 = OpLabel
%tbuf_l_f_x_84 = OpLoad %int %tbuf_l_f_x_27
OpStore %tbuf_l_f_x_83 %tbuf_l_f_x_84
@ -403,7 +441,7 @@ constexpr char32_t TBUFFER_LOAD_FORMAT_X[] = UR"(
constexpr char32_t TBUFFER_STORE_FORMAT_X[] = UR"(
; void tbuffer_store_format_x(in float p1, in int index, in int offset, in int stride, in int buffer_index, in int dfmt_nfmt)
; {
; if (dfmt_nfmt == 36) // dmft = 4, nfmt = 4
; if (dfmt_nfmt == 36 || dfmt_nfmt == 39) // dmft = 4, nfmt = 4 or 7
; {
; buffer_store_float1(p1, index, offset, stride, buffer_index);
; }
@ -423,8 +461,10 @@ constexpr char32_t TBUFFER_STORE_FORMAT_X[] = UR"(
%tbuf_s_f_x_105 = OpVariable %_ptr_Function_int Function
%tbuf_s_f_x_93 = OpLoad %int %tbuf_s_f_x_39
%tbuf_s_f_x_94 = OpIEqual %bool %tbuf_s_f_x_93 %int_36
%tbuf_s_f_x_94_2 = OpIEqual %bool %tbuf_s_f_x_93 %int_39
%tbuf_s_f_x_94_3 = OpLogicalOr %bool %tbuf_s_f_x_94 %tbuf_s_f_x_94_2
OpSelectionMerge %tbuf_s_f_x_96 None
OpBranchConditional %tbuf_s_f_x_94 %tbuf_s_f_x_95 %tbuf_s_f_x_96
OpBranchConditional %tbuf_s_f_x_94_3 %tbuf_s_f_x_95 %tbuf_s_f_x_96
%tbuf_s_f_x_95 = OpLabel
%tbuf_s_f_x_98 = OpLoad %float %tbuf_s_f_x_34
OpStore %tbuf_s_f_x_97 %tbuf_s_f_x_98
@ -443,6 +483,56 @@ constexpr char32_t TBUFFER_STORE_FORMAT_X[] = UR"(
OpFunctionEnd
)";
// SPIR-V assembly text defining the helper function %tbuffer_store_format_xy.
// The function takes seven pointer parameters (two floats, five ints). When the last
// one (dfmt_nfmt) equals 92 or 95 it copies the other six arguments into local
// variables and forwards them to %buffer_store_float2; for any other dfmt_nfmt value
// it returns without storing anything.
// NOTE(review): 92 and 95 look like dfmt * 8 + nfmt for dfmt = 11 and nfmt = 4 / 7,
// per the embedded pseudo-code comment - confirm against the code that builds the
// dfmt_nfmt argument.
// The text references ids that are not defined in this string (%buffer_store_float2,
// %int_92, %int_95, %bool, %function_tbuffer_load_store_format_xy, ...); they are
// presumably declared elsewhere in the generated module - TODO confirm.
constexpr char32_t TBUFFER_STORE_FORMAT_XY[] = UR"(
; void tbuffer_store_format_xy(in float p1, in float p2, in int index, in int offset, in int stride, in int buffer_index, in int dfmt_nfmt)
; {
; if (dfmt_nfmt == 92 || dfmt_nfmt == 95) // dmft = 11, nfmt = 4 or 7
; {
; buffer_store_float2(p1, p2, index, offset, stride, buffer_index);
; }
; }
%tbuffer_store_format_xy = OpFunction %void None %function_tbuffer_load_store_format_xy
%tbuf_s_f_xy_60 = OpFunctionParameter %_ptr_Function_float
%tbuf_s_f_xy_61 = OpFunctionParameter %_ptr_Function_float
%tbuf_s_f_xy_62 = OpFunctionParameter %_ptr_Function_int
%tbuf_s_f_xy_63 = OpFunctionParameter %_ptr_Function_int
%tbuf_s_f_xy_64 = OpFunctionParameter %_ptr_Function_int
%tbuf_s_f_xy_65 = OpFunctionParameter %_ptr_Function_int
%tbuf_s_f_xy_66 = OpFunctionParameter %_ptr_Function_int
%tbuf_s_f_xy_68 = OpLabel
%tbuf_s_f_xy_170 = OpVariable %_ptr_Function_float Function
%tbuf_s_f_xy_172 = OpVariable %_ptr_Function_float Function
%tbuf_s_f_xy_174 = OpVariable %_ptr_Function_int Function
%tbuf_s_f_xy_176 = OpVariable %_ptr_Function_int Function
%tbuf_s_f_xy_178 = OpVariable %_ptr_Function_int Function
%tbuf_s_f_xy_180 = OpVariable %_ptr_Function_int Function
%tbuf_s_f_xy_161 = OpLoad %int %tbuf_s_f_xy_66
%tbuf_s_f_xy_163 = OpIEqual %bool %tbuf_s_f_xy_161 %int_92
%tbuf_s_f_xy_164 = OpLoad %int %tbuf_s_f_xy_66
%tbuf_s_f_xy_166 = OpIEqual %bool %tbuf_s_f_xy_164 %int_95
%tbuf_s_f_xy_167 = OpLogicalOr %bool %tbuf_s_f_xy_163 %tbuf_s_f_xy_166
OpSelectionMerge %tbuf_s_f_xy_169 None
OpBranchConditional %tbuf_s_f_xy_167 %tbuf_s_f_xy_168 %tbuf_s_f_xy_169
%tbuf_s_f_xy_168 = OpLabel
%tbuf_s_f_xy_171 = OpLoad %float %tbuf_s_f_xy_60
OpStore %tbuf_s_f_xy_170 %tbuf_s_f_xy_171
%tbuf_s_f_xy_173 = OpLoad %float %tbuf_s_f_xy_61
OpStore %tbuf_s_f_xy_172 %tbuf_s_f_xy_173
%tbuf_s_f_xy_175 = OpLoad %int %tbuf_s_f_xy_62
OpStore %tbuf_s_f_xy_174 %tbuf_s_f_xy_175
%tbuf_s_f_xy_177 = OpLoad %int %tbuf_s_f_xy_63
OpStore %tbuf_s_f_xy_176 %tbuf_s_f_xy_177
%tbuf_s_f_xy_179 = OpLoad %int %tbuf_s_f_xy_64
OpStore %tbuf_s_f_xy_178 %tbuf_s_f_xy_179
%tbuf_s_f_xy_181 = OpLoad %int %tbuf_s_f_xy_65
OpStore %tbuf_s_f_xy_180 %tbuf_s_f_xy_181
%tbuf_s_f_xy_182 = OpFunctionCall %void %buffer_store_float2 %tbuf_s_f_xy_170 %tbuf_s_f_xy_172 %tbuf_s_f_xy_174 %tbuf_s_f_xy_176 %tbuf_s_f_xy_178 %tbuf_s_f_xy_180
OpBranch %tbuf_s_f_xy_169
%tbuf_s_f_xy_169 = OpLabel
OpReturn
OpFunctionEnd
)";
constexpr char32_t SBUFFER_LOAD_DWORD[] = UR"(
; void sbuffer_load_dword(out uint p1, in int offset, in int buffer_index)
; {
@ -1199,12 +1289,23 @@ static bool operand_load_uint(Spirv* spirv, ShaderOperand op, const String& resu
if (operand_is_constant(op))
{
String id = spirv->GetConstant(op);
bool u64 = (op.type == ShaderOperandType::IntegerInlineConstant && op.size == 2);
*load = String(U"%<result_id> = OpBitcast %uint %<id>")
.ReplaceStr(U"<index>", index)
.ReplaceStr(U"<id>", id)
.ReplaceStr(U"<result_id>", result_id);
if (u64)
{
EXIT_NOT_IMPLEMENTED(shift < 0);
EXIT_NOT_IMPLEMENTED(op.constant.i != 0);
*load = String(U"%<result_id> = OpBitcast %uint %uint_0").ReplaceStr(U"<index>", index).ReplaceStr(U"<result_id>", result_id);
} else
{
String id = spirv->GetConstant(op);
*load = String(U"%<result_id> = OpBitcast %uint %<id>")
.ReplaceStr(U"<index>", index)
.ReplaceStr(U"<id>", id)
.ReplaceStr(U"<result_id>", result_id);
}
} else if (operand_is_variable(op))
{
auto value = (shift >= 0 ? operand_variable_to_str(op, shift) : operand_variable_to_str(op));
@ -1226,7 +1327,6 @@ static bool operand_load_uint(Spirv* spirv, ShaderOperand op, const String& resu
{
return false;
}
} else
{
return false;
@ -1502,8 +1602,6 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferStoreFormatX_Vdata1VaddrSvSoffsIdxen)
if (bind_info != nullptr && bind_info->storage_buffers.buffers_num > 0)
{
// EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.src[0]));
// EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst));
EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[2]));
auto dst_value = operand_variable_to_str(inst.dst);
@ -1566,6 +1664,77 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferStoreFormatX_Vdata1VaddrSvSoffsIdxen)
return false;
}
// Emits SPIR-V text for a two-component buffer store (format "Vdata2 Vaddr Sv Soffs Idxen").
//
// The generated snippet loads exec_lo and skips the store when it is zero; otherwise it
// fills %temp_int_1..%temp_int_5 from the address operand (src[0]), the constant offset
// (src[2]) and dwords 0, 1 and 3 of src[1], then calls %tbuffer_store_format_xy with the
// two destination components. The snippet is appended to *dst_source.
//
// Returns true when a snippet was emitted, false when no bind info is available or no
// storage buffer is bound.
KYTY_RECOMPILER_FUNC(Recompile_BufferStoreFormatXy_Vdata2VaddrSvSoffsIdxen)
{
	const auto& inst      = code.GetInstructions().At(index);
	const auto* bind_info = spirv->GetBindInfo();

	// Nothing to store into: let the caller treat the instruction as unsupported.
	if (bind_info == nullptr || !(bind_info->storage_buffers.buffers_num > 0))
	{
		return false;
	}

	// Only a constant offset operand is handled.
	EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[2]));

	auto dst_value0  = operand_variable_to_str(inst.dst, 0);
	auto dst_value1  = operand_variable_to_str(inst.dst, 1);
	auto src0_value  = operand_variable_to_str(inst.src[0]);
	auto src1_value0 = operand_variable_to_str(inst.src[1], 0);
	auto src1_value1 = operand_variable_to_str(inst.src[1], 1);
	auto src1_value3 = operand_variable_to_str(inst.src[1], 3);

	String offset_id = spirv->GetConstant(inst.src[2]);

	EXIT_NOT_IMPLEMENTED(dst_value0.type != SpirvType::Float);
	EXIT_NOT_IMPLEMENTED(src0_value.type != SpirvType::Float);
	EXIT_NOT_IMPLEMENTED(src1_value0.type != SpirvType::Uint);
	EXIT_NOT_IMPLEMENTED(src1_value1.type != SpirvType::Uint);
	EXIT_NOT_IMPLEMENTED(src1_value3.type != SpirvType::Uint);

	// TODO() check VSKIP

	static const char32_t* text = UR"(
%exec_lo_u_<index> = OpLoad %uint %exec_lo
%exec_hi_u_<index> = OpLoad %uint %exec_hi ; unused
%exec_lo_b_<index> = OpINotEqual %bool %exec_lo_u_<index> %uint_0
OpSelectionMerge %t278_<index> None
OpBranchConditional %exec_lo_b_<index> %t277_<index> %t278_<index>
%t277_<index> = OpLabel
%t100_<index> = OpLoad %float %<src0>
%t101_<index> = OpBitcast %int %t100_<index>
OpStore %temp_int_1 %t101_<index>
%t148_<index> = OpLoad %uint %<src1_value1>
%t150_<index> = OpShiftRightLogical %uint %t148_<index> %int_16
%t152_<index> = OpBitwiseAnd %uint %t150_<index> %uint_0x00003fff
%t153_<index> = OpBitcast %int %t152_<index>
OpStore %temp_int_3 %t153_<index>
%t155_<index> = OpLoad %uint %<src1_value0>
%t156_<index> = OpBitcast %int %t155_<index>
OpStore %temp_int_4 %t156_<index>
OpStore %temp_int_2 %<offset>
%t206_<index> = OpLoad %uint %<src1_value3>
%t208_<index> = OpShiftRightLogical %uint %t206_<index> %int_12
%t210_<index> = OpBitwiseAnd %uint %t208_<index> %uint_127
%t211_<index> = OpBitcast %int %t210_<index>
OpStore %temp_int_5 %t211_<index>
%t110_<index> = OpFunctionCall %void %tbuffer_store_format_xy %<p0> %<p1> %temp_int_1 %temp_int_2 %temp_int_3 %temp_int_4 %temp_int_5
OpBranch %t278_<index>
%t278_<index> = OpLabel
)";

	// Fill in the template one placeholder at a time, in the same order as before.
	String snippet(text);
	snippet = snippet.ReplaceStr(U"<index>", String::FromPrintf("%u", index));
	snippet = snippet.ReplaceStr(U"<src0>", src0_value.value);
	snippet = snippet.ReplaceStr(U"<offset>", offset_id);
	snippet = snippet.ReplaceStr(U"<src1_value0>", src1_value0.value);
	snippet = snippet.ReplaceStr(U"<src1_value1>", src1_value1.value);
	snippet = snippet.ReplaceStr(U"<src1_value3>", src1_value3.value);
	snippet = snippet.ReplaceStr(U"<p0>", dst_value0.value);
	snippet = snippet.ReplaceStr(U"<p1>", dst_value1.value);

	*dst_source += snippet;

	return true;
}
KYTY_RECOMPILER_FUNC(Recompile_DsAppend_VdstGds)
{
const auto& inst = code.GetInstructions().At(index);
@ -1644,7 +1813,7 @@ KYTY_RECOMPILER_FUNC(Recompile_Exp_Mrt0OffOffComprVmDone)
const auto& inst = code.GetInstructions().At(index);
const auto& next_inst = code.GetInstructions().At(index + 1);
if (!(prev_inst.type == ShaderInstructionType::SMovB64 && prev_inst.format == ShaderInstructionFormat::Sdst2Ssrc0 &&
if (!(prev_inst.type == ShaderInstructionType::SMovB64 && prev_inst.format == ShaderInstructionFormat::Sdst2Ssrc02 &&
prev_inst.dst.type == ShaderOperandType::ExecLo && prev_inst.src[0].type == ShaderOperandType::IntegerInlineConstant &&
prev_inst.src[0].constant.i == 0 && next_inst.type == ShaderInstructionType::SEndpgm))
{
@ -1954,7 +2123,7 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata4Vaddr3StSsDmaskF)
return false;
}
/* XXX: Andn2, Or, Nor */
/* XXX: Andn2, Or, Nor, Cselect */
KYTY_RECOMPILER_FUNC(Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12)
{
const auto& inst = code.GetInstructions().At(index);
@ -2170,7 +2339,7 @@ KYTY_RECOMPILER_FUNC(Recompile_SAndSaveexecB64_Sdst2Ssrc02)
return true;
}
/* XXX: Eq */
/* XXX: Eq, Le, Lg */
KYTY_RECOMPILER_FUNC(Recompile_SCmp_XXX_U32_Ssrc0Ssrc1)
{
const auto& inst = code.GetInstructions().At(index);
@ -2493,7 +2662,7 @@ KYTY_RECOMPILER_FUNC(Recompile_SEndpgm_Empty)
const auto& prev_inst = code.GetInstructions().At(index - 1);
bool after_kill =
(prev_prev_inst.type == ShaderInstructionType::SMovB64 && prev_prev_inst.format == ShaderInstructionFormat::Sdst2Ssrc0 &&
(prev_prev_inst.type == ShaderInstructionType::SMovB64 && prev_prev_inst.format == ShaderInstructionFormat::Sdst2Ssrc02 &&
prev_prev_inst.dst.type == ShaderOperandType::ExecLo && prev_prev_inst.src[0].type == ShaderOperandType::IntegerInlineConstant &&
prev_prev_inst.src[0].constant.i == 0 && prev_inst.type == ShaderInstructionType::Exp &&
prev_inst.format == ShaderInstructionFormat::Mrt0OffOffComprVmDone);
@ -2684,44 +2853,6 @@ KYTY_RECOMPILER_FUNC(Recompile_SMovB64_Sdst2Ssrc02)
return true;
}
// Emits SPIR-V text for a 64-bit scalar move whose source is a single non-negative
// inline integer constant: the constant is stored to the first destination dword and
// %uint_0 to the second. When the destination is the exec mask, the EXECZ snippet is
// appended as well.
//
// Returns false when the source operand cannot be loaded as uint, true otherwise.
KYTY_RECOMPILER_FUNC(Recompile_SMovB64_Sdst2Ssrc0)
{
	const auto& inst = code.GetInstructions().At(index);

	String index_str = String::FromPrintf("%u", index);

	EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst));

	auto dst_value0 = operand_variable_to_str(inst.dst, 0);
	auto dst_value1 = operand_variable_to_str(inst.dst, 1);

	EXIT_NOT_IMPLEMENTED(dst_value0.type != SpirvType::Uint);
	EXIT_NOT_IMPLEMENTED(!(inst.src[0].type == ShaderOperandType::IntegerInlineConstant && inst.src[0].constant.i >= 0));

	// Snippet that loads the source constant into %t0_<index>.
	String src_load;
	const bool loaded = operand_load_uint(spirv, inst.src[0], U"t0_<index>", index_str, &src_load, 0);
	if (!loaded)
	{
		return false;
	}

	static const char32_t* text = UR"(
<load0>
OpStore %<dst0> %t0_<index>
OpStore %<dst1> %uint_0
<execz>
)";

	// <index> is substituted last so that it is also resolved inside the pieces
	// inserted for <load0> and <execz>.
	String snippet(text);
	snippet = snippet.ReplaceStr(U"<dst0>", dst_value0.value);
	snippet = snippet.ReplaceStr(U"<dst1>", dst_value1.value);
	snippet = snippet.ReplaceStr(U"<load0>", src_load);
	snippet = snippet.ReplaceStr(U"<execz>", (operand_is_exec(inst.dst) ? EXECZ : U""));
	snippet = snippet.ReplaceStr(U"<index>", index_str);

	*dst_source += snippet;

	return true;
}
KYTY_RECOMPILER_FUNC(Recompile_SSwappcB64_Sdst2Ssrc02)
{
const auto& inst = code.GetInstructions().At(index);
@ -2920,7 +3051,7 @@ KYTY_RECOMPILER_FUNC(Recompile_TBufferLoadFormatXyzw_Vdata4Vaddr2SvSoffsOffenIdx
return false;
}
/* XXX: Eq, Le, Neq */
/* XXX: Eq, Ge, Le, Neq */
KYTY_RECOMPILER_FUNC(Recompile_VCmp_XXX_F32_SmaskVsrc0Vsrc1)
{
const auto& inst = code.GetInstructions().At(index);
@ -3020,7 +3151,7 @@ KYTY_RECOMPILER_FUNC(Recompile_VCmp_XXX_I32_SmaskVsrc0Vsrc1)
return true;
}
/* XXX: Le */
/* XXX: Le, Ge */
KYTY_RECOMPILER_FUNC(Recompile_VCmp_XXX_U32_SmaskVsrc0Vsrc1)
{
const auto& inst = code.GetInstructions().At(index);
@ -3210,16 +3341,21 @@ KYTY_RECOMPILER_FUNC(Recompile_VCndmaskB32_VdstVsrc0Vsrc1Smask2)
}
// TODO() check VSKIP
// TODO() check EXEC
static const char32_t* text = UR"(
<load0>
<load1>
%t22_<index> = OpLoad %uint %<src0>
%t23_<index> = OpLoad %uint %<src1> ; unused
%t2_<index> = OpINotEqual %bool %t22_<index> %uint_0
%tb_<index> = OpBitwiseAnd %uint %t22_<index> %uint_1
%t2_<index> = OpINotEqual %bool %tb_<index> %uint_0
%t3_<index> = OpSelect %float %t2_<index> %t1_<index> %t0_<index>
OpStore %<dst> %t3_<index>
%exec_lo_u_<index> = OpLoad %uint %exec_lo
%exec_hi_u_<index> = OpLoad %uint %exec_hi ; unused
%exec_lo_b_<index> = OpINotEqual %bool %exec_lo_u_<index> %uint_0
%tdst_<index> = OpLoad %float %<dst>
%tval_<index> = OpSelect %float %exec_lo_b_<index> %t3_<index> %tdst_<index>
OpStore %<dst> %tval_<index>
)";
*dst_source += String(text)
.ReplaceStr(U"<dst>", dst_value.value)
@ -3311,7 +3447,8 @@ KYTY_RECOMPILER_FUNC(Recompile_VInterpP2F32_VdstVsrcAttrChan)
return true;
}
KYTY_RECOMPILER_FUNC(Recompile_VMadakF32_VdstVsrc0Vsrc1Vsrc2)
/* XXX: Mad, Madak, Madmk */
KYTY_RECOMPILER_FUNC(Recompile_V_XXX_F32_VdstVsrc0Vsrc1Vsrc2)
{
const auto& inst = code.GetInstructions().At(index);
@ -3343,7 +3480,6 @@ KYTY_RECOMPILER_FUNC(Recompile_VMadakF32_VdstVsrc0Vsrc1Vsrc2)
}
// TODO() check VSKIP
// TODO() check EXEC
// TODO() check SP_ROUND
// TODO() check DX10_CLAMP
// TODO() check IEEE
@ -3352,68 +3488,20 @@ KYTY_RECOMPILER_FUNC(Recompile_VMadakF32_VdstVsrc0Vsrc1Vsrc2)
<load0>
<load1>
<load2>
%t245_<index> = OpExtInst %float %GLSL_std_450 Fma %t0_<index> %t1_<index> %t2_<index>
OpStore %<dst> %t245_<index>
)";
*dst_source += String(text)
.ReplaceStr(U"<dst>", dst_value.value)
.ReplaceStr(U"<load0>", load0)
.ReplaceStr(U"<load1>", load1)
.ReplaceStr(U"<load2>", load2)
.ReplaceStr(U"<index>", index_str);
return true;
}
KYTY_RECOMPILER_FUNC(Recompile_VMadF32_VdstVsrc0Vsrc1Vsrc2)
{
const auto& inst = code.GetInstructions().At(index);
String load0;
String load1;
String load2;
String index_str = String::FromPrintf("%u", index);
EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst));
EXIT_NOT_IMPLEMENTED(inst.dst.clamp);
EXIT_NOT_IMPLEMENTED(inst.dst.multiplier != 1.0f);
auto dst_value = operand_variable_to_str(inst.dst);
EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Float);
if (!operand_load_float(spirv, inst.src[0], U"t0_<index>", index_str, &load0))
{
return false;
}
if (!operand_load_float(spirv, inst.src[1], U"t1_<index>", index_str, &load1))
{
return false;
}
if (!operand_load_float(spirv, inst.src[2], U"t2_<index>", index_str, &load2))
{
return false;
}
// TODO() check VSKIP
// TODO() check EXEC
// TODO() check SP_ROUND
// TODO() check DX10_CLAMP
// TODO() check IEEE
static const char32_t* text = UR"(
<load0>
<load1>
<load2>
%t245_<index> = OpExtInst %float %GLSL_std_450 Fma %t0_<index> %t1_<index> %t2_<index>
OpStore %<dst> %t245_<index>
<param>
%exec_lo_u_<index> = OpLoad %uint %exec_lo
%exec_hi_u_<index> = OpLoad %uint %exec_hi ; unused
%exec_lo_b_<index> = OpINotEqual %bool %exec_lo_u_<index> %uint_0
%tdst_<index> = OpLoad %float %<dst>
%tval_<index> = OpSelect %float %exec_lo_b_<index> %t_<index> %tdst_<index>
OpStore %<dst> %tval_<index>
)";
*dst_source += String(text)
.ReplaceStr(U"<dst>", dst_value.value)
.ReplaceStr(U"<load0>", load0)
.ReplaceStr(U"<load1>", load1)
.ReplaceStr(U"<load2>", load2)
.ReplaceStr(U"<param>", param[0])
.ReplaceStr(U"<index>", index_str);
return true;
@ -3423,74 +3511,82 @@ KYTY_RECOMPILER_FUNC(Recompile_VMbcntHiU32B32_SVdstSVsrc0SVsrc1)
{
const auto& inst = code.GetInstructions().At(index);
if (inst.src[0].type == ShaderOperandType::ExecHi)
// if (inst.src[0].type == ShaderOperandType::ExecHi)
//{
String index_str = String::FromPrintf("%u", index);
EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst));
EXIT_NOT_IMPLEMENTED(inst.dst.clamp);
EXIT_NOT_IMPLEMENTED(inst.dst.multiplier != 1.0f);
auto dst_value = operand_variable_to_str(inst.dst);
EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Float);
String load0;
if (!operand_load_float(spirv, inst.src[1], U"t1_<index>", index_str, &load0))
{
String index_str = String::FromPrintf("%u", index);
EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst));
EXIT_NOT_IMPLEMENTED(inst.dst.clamp);
EXIT_NOT_IMPLEMENTED(inst.dst.multiplier != 1.0f);
auto dst_value = operand_variable_to_str(inst.dst);
EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Float);
String load0;
if (!operand_load_float(spirv, inst.src[1], U"t0_<index>", index_str, &load0))
{
return false;
}
// TODO() check VSKIP
// TODO() check EXEC
static const char32_t* text = UR"(
<load0>
OpStore %<dst> %t0_<index>
)";
*dst_source += String(text).ReplaceStr(U"<dst>", dst_value.value).ReplaceStr(U"<load0>", load0).ReplaceStr(U"<index>", index_str);
return true;
return false;
}
return false;
// TODO() check VSKIP
static const char32_t* text = UR"(
<load0>
%exec_lo_u_<index> = OpLoad %uint %exec_lo
%exec_hi_u_<index> = OpLoad %uint %exec_hi ; unused
%exec_lo_b_<index> = OpINotEqual %bool %exec_lo_u_<index> %uint_0
%tdst_<index> = OpLoad %float %<dst>
%tval_<index> = OpSelect %float %exec_lo_b_<index> %t1_<index> %tdst_<index>
OpStore %<dst> %tval_<index>
)";
*dst_source += String(text).ReplaceStr(U"<dst>", dst_value.value).ReplaceStr(U"<load0>", load0).ReplaceStr(U"<index>", index_str);
return true;
//}
// return false;
}
KYTY_RECOMPILER_FUNC(Recompile_VMbcntLoU32B32_SVdstSVsrc0SVsrc1)
{
const auto& inst = code.GetInstructions().At(index);
if (inst.src[0].type == ShaderOperandType::ExecLo)
// if (inst.src[0].type == ShaderOperandType::ExecLo)
//{
String index_str = String::FromPrintf("%u", index);
EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst));
auto dst_value = operand_variable_to_str(inst.dst);
EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Float);
String load0;
if (!operand_load_float(spirv, inst.src[1], U"t1_<index>", index_str, &load0))
{
String index_str = String::FromPrintf("%u", index);
EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst));
auto dst_value = operand_variable_to_str(inst.dst);
EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Float);
String load0;
if (!operand_load_float(spirv, inst.src[1], U"t0_<index>", index_str, &load0))
{
return false;
}
// TODO() check VSKIP
// TODO() check EXEC
static const char32_t* text = UR"(
<load0>
OpStore %<dst> %t0_<index>
)";
*dst_source += String(text).ReplaceStr(U"<dst>", dst_value.value).ReplaceStr(U"<load0>", load0).ReplaceStr(U"<index>", index_str);
return true;
return false;
}
return false;
// TODO() check VSKIP
static const char32_t* text = UR"(
<load0>
%exec_lo_u_<index> = OpLoad %uint %exec_lo
%exec_hi_u_<index> = OpLoad %uint %exec_hi ; unused
%exec_lo_b_<index> = OpINotEqual %bool %exec_lo_u_<index> %uint_0
%tdst_<index> = OpLoad %float %<dst>
%tval_<index> = OpSelect %float %exec_lo_b_<index> %t1_<index> %tdst_<index>
OpStore %<dst> %tval_<index>
)";
*dst_source += String(text).ReplaceStr(U"<dst>", dst_value.value).ReplaceStr(U"<load0>", load0).ReplaceStr(U"<index>", index_str);
return true;
//}
// return false;
}
/* XXX: Not */
@ -3551,11 +3647,15 @@ KYTY_RECOMPILER_FUNC(Recompile_VMovB32_SVdstSVsrc0)
}
// TODO() check VSKIP
// TODO() check EXEC
static const char32_t* text = UR"(
<load0>
OpStore %<dst> %t0_<index>
%exec_lo_u_<index> = OpLoad %uint %exec_lo
%exec_hi_u_<index> = OpLoad %uint %exec_hi ; unused
%exec_lo_b_<index> = OpINotEqual %bool %exec_lo_u_<index> %uint_0
%tdst_<index> = OpLoad %float %<dst>
%tval_<index> = OpSelect %float %exec_lo_b_<index> %t0_<index> %tdst_<index>
OpStore %<dst> %tval_<index>
)";
*dst_source += String(text).ReplaceStr(U"<dst>", dst_value.value).ReplaceStr(U"<load0>", load0).ReplaceStr(U"<index>", index_str);
@ -3594,7 +3694,6 @@ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_F32_SVdstSVsrc0SVsrc1)
}
// TODO() check VSKIP
// TODO() check EXEC
// TODO() check SP_DENORM
// TODO() check SP_ROUND
// TODO() check DX10_CLAMP
@ -3605,9 +3704,17 @@ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_F32_SVdstSVsrc0SVsrc1)
<load1>
<load_dst>
<param>
OpStore %<dst> %t_<index>
%exec_lo_u_<index> = OpLoad %uint %exec_lo
%exec_hi_u_<index> = OpLoad %uint %exec_hi ; unused
%exec_lo_b_<index> = OpINotEqual %bool %exec_lo_u_<index> %uint_0
OpSelectionMerge %tl2_<index> None
OpBranchConditional %exec_lo_b_<index> %tl1_<index> %tl2_<index>
%tl1_<index> = OpLabel
OpStore %<dst> %t_<index>
<multiply>
<clamp>
OpBranch %tl2_<index>
%tl2_<index> = OpLabel
)";
*dst_source += String(text)
.ReplaceStr(U"<multiply>", (inst.dst.multiplier != 1.0f
@ -3647,14 +3754,18 @@ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_F32_SVdstSVsrc0)
}
// TODO() check VSKIP
// TODO() check EXEC
// TODO() check DX10_CLAMP
// TODO() check IEEE
static const char32_t* text = UR"(
<load0>
<param>
OpStore %<dst> %t_<index>
%exec_lo_u_<index> = OpLoad %uint %exec_lo
%exec_hi_u_<index> = OpLoad %uint %exec_hi ; unused
%exec_lo_b_<index> = OpINotEqual %bool %exec_lo_u_<index> %uint_0
%tdst_<index> = OpLoad %float %<dst>
%tval_<index> = OpSelect %float %exec_lo_b_<index> %t_<index> %tdst_<index>
OpStore %<dst> %tval_<index>
)";
*dst_source += String(text)
.ReplaceStr(U"<dst>", dst_value.value)
@ -3665,7 +3776,7 @@ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_F32_SVdstSVsrc0)
return true;
}
/* XXX: And, Lshr, Lshlrev, Lshrrev, MulU32U24 */
/* XXX: And, Bcnt, Lshr, Lshl, Lshlrev, Lshrrev, MulU32U24, MulLoU32, MulHiU32 */
KYTY_RECOMPILER_FUNC(Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1)
{
const auto& inst = code.GetInstructions().At(index);
@ -3693,7 +3804,6 @@ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1)
}
// TODO() check VSKIP
// TODO() check EXEC
static const char32_t* text = UR"(
<load0>
@ -3702,7 +3812,12 @@ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1)
<param1>
<param2>
%tf_<index> = OpBitcast %float %t_<index>
OpStore %<dst> %tf_<index>
%exec_lo_u_<index> = OpLoad %uint %exec_lo
%exec_hi_u_<index> = OpLoad %uint %exec_hi ; unused
%exec_lo_b_<index> = OpINotEqual %bool %exec_lo_u_<index> %uint_0
%tdst_<index> = OpLoad %float %<dst>
%tval_<index> = OpSelect %float %exec_lo_b_<index> %tf_<index> %tdst_<index>
OpStore %<dst> %tval_<index>
)";
*dst_source += String(text)
.ReplaceStr(U"<dst>", dst_value.value)
@ -3716,7 +3831,7 @@ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1)
return true;
}
/* XXX: Ashrrev, MulLo */
/* XXX: Ashr, Ashrrev, MulLo */
KYTY_RECOMPILER_FUNC(Recompile_V_XXX_I32_SVdstSVsrc0SVsrc1)
{
const auto& inst = code.GetInstructions().At(index);
@ -3744,7 +3859,6 @@ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_I32_SVdstSVsrc0SVsrc1)
}
// TODO() check VSKIP
// TODO() check EXEC
static const char32_t* text = UR"(
<load0>
@ -3753,6 +3867,12 @@ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_I32_SVdstSVsrc0SVsrc1)
<param1>
%tf_<index> = OpBitcast %float %t_<index>
OpStore %<dst> %tf_<index>
%exec_lo_u_<index> = OpLoad %uint %exec_lo
%exec_hi_u_<index> = OpLoad %uint %exec_hi ; unused
%exec_lo_b_<index> = OpINotEqual %bool %exec_lo_u_<index> %uint_0
%tdst_<index> = OpLoad %float %<dst>
%tval_<index> = OpSelect %float %exec_lo_b_<index> %tf_<index> %tdst_<index>
OpStore %<dst> %tval_<index>
)";
*dst_source += String(text)
.ReplaceStr(U"<dst>", dst_value.value)
@ -3833,14 +3953,18 @@ KYTY_RECOMPILER_FUNC(Recompile_VCvtF32_XXX_SVdstSVsrc0)
}
// TODO() check VSKIP
// TODO() check EXEC
// TODO() check SP_ROUND
static const char32_t* text = UR"(
<load0>
<param0>
<param1>
OpStore %<dst> %t_<index>
%exec_lo_u_<index> = OpLoad %uint %exec_lo
%exec_hi_u_<index> = OpLoad %uint %exec_hi ; unused
%exec_lo_b_<index> = OpINotEqual %bool %exec_lo_u_<index> %uint_0
%tdst_<index> = OpLoad %float %<dst>
%tval_<index> = OpSelect %float %exec_lo_b_<index> %t_<index> %tdst_<index>
OpStore %<dst> %tval_<index>
)";
*dst_source += String(text)
.ReplaceStr(U"<dst>", dst_value.value)
@ -3885,7 +4009,6 @@ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_U32_VdstVsrc0Vsrc1Vsrc2)
}
// TODO() check VSKIP
// TODO() check EXEC
// TODO() Sad: use only lower 16 bits of Vaccum
static const char32_t* text = UR"(
@ -3897,7 +4020,12 @@ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_U32_VdstVsrc0Vsrc1Vsrc2)
<param2>
<param3>
%tf_<index> = OpBitcast %float %t_<index>
OpStore %<dst> %tf_<index>
%exec_lo_u_<index> = OpLoad %uint %exec_lo
%exec_hi_u_<index> = OpLoad %uint %exec_hi ; unused
%exec_lo_b_<index> = OpINotEqual %bool %exec_lo_u_<index> %uint_0
%tdst_<index> = OpLoad %float %<dst>
%tval_<index> = OpSelect %float %exec_lo_b_<index> %tf_<index> %tdst_<index>
OpStore %<dst> %tval_<index>
)";
*dst_source += String(text)
.ReplaceStr(U"<dst>", dst_value.value)
@ -3976,10 +4104,11 @@ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_U32_VdstSdst2Vsrc0Vsrc1)
static RecompilerFunc g_recomp_func[] = {
// clang-format off
{Recompile_BufferLoadDword_Vdata1VaddrSvSoffsIdxen, ShaderInstructionType::BufferLoadDword, ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen, {U""}},
{Recompile_BufferLoadFormatX_Vdata1VaddrSvSoffsIdxen, ShaderInstructionType::BufferLoadFormatX, ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen, {U""}},
{Recompile_BufferStoreDword_Vdata1VaddrSvSoffsIdxen, ShaderInstructionType::BufferStoreDword, ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen, {U""}},
{Recompile_BufferStoreFormatX_Vdata1VaddrSvSoffsIdxen, ShaderInstructionType::BufferStoreFormatX, ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen, {U""}},
{Recompile_BufferLoadDword_Vdata1VaddrSvSoffsIdxen, ShaderInstructionType::BufferLoadDword, ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen, {U""}},
{Recompile_BufferLoadFormatX_Vdata1VaddrSvSoffsIdxen, ShaderInstructionType::BufferLoadFormatX, ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen, {U""}},
{Recompile_BufferStoreDword_Vdata1VaddrSvSoffsIdxen, ShaderInstructionType::BufferStoreDword, ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen, {U""}},
{Recompile_BufferStoreFormatX_Vdata1VaddrSvSoffsIdxen, ShaderInstructionType::BufferStoreFormatX, ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen, {U""}},
{Recompile_BufferStoreFormatXy_Vdata2VaddrSvSoffsIdxen, ShaderInstructionType::BufferStoreFormatXy, ShaderInstructionFormat::Vdata2VaddrSvSoffsIdxen, {U""}},
{Recompile_DsAppend_VdstGds, ShaderInstructionType::DsAppend, ShaderInstructionFormat::VdstGds, {U""}},
{Recompile_DsConsume_VdstGds, ShaderInstructionType::DsConsume, ShaderInstructionFormat::VdstGds, {U""}},
@ -4010,27 +4139,31 @@ static RecompilerFunc g_recomp_func[] = {
{Recompile_SLoadDwordx4_Sdst4SbaseSoffset, ShaderInstructionType::SLoadDwordx4, ShaderInstructionFormat::Sdst4SbaseSoffset, {U""}},
{Recompile_SLoadDwordx8_Sdst8SbaseSoffset, ShaderInstructionType::SLoadDwordx8, ShaderInstructionFormat::Sdst8SbaseSoffset, {U""}},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SAndn2B64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ta_<index> = OpNot %uint %t2_<index>",
U"%tb_<index> = OpBitwiseAnd %uint %t0_<index> %ta_<index>",
U"%tc_<index> = OpNot %uint %t3_<index>",
U"%td_<index> = OpBitwiseAnd %uint %t1_<index> %tc_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SNorB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ta_<index> = OpBitwiseOr %uint %t0_<index> %t2_<index>",
U"%tb_<index> = OpNot %uint %ta_<index>",
U"%tc_<index> = OpBitwiseOr %uint %t1_<index> %t3_<index>",
U"%td_<index> = OpNot %uint %tc_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SOrB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%tb_<index> = OpBitwiseOr %uint %t0_<index> %t2_<index>",
U"%td_<index> = OpBitwiseOr %uint %t1_<index> %t3_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SAndn2B64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ta_<index> = OpNot %uint %t2_<index>",
U"%tb_<index> = OpBitwiseAnd %uint %t0_<index> %ta_<index>",
U"%tc_<index> = OpNot %uint %t3_<index>",
U"%td_<index> = OpBitwiseAnd %uint %t1_<index> %tc_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SNorB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ta_<index> = OpBitwiseOr %uint %t0_<index> %t2_<index>",
U"%tb_<index> = OpNot %uint %ta_<index>",
U"%tc_<index> = OpBitwiseOr %uint %t1_<index> %t3_<index>",
U"%td_<index> = OpNot %uint %tc_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SOrB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%tb_<index> = OpBitwiseOr %uint %t0_<index> %t2_<index>",
U"%td_<index> = OpBitwiseOr %uint %t1_<index> %t3_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12, ShaderInstructionType::SCselectB64, ShaderInstructionFormat::Sdst2Ssrc02Ssrc12, {U"%ts_<index> = OpLoad %uint %scc",
U"%tsb_<index> = OpINotEqual %bool %ts_<index> %uint_0",
U"%tb_<index> = OpSelect %uint %tsb_<index> %t0_<index> %t2_<index>",
U"%td_<index> = OpSelect %uint %tsb_<index> %t1_<index> %t3_<index>" }, SccCheck::None},
{Recompile_VCvtPkrtzF16F32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VCvtPkrtzF16F32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U""}},
{Recompile_VMbcntHiU32B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VMbcntHiU32B32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U""}},
{Recompile_VMbcntLoU32B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VMbcntLoU32B32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U""}},
{Recompile_S_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SAndB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%t_<index> = OpBitwiseAnd %uint %t0_<index> %t1_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SBfmB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%to_<index> = OpBitwiseAnd %uint %t0_<index> %uint_31", U"%ts_<index> = OpBitwiseAnd %uint %t1_<index> %uint_31", U"%t_<index> = OpBitFieldInsert %uint %uint_0 %uint_0xffffffff %to_<index> %ts_<index>"}, SccCheck::None},
{Recompile_S_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SCselectB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%t22_<index> = OpLoad %uint %scc", U"%t2_<index> = OpINotEqual %bool %t22_<index> %uint_0", U"%t_<index> = OpSelect %uint %t2_<index> %t0_<index> %t1_<index>"}, SccCheck::None},
{Recompile_S_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SLshlB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%ts_<index> = OpBitwiseAnd %uint %t1_<index> %uint_31", U"%t_<index> = OpShiftLeftLogical %uint %t0_<index> %ts_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SLshrB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%ts_<index> = OpBitwiseAnd %uint %t1_<index> %uint_31", U"%t_<index> = OpShiftRightLogical %uint %t0_<index> %ts_<index>"}, SccCheck::NonZero},
{Recompile_S_XXX_I32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SAddI32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%t_<index> = OpIAdd %int %t0_<index> %t1_<index>"}, SccCheck::Overflow},
{Recompile_S_XXX_I32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SMulI32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%t_<index> = OpIMul %int %t0_<index> %t1_<index>"}, SccCheck::None},
{Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VAndB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%t_<index> = OpBitwiseAnd %uint %t0_<index> %t1_<index>"}},
{Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VBcntU32B32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%tb_<index> = OpBitCount %int %t0_<index>", U"%tbu_<index> = OpBitcast %uint %tb_<index>", U"%t_<index> = OpIAdd %uint %tbu_<index> %t1_<index>"}},
{Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VLshlB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%ts_<index> = OpBitwiseAnd %uint %t1_<index> %uint_31", U"%t_<index> = OpShiftLeftLogical %uint %t0_<index> %ts_<index>"}},
{Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VLshlrevB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%ts_<index> = OpBitwiseAnd %uint %t0_<index> %uint_31", U"%t_<index> = OpShiftLeftLogical %uint %t1_<index> %ts_<index>"}},
{Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VLshrB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%ts_<index> = OpBitwiseAnd %uint %t1_<index> %uint_31", U"%t_<index> = OpShiftRightLogical %uint %t0_<index> %ts_<index>"}},
{Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VLshrrevB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%ts_<index> = OpBitwiseAnd %uint %t0_<index> %uint_31", U"%t_<index> = OpShiftRightLogical %uint %t1_<index> %ts_<index>"}},
@ -4041,8 +4174,14 @@ static RecompilerFunc g_recomp_func[] = {
{Recompile_V_XXX_F32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VMulF32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%t_<index> = OpFMul %float %t0_<index> %t1_<index>"}},
{Recompile_V_XXX_F32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VSubF32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%t_<index> = OpFSub %float %t0_<index> %t1_<index>"}},
{Recompile_V_XXX_F32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VSubrevF32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%t_<index> = OpFSub %float %t1_<index> %t0_<index>"}},
{Recompile_V_XXX_I32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VAshrI32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%ts_<index> = OpBitwiseAnd %int %t1_<index> %int_31", U"%t_<index> = OpShiftRightArithmetic %int %t0_<index> %ts_<index>"}},
{Recompile_V_XXX_I32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VAshrrevI32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%ts_<index> = OpBitwiseAnd %int %t0_<index> %int_31", U"%t_<index> = OpShiftRightArithmetic %int %t1_<index> %ts_<index>"}},
{Recompile_V_XXX_I32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VMulLoI32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%t_<index> = OpFunctionCall %int %mul_lo_int %t0_<index> %t1_<index>"}},
{Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VMulLoU32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%t_<index> = OpFunctionCall %uint %mul_lo_uint %t0_<index> %t1_<index>"}},
{Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VMulHiU32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U"%t_<index> = OpFunctionCall %uint %mul_hi_uint %t0_<index> %t1_<index>"}},
{Recompile_VCvtPkrtzF16F32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VCvtPkrtzF16F32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U""}},
{Recompile_VMbcntHiU32B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VMbcntHiU32B32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U""}},
{Recompile_VMbcntLoU32B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VMbcntLoU32B32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {U""}},
{Recompile_SMovB32_SVdstSVsrc0, ShaderInstructionType::SMovB32, ShaderInstructionFormat::SVdstSVsrc0, {U""}},
@ -4063,8 +4202,6 @@ static RecompilerFunc g_recomp_func[] = {
{Recompile_SSwappcB64_Sdst2Ssrc02, ShaderInstructionType::SSwappcB64, ShaderInstructionFormat::Sdst2Ssrc02, {U""}},
{Recompile_SWqmB64_Sdst2Ssrc02, ShaderInstructionType::SWqmB64, ShaderInstructionFormat::Sdst2Ssrc02, {U""}},
{Recompile_SMovB64_Sdst2Ssrc0, ShaderInstructionType::SMovB64, ShaderInstructionFormat::Sdst2Ssrc0, {U""}},
{Recompile_SWaitcnt_Imm, ShaderInstructionType::SWaitcnt, ShaderInstructionFormat::Imm, {U""}},
{Recompile_TBufferLoadFormatXyzw_Vdata4VaddrSvSoffsIdxenFloat4, ShaderInstructionType::TBufferLoadFormatXyzw, ShaderInstructionFormat::Vdata4VaddrSvSoffsIdxenFloat4, {U""}},
@ -4076,33 +4213,41 @@ static RecompilerFunc g_recomp_func[] = {
{Recompile_VCmp_XXX_F32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpEqF32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpFOrdEqual"}},
{Recompile_VCmp_XXX_F32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpLeF32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpFOrdLessThanEqual"}},
{Recompile_VCmp_XXX_F32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpGeF32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpFOrdGreaterThanEqual"}},
{Recompile_VCmp_XXX_F32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpNeqF32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpFUnordNotEqual"}},
{Recompile_VCmp_XXX_I32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpEqI32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpIEqual"}},
{Recompile_VCmp_XXX_I32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpEqU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpIEqual"}},
{Recompile_VCmp_XXX_I32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpGtI32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpSGreaterThan"}},
{Recompile_VCmp_XXX_I32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpNeI32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpINotEqual"}},
{Recompile_VCmp_XXX_I32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpNeU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpINotEqual"}},
{Recompile_VCmp_XXX_U32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpGeU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpUGreaterThanEqual"}},
{Recompile_VCmp_XXX_U32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpLeU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpULessThanEqual"}},
{Recompile_VCmpx_XXX_I32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpxEqU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpIEqual"}},
{Recompile_VCmpx_XXX_I32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpxNeU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpINotEqual"}},
{Recompile_VCmpx_XXX_I32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpxNeU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpINotEqual"}},
{Recompile_VCmpx_XXX_U32_SmaskVsrc0Vsrc1, ShaderInstructionType::VCmpxGtU32, ShaderInstructionFormat::SmaskVsrc0Vsrc1, {U"OpUGreaterThan"}},
{Recompile_SCmp_XXX_U32_Ssrc0Ssrc1, ShaderInstructionType::SCmpEqU32, ShaderInstructionFormat::Ssrc0Ssrc1, {U"OpIEqual"}},
{Recompile_SCmp_XXX_U32_Ssrc0Ssrc1, ShaderInstructionType::SCmpGeU32, ShaderInstructionFormat::Ssrc0Ssrc1, {U"OpUGreaterThanEqual"}},
{Recompile_SCmp_XXX_U32_Ssrc0Ssrc1, ShaderInstructionType::SCmpLeU32, ShaderInstructionFormat::Ssrc0Ssrc1, {U"OpULessThanEqual"}},
{Recompile_SCmp_XXX_U32_Ssrc0Ssrc1, ShaderInstructionType::SCmpLgU32, ShaderInstructionFormat::Ssrc0Ssrc1, {U"OpINotEqual"}},
{Recompile_VCndmaskB32_VdstVsrc0Vsrc1Smask2, ShaderInstructionType::VCndmaskB32, ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2, {U""}},
{Recompile_VInterpP1F32_VdstVsrcAttrChan, ShaderInstructionType::VInterpP1F32, ShaderInstructionFormat::VdstVsrcAttrChan, {U""}},
{Recompile_VInterpP2F32_VdstVsrcAttrChan, ShaderInstructionType::VInterpP2F32, ShaderInstructionFormat::VdstVsrcAttrChan, {U""}},
{Recompile_VMadakF32_VdstVsrc0Vsrc1Vsrc2, ShaderInstructionType::VMadakF32, ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2, {U""}},
{Recompile_VMadF32_VdstVsrc0Vsrc1Vsrc2, ShaderInstructionType::VMadF32, ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2, {U""}},
{Recompile_V_XXX_U32_VdstVsrc0Vsrc1Vsrc2, ShaderInstructionType::VSadU32, ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2, {U"%td_<index> = OpFunctionCall %uint %abs_diff %t0_<index> %t1_<index>",
U"%t_<index> = OpIAdd %uint %td_<index> %t2_<index>"}},
{Recompile_V_XXX_U32_VdstVsrc0Vsrc1Vsrc2, ShaderInstructionType::VBfeU32, ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2, {U"%to_<index> = OpBitwiseAnd %uint %t1_<index> %uint_31",
U"%ts_<index> = OpBitwiseAnd %uint %t2_<index> %uint_31",
U"%t_<index> = OpBitFieldUExtract %uint %t0_<index> %to_<index> %ts_<index>"}},
{Recompile_V_XXX_U32_VdstVsrc0Vsrc1Vsrc2, ShaderInstructionType::VMadU32U24, ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2, {U"%tu0_<index> = OpBitwiseAnd %uint %t0_<index> %uint_0x00ffffff",
U"%tu1_<index> = OpBitwiseAnd %uint %t1_<index> %uint_0x00ffffff",
U"%tm_<index> = OpFunctionCall %uint %mul_lo_uint %tu0_<index> %tu1_<index>",
U"%t_<index> = OpIAdd %uint %tm_<index> %t2_<index>"}},
{Recompile_V_XXX_F32_VdstVsrc0Vsrc1Vsrc2, ShaderInstructionType::VMadF32, ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2, {U"%t_<index> = OpExtInst %float %GLSL_std_450 Fma %t0_<index> %t1_<index> %t2_<index>"}},
{Recompile_V_XXX_F32_VdstVsrc0Vsrc1Vsrc2, ShaderInstructionType::VMadakF32, ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2, {U"%t_<index> = OpExtInst %float %GLSL_std_450 Fma %t0_<index> %t1_<index> %t2_<index>"}},
{Recompile_V_XXX_F32_VdstVsrc0Vsrc1Vsrc2, ShaderInstructionType::VMadmkF32, ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2, {U"%t_<index> = OpExtInst %float %GLSL_std_450 Fma %t0_<index> %t1_<index> %t2_<index>"}},
{Recompile_V_XXX_U32_VdstVsrc0Vsrc1Vsrc2, ShaderInstructionType::VSadU32, ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2, {U"%td_<index> = OpFunctionCall %uint %abs_diff %t0_<index> %t1_<index>",
U"%t_<index> = OpIAdd %uint %td_<index> %t2_<index>"}},
{Recompile_V_XXX_U32_VdstVsrc0Vsrc1Vsrc2, ShaderInstructionType::VBfeU32, ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2, {U"%to_<index> = OpBitwiseAnd %uint %t1_<index> %uint_31",
U"%ts_<index> = OpBitwiseAnd %uint %t2_<index> %uint_31",
U"%t_<index> = OpBitFieldUExtract %uint %t0_<index> %to_<index> %ts_<index>"}},
{Recompile_V_XXX_U32_VdstVsrc0Vsrc1Vsrc2, ShaderInstructionType::VMadU32U24, ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2, {U"%tu0_<index> = OpBitwiseAnd %uint %t0_<index> %uint_0x00ffffff",
U"%tu1_<index> = OpBitwiseAnd %uint %t1_<index> %uint_0x00ffffff",
U"%tm_<index> = OpFunctionCall %uint %mul_lo_uint %tu0_<index> %tu1_<index>",
U"%t_<index> = OpIAdd %uint %tm_<index> %t2_<index>"}},
// clang-format on
};
@ -4580,15 +4725,17 @@ void Spirv::WriteTypes()
%function_fetch4 = OpTypeFunction %void %_ptr_Function_float %_ptr_Function_float %_ptr_Function_float %_ptr_Function_float %_ptr_Function_v4float
%function_u_u = OpTypeFunction %uint %uint %uint
%function_i_i = OpTypeFunction %int %int %int
%function_tbuffer_load_format_xyzw = OpTypeFunction %void %_ptr_Function_float %_ptr_Function_float %_ptr_Function_float %_ptr_Function_float %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int
%function_buffer_load_store_float1 = OpTypeFunction %void %_ptr_Function_float %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int
%function_buffer_load_float4 = OpTypeFunction %void %_ptr_Function_float %_ptr_Function_float %_ptr_Function_float %_ptr_Function_float %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int
%function_tbuffer_load_store_format_x = OpTypeFunction %void %_ptr_Function_float %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int
%function_sbuffer_load_dword = OpTypeFunction %void %_ptr_Function_uint %_ptr_Function_int %_ptr_Function_int
%function_sbuffer_load_dword_2 = OpTypeFunction %void %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_int %_ptr_Function_int
%function_sbuffer_load_dword_4 = OpTypeFunction %void %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_int %_ptr_Function_int
%function_sbuffer_load_dword_8 = OpTypeFunction %void %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_int %_ptr_Function_int
%function_sbuffer_load_dword_16 = OpTypeFunction %void %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_int %_ptr_Function_int
%function_tbuffer_load_format_xyzw = OpTypeFunction %void %_ptr_Function_float %_ptr_Function_float %_ptr_Function_float %_ptr_Function_float %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int
%function_buffer_load_store_float1 = OpTypeFunction %void %_ptr_Function_float %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int
%function_buffer_load_store_float2 = OpTypeFunction %void %_ptr_Function_float %_ptr_Function_float %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int
%function_buffer_load_float4 = OpTypeFunction %void %_ptr_Function_float %_ptr_Function_float %_ptr_Function_float %_ptr_Function_float %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int
%function_tbuffer_load_store_format_x = OpTypeFunction %void %_ptr_Function_float %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int
%function_tbuffer_load_store_format_xy = OpTypeFunction %void %_ptr_Function_float %_ptr_Function_float %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int %_ptr_Function_int
%function_sbuffer_load_dword = OpTypeFunction %void %_ptr_Function_uint %_ptr_Function_int %_ptr_Function_int
%function_sbuffer_load_dword_2 = OpTypeFunction %void %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_int %_ptr_Function_int
%function_sbuffer_load_dword_4 = OpTypeFunction %void %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_int %_ptr_Function_int
%function_sbuffer_load_dword_8 = OpTypeFunction %void %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_int %_ptr_Function_int
%function_sbuffer_load_dword_16 = OpTypeFunction %void %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_uint %_ptr_Function_int %_ptr_Function_int
)";
static const char32_t* pixel_types = UR"(
@ -5118,7 +5265,8 @@ void Spirv::WriteFunctions()
m_source += FUNC_ABS_DIFF;
}
if (m_code.HasAnyOf({ShaderInstructionType::VMulLoI32, ShaderInstructionType::VMadU32U24, ShaderInstructionType::VMulU32U24}))
if (m_code.HasAnyOf({ShaderInstructionType::VMulLoI32, ShaderInstructionType::VMulLoU32, ShaderInstructionType::VMulHiU32,
ShaderInstructionType::VMadU32U24, ShaderInstructionType::VMulU32U24}))
{
m_source += FUNC_MUL_EXTENDED;
}
@ -5141,10 +5289,13 @@ void Spirv::WriteFunctions()
m_source += TBUFFER_LOAD_FORMAT_XYZW;
}
if (m_code.HasAnyOf({ShaderInstructionType::BufferStoreDword, ShaderInstructionType::BufferStoreFormatX}))
if (m_code.HasAnyOf({ShaderInstructionType::BufferStoreDword, ShaderInstructionType::BufferStoreFormatX,
ShaderInstructionType::BufferStoreFormatXy}))
{
m_source += BUFFER_STORE_FLOAT1;
m_source += BUFFER_STORE_FLOAT2;
m_source += TBUFFER_STORE_FORMAT_X;
m_source += TBUFFER_STORE_FORMAT_XY;
}
if (m_code.HasAnyOf({ShaderInstructionType::SBufferLoadDword, ShaderInstructionType::SBufferLoadDwordx2,
@ -5188,6 +5339,9 @@ void Spirv::FindConstants()
AddConstantInt(16);
AddConstantInt(31);
AddConstantInt(36);
AddConstantInt(39);
AddConstantInt(92);
AddConstantInt(95);
AddConstantInt(119);
AddConstantUint(24);
AddConstantUint(31);
@ -5195,6 +5349,7 @@ void Spirv::FindConstants()
AddConstantUint(127);
AddConstantUint(0x3fff);
AddConstantUint(0xffffff);
AddConstantUint(0xffffffff);
}
if (m_cs_input_info != nullptr)
{