diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index 5e1891a301..8dd52b2dcd 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -41,6 +41,14 @@ #include #endif +enum class GPUReplacementSkip { + MEMSET = 1, + MEMCPY = 2, + MEMMOVE = 4, +}; + +static int skipGPUReplacements = 0; + // I think these have to be pretty accurate as these are libc replacements, // but we can probably get away with approximating the VFPU vsin/vcos and vrot // pretty roughly. @@ -119,8 +127,10 @@ static int Replace_memcpy() { // Some games use memcpy on executable code. We need to flush emuhack ops. currentMIPS->InvalidateICache(srcPtr, bytes); - if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { - skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); + if ((skipGPUReplacements & (int)GPUReplacementSkip::MEMCPY) == 0) { + if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { + skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); + } } if (!skip && bytes != 0) { u8 *dst = Memory::GetPointer(destPtr); @@ -159,8 +169,10 @@ static int Replace_memcpy_jak() { return 5; } currentMIPS->InvalidateICache(srcPtr, bytes); - if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { - skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); + if ((skipGPUReplacements & (int)GPUReplacementSkip::MEMCPY) == 0) { + if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { + skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); + } } if (!skip && bytes != 0) { u8 *dst = Memory::GetPointer(destPtr); @@ -197,8 +209,10 @@ static int Replace_memcpy16() { // Some games use memcpy on executable code. We need to flush emuhack ops. currentMIPS->InvalidateICache(srcPtr, bytes); - if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { - skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); + if ((skipGPUReplacements & (int)GPUReplacementSkip::MEMCPY) == 0) { + if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { + skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); + } } if (!skip && bytes != 0) { u8 *dst = Memory::GetPointer(destPtr); @@ -220,8 +234,10 @@ static int Replace_memcpy_swizzled() { u32 srcPtr = PARAM(1); u32 pitch = PARAM(2); u32 h = PARAM(4); - if (Memory::IsVRAMAddress(srcPtr)) { - gpu->PerformMemoryDownload(srcPtr, pitch * h); + if ((skipGPUReplacements & (int)GPUReplacementSkip::MEMCPY) == 0) { + if (Memory::IsVRAMAddress(srcPtr)) { + gpu->PerformMemoryDownload(srcPtr, pitch * h); + } } u8 *dstp = Memory::GetPointer(destPtr); const u8 *srcp = Memory::GetPointer(srcPtr); @@ -258,9 +274,11 @@ static int Replace_memmove() { bool skip = false; // Some games use memcpy on executable code. We need to flush emuhack ops. - currentMIPS->InvalidateICache(srcPtr, bytes); - if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { - skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); + if ((skipGPUReplacements & (int)GPUReplacementSkip::MEMMOVE) == 0) { + currentMIPS->InvalidateICache(srcPtr, bytes); + if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { + skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); + } } if (!skip && bytes != 0) { u8 *dst = Memory::GetPointer(destPtr); @@ -282,7 +300,7 @@ static int Replace_memset() { u8 value = PARAM(1); u32 bytes = PARAM(2); bool skip = false; - if (Memory::IsVRAMAddress(destPtr)) { + if (Memory::IsVRAMAddress(destPtr) && (skipGPUReplacements & (int)GPUReplacementSkip::MEMSET) == 0) { skip = gpu->PerformMemorySet(destPtr, value, bytes); } if (!skip && bytes != 0) { @@ -309,7 +327,7 @@ static int Replace_memset_jak() { } bool skip = false; - if (Memory::IsVRAMAddress(destPtr)) { + if (Memory::IsVRAMAddress(destPtr) && (skipGPUReplacements & (int)GPUReplacementSkip::MEMSET) == 0) { skip = gpu->PerformMemorySet(destPtr, value, bytes); } if (!skip && bytes != 0) { @@ -599,6 +617,17 @@ static bool GetMIPSStaticAddress(u32 &addr, s32 lui_offset, s32 lw_offset) { return true; } +static bool GetMIPSGPAddress(u32 &addr, s32 offset) { + const MIPSOpcode loadOp = Memory::Read_Instruction(currentMIPS->pc + offset, true); + if (MIPS_GET_RS(loadOp) == MIPS_REG_GP) { + s16 gpoff = (s16)(u16)(loadOp & 0x0000FFFF); + addr = currentMIPS->r[MIPS_REG_GP] + gpoff; + return true; + } + + return false; +} + static int Hook_godseaterburst_blit_texture() { u32 texaddr; // Only if there's no texture. @@ -1032,7 +1061,6 @@ static int Hook_tonyhawkp8_upload_tutorial_frame() { const u32 fb_address = currentMIPS->r[MIPS_REG_A0]; if (Memory::IsVRAMAddress(fb_address)) { gpu->PerformMemoryUpload(fb_address, 0x00088000); - CBreakPoints::ExecMemCheck(fb_address, true, 0x00088000, currentMIPS->pc); } return 0; } @@ -1145,7 +1173,6 @@ static int Hook_mytranwars_upload_frame() { u32 fb_address = currentMIPS->r[MIPS_REG_S0]; if (Memory::IsVRAMAddress(fb_address)) { gpu->PerformMemoryUpload(fb_address, 0x00088000); - CBreakPoints::ExecMemCheck(fb_address, true, 0x00088000, currentMIPS->pc); } return 0; } @@ -1183,6 +1210,29 @@ static int Hook_marvelalliance1_copy_after() { return 0; } +static int Hook_starocean_clear_framebuf_before() { + skipGPUReplacements |= (int)GPUReplacementSkip::MEMSET; + return 0; +} + +static int Hook_starocean_clear_framebuf_after() { + skipGPUReplacements &= ~(int)GPUReplacementSkip::MEMSET; + + // This hook runs after the copy, this is the final memcpy destination. + u32 framebuf = currentMIPS->r[MIPS_REG_V0] - 512 * 4 * 271; + u32 y_address, h_address; + + if (GetMIPSGPAddress(y_address, -204) && GetMIPSGPAddress(h_address, -200)) { + int y = (s16)Memory::Read_U16(y_address); + int h = (s16)Memory::Read_U16(h_address); + + DEBUG_LOG(HLE, "starocean_clear_framebuf() - %08x y=%d-%d", framebuf, y, h); + // TODO: This is always clearing to 0, actually, which could be faster than an upload. + gpu->PerformMemoryUpload(framebuf + 512 * y * 4, 512 * h * 4); + } + return 0; +} + #define JITFUNC(f) (&MIPSComp::MIPSFrontendInterface::f) // Can either replace with C functions or functions emitted in Asm/ArmAsm. @@ -1292,6 +1342,8 @@ static const ReplacementTableEntry entries[] = { { "marvelalliance1_copy", &Hook_marvelalliance1_copy_after, 0, REPFLAG_HOOKENTER, 0x638 }, { "marvelalliance1_copy", &Hook_marvelalliance1_copy_a1_before, 0, REPFLAG_HOOKENTER, 0x664 }, { "marvelalliance1_copy", &Hook_marvelalliance1_copy_after, 0, REPFLAG_HOOKENTER, 0x69c }, + { "starocean_clear_framebuf", &Hook_starocean_clear_framebuf_before, 0, REPFLAG_HOOKENTER, 0 }, + { "starocean_clear_framebuf", &Hook_starocean_clear_framebuf_after, 0, REPFLAG_HOOKEXIT, 0 }, {} }; @@ -1306,6 +1358,8 @@ void Replacement_Init() { continue; replacementNameLookup[entry->name].push_back(i); } + + skipGPUReplacements = 0; } void Replacement_Shutdown() { @@ -1313,8 +1367,6 @@ void Replacement_Shutdown() { replacementNameLookup.clear(); } -// TODO: Do something on load state? - int GetNumReplacementFuncs() { return ARRAY_SIZE(entries); } diff --git a/Core/MIPS/MIPSAnalyst.cpp b/Core/MIPS/MIPSAnalyst.cpp index d651ec7827..3070c864b3 100644 --- a/Core/MIPS/MIPSAnalyst.cpp +++ b/Core/MIPS/MIPSAnalyst.cpp @@ -382,6 +382,7 @@ static const HardHashTableEntry hardcodedHashes[] = { { 0xb0ef265e87899f0a, 32, "vector_divide_t_s", }, { 0xb183a37baa12607b, 32, "vscl_t", }, { 0xb1a3e60a89af9857, 20, "fabs", }, + { 0xb25670ff47b4843d, 232, "starocean_clear_framebuf" }, { 0xb3fef47fb27d57c9, 44, "vector_scale_t", }, { 0xb43fd5078ae78029, 84, "send_commandi_stall", }, { 0xb43ffbd4dc446dd2, 324, "atan2f", }, diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index c8516d4984..9e0081ec8e 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -738,6 +738,7 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int DrawTextureFlags flags = (vfb || g_Config.iBufFilter == SCALE_LINEAR) ? DRAWTEX_LINEAR : DRAWTEX_NEAREST; Bind2DShader(); DrawActiveTexture(dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, u0, v0, u1, v1, ROTATION_LOCKED_HORIZONTAL, flags); + gpuStats.numUploads++; } void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags) { diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp index df3a9aa6f1..24dcfdd734 100644 --- a/GPU/D3D11/GPU_D3D11.cpp +++ b/GPU/D3D11/GPU_D3D11.cpp @@ -527,7 +527,7 @@ void GPU_D3D11::GetStats(char *buffer, size_t bufsize) { "Cached, Uncached Vertices Drawn: %i, %i\n" "FBOs active: %i\n" "Textures active: %i, decoded: %i invalidated: %i\n" - "Readbacks: %d\n" + "Readbacks: %d, uploads: %d\n" "Vertex, Fragment shaders loaded: %i, %i\n", gpuStats.msProcessingDisplayLists * 1000.0f, gpuStats.numDrawCalls, @@ -546,6 +546,7 @@ void GPU_D3D11::GetStats(char *buffer, size_t bufsize) { gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, gpuStats.numReadbacks, + gpuStats.numUploads, shaderManagerD3D11_->GetNumVertexShaders(), shaderManagerD3D11_->GetNumFragmentShaders() ); diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index fb071c7a50..158442285c 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -501,7 +501,7 @@ void GPU_DX9::GetStats(char *buffer, size_t bufsize) { "Cached, Uncached Vertices Drawn: %i, %i\n" "FBOs active: %i\n" "Textures active: %i, decoded: %i invalidated: %i\n" - "Readbacks: %d\n" + "Readbacks: %d, uploads: %d\n" "Vertex, Fragment shaders loaded: %i, %i\n", gpuStats.msProcessingDisplayLists * 1000.0f, gpuStats.numDrawCalls, @@ -520,6 +520,7 @@ void GPU_DX9::GetStats(char *buffer, size_t bufsize) { gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, gpuStats.numReadbacks, + gpuStats.numUploads, shaderManagerDX9_->GetNumVertexShaders(), shaderManagerDX9_->GetNumFragmentShaders() ); diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 9f94d37aa6..96b7763c25 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -733,7 +733,7 @@ void GPU_GLES::GetStats(char *buffer, size_t bufsize) { "Cached, Uncached Vertices Drawn: %i, %i\n" "FBOs active: %i\n" "Textures active: %i, decoded: %i invalidated: %i\n" - "Readbacks: %d\n" + "Readbacks: %d, uploads: %d\n" "Vertex, Fragment, Programs loaded: %i, %i, %i\n", gpuStats.msProcessingDisplayLists * 1000.0f, gpuStats.numDrawCalls, @@ -752,6 +752,7 @@ void GPU_GLES::GetStats(char *buffer, size_t bufsize) { gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, gpuStats.numReadbacks, + gpuStats.numUploads, shaderManagerGL_->GetNumVertexShaders(), shaderManagerGL_->GetNumFragmentShaders(), shaderManagerGL_->GetNumPrograms()); diff --git a/GPU/GPU.h b/GPU/GPU.h index 7e10db49aa..08d036560c 100644 --- a/GPU/GPU.h +++ b/GPU/GPU.h @@ -68,6 +68,7 @@ struct GPUStatistics { numFlushes = 0; numTexturesDecoded = 0; numReadbacks = 0; + numUploads = 0; numClears = 0; msProcessingDisplayLists = 0; vertexGPUCycles = 0; @@ -88,6 +89,7 @@ struct GPUStatistics { int numShaderSwitches; int numTexturesDecoded; int numReadbacks; + int numUploads; int numClears; double msProcessingDisplayLists; int vertexGPUCycles; diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 2f3689081a..b7db5cd197 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -668,7 +668,7 @@ void GPU_Vulkan::GetStats(char *buffer, size_t bufsize) { "Cached, Uncached Vertices Drawn: %i, %i\n" "FBOs active: %i\n" "Textures active: %i, decoded: %i invalidated: %i\n" - "Readbacks: %d\n" + "Readbacks: %d, uploads: %d\n" "Vertex, Fragment, Pipelines loaded: %i, %i, %i\n" "Pushbuffer space used: UBO %d, Vtx %d, Idx %d\n" "%s\n", @@ -689,6 +689,7 @@ void GPU_Vulkan::GetStats(char *buffer, size_t bufsize) { gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, gpuStats.numReadbacks, + gpuStats.numUploads, shaderManagerVulkan_->GetNumVertexShaders(), shaderManagerVulkan_->GetNumFragmentShaders(), pipelineManager_->GetNumPipelines(),