From f198a7f5876dc254afeb5fbd068f18c353bdf3c7 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 3 Aug 2014 13:04:00 -0700 Subject: [PATCH 1/4] Use checked pointers in all function replacements. Some games are still hitting bugs and there's no way to disable this "fast memory" without losing graphical effects. Better to do it evenly than weird hacks and extra logging/checks. --- Core/HLE/ReplaceTables.cpp | 194 +++++++++++++++++++++---------------- 1 file changed, 113 insertions(+), 81 deletions(-) diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index 58967b3a8f..e42abe3c14 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -113,11 +113,13 @@ static int Replace_memcpy() { if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); } - if (!skip && bytes != 0 && destPtr != 0) { - u8 *dst = Memory::GetPointerUnchecked(destPtr); - const u8 *src = Memory::GetPointerUnchecked(srcPtr); + if (!skip && bytes != 0) { + u8 *dst = Memory::GetPointer(destPtr); + const u8 *src = Memory::GetPointer(srcPtr); - if (std::min(destPtr, srcPtr) + bytes > std::max(destPtr, srcPtr)) { + if (!dst || !src) { + // Already logged. + } else if (std::min(destPtr, srcPtr) + bytes > std::max(destPtr, srcPtr)) { // Overlap. Star Ocean breaks if it's not handled in 16 bytes blocks. 
const u32 blocks = bytes & ~0x0f; for (u32 offset = 0; offset < blocks; offset += 0x10) { @@ -150,9 +152,11 @@ static int Replace_memcpy16() { skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); } if (!skip && bytes != 0) { - u8 *dst = Memory::GetPointerUnchecked(destPtr); - u8 *src = Memory::GetPointerUnchecked(srcPtr); - memmove(dst, src, bytes); + u8 *dst = Memory::GetPointer(destPtr); + const u8 *src = Memory::GetPointer(srcPtr); + if (dst && src) { + memmove(dst, src, bytes); + } } RETURN(destPtr); #ifndef MOBILE_DEVICE @@ -170,22 +174,24 @@ static int Replace_memcpy_swizzled() { if (Memory::IsVRAMAddress(srcPtr)) { gpu->PerformMemoryDownload(srcPtr, pitch * h); } - u8 *dstp = Memory::GetPointerUnchecked(destPtr); - const u8 *srcp = Memory::GetPointerUnchecked(srcPtr); + u8 *dstp = Memory::GetPointer(destPtr); + const u8 *srcp = Memory::GetPointer(srcPtr); - const u8 *ysrcp = srcp; - for (u32 y = 0; y < h; y += 8) { - const u8 *xsrcp = ysrcp; - for (u32 x = 0; x < pitch; x += 16) { - const u8 *src = xsrcp; - for (int n = 0; n < 8; ++n) { - memcpy(dstp, src, 16); - src += pitch; - dstp += 16; + if (dstp && srcp) { + const u8 *ysrcp = srcp; + for (u32 y = 0; y < h; y += 8) { + const u8 *xsrcp = ysrcp; + for (u32 x = 0; x < pitch; x += 16) { + const u8 *src = xsrcp; + for (int n = 0; n < 8; ++n) { + memcpy(dstp, src, 16); + src += pitch; + dstp += 16; + } + xsrcp += 16; } - xsrcp += 16; + ysrcp += 8 * pitch; } - ysrcp += 8 * pitch; } RETURN(0); @@ -208,9 +214,11 @@ static int Replace_memmove() { skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); } if (!skip && bytes != 0) { - u8 *dst = Memory::GetPointerUnchecked(destPtr); - u8 *src = Memory::GetPointerUnchecked(srcPtr); - memmove(dst, src, bytes); + u8 *dst = Memory::GetPointer(destPtr); + const u8 *src = Memory::GetPointer(srcPtr); + if (dst && src) { + memmove(dst, src, bytes); + } } RETURN(destPtr); #ifndef MOBILE_DEVICE @@ -222,15 +230,17 @@ static int Replace_memmove() { static int Replace_memset() { 
u32 destPtr = PARAM(0); - u8 *dst = Memory::GetPointerUnchecked(destPtr); u8 value = PARAM(1); u32 bytes = PARAM(2); bool skip = false; if (Memory::IsVRAMAddress(destPtr)) { skip = gpu->PerformMemorySet(destPtr, value, bytes); } - if (!skip) { - memset(dst, value, bytes); + if (!skip && bytes != 0) { + u8 *dst = Memory::GetPointer(destPtr); + if (dst) { + memset(dst, value, bytes); + } } RETURN(destPtr); #ifndef MOBILE_DEVICE @@ -241,43 +251,55 @@ static int Replace_memset() { static int Replace_strlen() { u32 srcPtr = PARAM(0); - const char *src = (const char *)Memory::GetPointerUnchecked(srcPtr); - u32 len = (u32)strlen(src); + const char *src = (const char *)Memory::GetPointer(srcPtr); + u32 len = src ? (u32)strlen(src) : 0UL; RETURN(len); return 7 + len * 4; // approximation } static int Replace_strcpy() { u32 destPtr = PARAM(0); - char *dst = (char *)Memory::GetPointerUnchecked(destPtr); - const char *src = (const char *)Memory::GetPointerUnchecked(PARAM(1)); - strcpy(dst, src); + char *dst = (char *)Memory::GetPointer(destPtr); + const char *src = (const char *)Memory::GetPointer(PARAM(1)); + if (dst && src) { + strcpy(dst, src); + } RETURN(destPtr); return 10; // approximation } static int Replace_strncpy() { u32 destPtr = PARAM(0); - char *dst = (char *)Memory::GetPointerUnchecked(destPtr); - const char *src = (const char *)Memory::GetPointerUnchecked(PARAM(1)); + char *dst = (char *)Memory::GetPointer(destPtr); + const char *src = (const char *)Memory::GetPointer(PARAM(1)); u32 bytes = PARAM(2); - strncpy(dst, src, bytes); + if (dst && src && bytes != 0) { + strncpy(dst, src, bytes); + } RETURN(destPtr); return 10; // approximation } static int Replace_strcmp() { - const char *a = (const char *)Memory::GetPointerUnchecked(PARAM(0)); - const char *b = (const char *)Memory::GetPointerUnchecked(PARAM(1)); - RETURN(strcmp(a, b)); + const char *a = (const char *)Memory::GetPointer(PARAM(0)); + const char *b = (const char *)Memory::GetPointer(PARAM(1)); + if (a 
&& b) { + RETURN(strcmp(a, b)); + } else { + RETURN(0); + } return 10; // approximation } static int Replace_strncmp() { - const char *a = (const char *)Memory::GetPointerUnchecked(PARAM(0)); - const char *b = (const char *)Memory::GetPointerUnchecked(PARAM(1)); + const char *a = (const char *)Memory::GetPointer(PARAM(0)); + const char *b = (const char *)Memory::GetPointer(PARAM(1)); u32 bytes = PARAM(2); - RETURN(strncmp(a, b, bytes)); + if (a && b && bytes != 0) { + RETURN(strncmp(a, b, bytes)); + } else { + RETURN(0); + } return 10 + bytes / 4; // approximation } @@ -287,12 +309,14 @@ static int Replace_fabsf() { } static int Replace_vmmul_q_transp() { - float *out = (float *)Memory::GetPointerUnchecked(PARAM(0)); - const float *a = (const float *)Memory::GetPointerUnchecked(PARAM(1)); - const float *b = (const float *)Memory::GetPointerUnchecked(PARAM(2)); + float *out = (float *)Memory::GetPointer(PARAM(0)); + const float *a = (const float *)Memory::GetPointer(PARAM(1)); + const float *b = (const float *)Memory::GetPointer(PARAM(2)); // TODO: Actually use an optimized matrix multiply here... 
- Matrix4ByMatrix4(out, b, a); + if (out && b && a) { + Matrix4ByMatrix4(out, b, a); + } return 16; } @@ -300,46 +324,49 @@ static int Replace_vmmul_q_transp() { // a1 = matrix // a2 = source address static int Replace_gta_dl_write_matrix() { - u32 *ptr = (u32 *)Memory::GetPointerUnchecked(PARAM(0)); - u32 *dest = (u32_le *)Memory::GetPointerUnchecked(ptr[0]); - u32 *src = (u32_le *)Memory::GetPointerUnchecked(PARAM(2)); + u32 *ptr = (u32 *)Memory::GetPointer(PARAM(0)); + u32 *dest = (u32_le *)Memory::GetPointer(ptr[0]); + u32 *src = (u32_le *)Memory::GetPointer(PARAM(2)); u32 matrix = PARAM(1) << 24; + if (ptr && src && dest) { #if defined(_M_IX86) || defined(_M_X64) - __m128i topBytes = _mm_set1_epi32(matrix); - __m128i m0 = _mm_loadu_si128((const __m128i *)src); - __m128i m1 = _mm_loadu_si128((const __m128i *)(src + 4)); - __m128i m2 = _mm_loadu_si128((const __m128i *)(src + 8)); - __m128i m3 = _mm_loadu_si128((const __m128i *)(src + 12)); - m0 = _mm_or_si128(_mm_srli_epi32(m0, 8), topBytes); - m1 = _mm_or_si128(_mm_srli_epi32(m1, 8), topBytes); - m2 = _mm_or_si128(_mm_srli_epi32(m2, 8), topBytes); - m3 = _mm_or_si128(_mm_srli_epi32(m3, 8), topBytes); - // These three stores overlap by a word, due to the offsets. 
- _mm_storeu_si128((__m128i *)dest, m0); - _mm_storeu_si128((__m128i *)(dest + 3), m1); - _mm_storeu_si128((__m128i *)(dest + 6), m2); - // Store the last one in parts to not overwrite forwards (probably mostly risk free though) - _mm_storel_epi64((__m128i *)(dest + 9), m3); - m3 = _mm_srli_si128(m3, 8); - _mm_store_ss((float *)(dest + 11), _mm_castsi128_ps(m3)); + __m128i topBytes = _mm_set1_epi32(matrix); + __m128i m0 = _mm_loadu_si128((const __m128i *)src); + __m128i m1 = _mm_loadu_si128((const __m128i *)(src + 4)); + __m128i m2 = _mm_loadu_si128((const __m128i *)(src + 8)); + __m128i m3 = _mm_loadu_si128((const __m128i *)(src + 12)); + m0 = _mm_or_si128(_mm_srli_epi32(m0, 8), topBytes); + m1 = _mm_or_si128(_mm_srli_epi32(m1, 8), topBytes); + m2 = _mm_or_si128(_mm_srli_epi32(m2, 8), topBytes); + m3 = _mm_or_si128(_mm_srli_epi32(m3, 8), topBytes); + // These three stores overlap by a word, due to the offsets. + _mm_storeu_si128((__m128i *)dest, m0); + _mm_storeu_si128((__m128i *)(dest + 3), m1); + _mm_storeu_si128((__m128i *)(dest + 6), m2); + // Store the last one in parts to not overwrite forwards (probably mostly risk free though) + _mm_storel_epi64((__m128i *)(dest + 9), m3); + m3 = _mm_srli_si128(m3, 8); + _mm_store_ss((float *)(dest + 11), _mm_castsi128_ps(m3)); #else - // Bit tricky to SIMD (note the offsets) but should be doable if not perfect - dest[0] = matrix | (src[0] >> 8); - dest[1] = matrix | (src[1] >> 8); - dest[2] = matrix | (src[2] >> 8); - dest[3] = matrix | (src[4] >> 8); - dest[4] = matrix | (src[5] >> 8); - dest[5] = matrix | (src[6] >> 8); - dest[6] = matrix | (src[8] >> 8); - dest[7] = matrix | (src[9] >> 8); - dest[8] = matrix | (src[10] >> 8); - dest[9] = matrix | (src[12] >> 8); - dest[10] = matrix | (src[13] >> 8); - dest[11] = matrix | (src[14] >> 8); + // Bit tricky to SIMD (note the offsets) but should be doable if not perfect + dest[0] = matrix | (src[0] >> 8); + dest[1] = matrix | (src[1] >> 8); + dest[2] = matrix | (src[2] >> 
8); + dest[3] = matrix | (src[4] >> 8); + dest[4] = matrix | (src[5] >> 8); + dest[5] = matrix | (src[6] >> 8); + dest[6] = matrix | (src[8] >> 8); + dest[7] = matrix | (src[9] >> 8); + dest[8] = matrix | (src[10] >> 8); + dest[9] = matrix | (src[12] >> 8); + dest[10] = matrix | (src[13] >> 8); + dest[11] = matrix | (src[14] >> 8); #endif - (*ptr) += 0x30; + (*ptr) += 0x30; + } + RETURN(0); return 38; } @@ -348,9 +375,14 @@ static int Replace_gta_dl_write_matrix() { // TODO: Inline into a few NEON or SSE instructions - especially if a1 is a known immediate! // Anyway, not sure if worth it. There's not that many matrices written per frame normally. static int Replace_dl_write_matrix() { - u32 *dlStruct = (u32 *)Memory::GetPointerUnchecked(PARAM(0)); - u32 *dest = (u32 *)Memory::GetPointerUnchecked(dlStruct[2]); - u32 *src = (u32 *)Memory::GetPointerUnchecked(PARAM(2)); + u32 *dlStruct = (u32 *)Memory::GetPointer(PARAM(0)); + u32 *dest = (u32 *)Memory::GetPointer(dlStruct[2]); + u32 *src = (u32 *)Memory::GetPointer(PARAM(2)); + + if (!dlStruct || !dest || !src) { + RETURN(0); + return 60; + } u32 matrix; int count = 12; From d060a06fa6ec05857cc03214f280d4dbf0e85a0b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 3 Aug 2014 13:12:45 -0700 Subject: [PATCH 2/4] Disable a bunch of function replacements. These are just for speed, let's turn them off. Using a flag because: * I think there's still some issue with savestates, not sure. * We might swap this flag to a separate option. 
--- Core/HLE/ReplaceTables.cpp | 53 +++++++++++++++++++------------------- Core/HLE/ReplaceTables.h | 10 ++++--- Core/MIPS/ARM/ArmJit.cpp | 7 ++--- Core/MIPS/x86/Jit.cpp | 7 ++--- 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index e42abe3c14..87d2b1997e 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -580,43 +580,44 @@ static const ReplacementTableEntry entries[] = { // should of course be implemented JIT style, inline. /* These two collide (same hash) and thus can't be replaced :/ - { "asinf", &Replace_asinf, 0, 0}, - { "acosf", &Replace_acosf, 0, 0}, + { "asinf", &Replace_asinf, 0, REPFLAG_DISABLED }, + { "acosf", &Replace_acosf, 0, REPFLAG_DISABLED }, */ - { "sinf", &Replace_sinf, 0, 0}, - { "cosf", &Replace_cosf, 0, 0}, - { "tanf", &Replace_tanf, 0, 0}, + { "sinf", &Replace_sinf, 0, REPFLAG_DISABLED }, + { "cosf", &Replace_cosf, 0, REPFLAG_DISABLED }, + { "tanf", &Replace_tanf, 0, REPFLAG_DISABLED }, - { "atanf", &Replace_atanf, 0, 0}, - { "sqrtf", &Replace_sqrtf, 0, 0}, - { "atan2f", &Replace_atan2f, 0, 0}, - { "floorf", &Replace_floorf, 0, 0}, - { "ceilf", &Replace_ceilf, 0, 0}, - { "memcpy", &Replace_memcpy, 0, 0}, - { "memcpy16", &Replace_memcpy16, 0, 0}, - { "memcpy_swizzled", &Replace_memcpy_swizzled, 0, 0}, - { "memmove", &Replace_memmove, 0, 0}, - { "memset", &Replace_memset, 0, 0}, - { "strlen", &Replace_strlen, 0, 0}, - { "strcpy", &Replace_strcpy, 0, 0}, - { "strncpy", &Replace_strncpy, 0, 0}, - { "strcmp", &Replace_strcmp, 0, 0}, - { "strncmp", &Replace_strncmp, 0, 0}, - { "fabsf", &Replace_fabsf, &MIPSComp::Jit::Replace_fabsf, REPFLAG_ALLOWINLINE}, - { "dl_write_matrix", &Replace_dl_write_matrix, 0, 0}, // &MIPSComp::Jit::Replace_dl_write_matrix, 0}, - { "dl_write_matrix_2", &Replace_dl_write_matrix, 0, 0}, - { "gta_dl_write_matrix", &Replace_gta_dl_write_matrix, 0, 0}, + { "atanf", &Replace_atanf, 0, REPFLAG_DISABLED }, + { "sqrtf", 
&Replace_sqrtf, 0, REPFLAG_DISABLED }, + { "atan2f", &Replace_atan2f, 0, REPFLAG_DISABLED }, + { "floorf", &Replace_floorf, 0, REPFLAG_DISABLED }, + { "ceilf", &Replace_ceilf, 0, REPFLAG_DISABLED }, + { "memcpy", &Replace_memcpy, 0, 0 }, + { "memcpy16", &Replace_memcpy16, 0, 0 }, + { "memcpy_swizzled", &Replace_memcpy_swizzled, 0, 0 }, + { "memmove", &Replace_memmove, 0, 0 }, + { "memset", &Replace_memset, 0, 0 }, + { "strlen", &Replace_strlen, 0, REPFLAG_DISABLED }, + { "strcpy", &Replace_strcpy, 0, REPFLAG_DISABLED }, + { "strncpy", &Replace_strncpy, 0, REPFLAG_DISABLED }, + { "strcmp", &Replace_strcmp, 0, REPFLAG_DISABLED }, + { "strncmp", &Replace_strncmp, 0, REPFLAG_DISABLED }, + { "fabsf", &Replace_fabsf, &MIPSComp::Jit::Replace_fabsf, REPFLAG_ALLOWINLINE | REPFLAG_DISABLED }, + { "dl_write_matrix", &Replace_dl_write_matrix, 0, REPFLAG_DISABLED }, // &MIPSComp::Jit::Replace_dl_write_matrix, REPFLAG_DISABLED }, + { "dl_write_matrix_2", &Replace_dl_write_matrix, 0, REPFLAG_DISABLED }, + { "gta_dl_write_matrix", &Replace_gta_dl_write_matrix, 0, REPFLAG_DISABLED }, // dl_write_matrix_3 doesn't take the dl as a parameter, it accesses a global instead. Need to extract the address of the global from the code when replacing... // Haven't investigated write_matrix_4 and 5 but I think they are similar to 1 and 2. - // { "vmmul_q_transp", &Replace_vmmul_q_transp, 0, 0}, + // { "vmmul_q_transp", &Replace_vmmul_q_transp, 0, REPFLAG_DISABLED }, { "godseaterburst_blit_texture", &Hook_godseaterburst_blit_texture, 0, REPFLAG_HOOKENTER}, { "hexyzforce_monoclome_thread", &Hook_hexyzforce_monoclome_thread, 0, REPFLAG_HOOKENTER, 0x58}, { "starocean_write_stencil", &Hook_starocean_write_stencil, 0, REPFLAG_HOOKENTER, 0x260}, { "topx_create_saveicon", &Hook_topx_create_saveicon, 0, REPFLAG_HOOKENTER, 0x34}, { "ff1_battle_effect", &Hook_ff1_battle_effect, 0, REPFLAG_HOOKENTER}, + // This is actually used in other games, not just Dissidia. 
{ "dissidia_recordframe_avi", &Hook_dissidia_recordframe_avi, 0, REPFLAG_HOOKENTER}, {} }; @@ -645,7 +646,7 @@ int GetReplacementFuncIndex(u64 hash, int funcSize) { // TODO: Build a lookup and keep it around for (size_t i = 0; i < ARRAY_SIZE(entries); i++) { - if (!entries[i].name) + if (!entries[i].name || (entries[i].flags & REPFLAG_DISABLED) != 0) continue; if (!strcmp(name, entries[i].name)) { return (int)i; diff --git a/Core/HLE/ReplaceTables.h b/Core/HLE/ReplaceTables.h index 1da6396931..e994bd0458 100644 --- a/Core/HLE/ReplaceTables.h +++ b/Core/HLE/ReplaceTables.h @@ -39,11 +39,13 @@ typedef int (* ReplaceFunc)(); enum { - REPFLAG_ALLOWINLINE = 1, - // Note that this will re-execute in a funciton that loops at start. - REPFLAG_HOOKENTER = 2, + REPFLAG_ALLOWINLINE = 0x01, + // Used to keep things around but disable them. + REPFLAG_DISABLED = 0x02, + // Note that this will re-execute in a function that loops at start. + REPFLAG_HOOKENTER = 0x04, // Only hooks jr ra, so only use on funcs that have that. - REPFLAG_HOOKEXIT = 4, + REPFLAG_HOOKEXIT = 0x08, }; // Kind of similar to HLE functions but with different data. diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index 450551ba98..b47af4cf7c 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -362,7 +362,7 @@ bool Jit::ReplaceJalTo(u32 dest) { return false; } - if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) { + if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT | REPFLAG_DISABLED)) { // If it's a hook, we can't replace the jal, we have to go inside the func. return false; } @@ -415,8 +415,9 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) return; } - // JIT goes first. 
- if (entry->jitReplaceFunc) { + if (entry->flags & REPFLAG_DISABLED) { + MIPSCompileOp(Memory::Read_Instruction(js.compilerPC, true)); + } else if (entry->jitReplaceFunc) { MIPSReplaceFunc repl = entry->jitReplaceFunc; int cycles = (this->*repl)(); diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index fd716bce68..d9d92ee32f 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -422,7 +422,7 @@ bool Jit::ReplaceJalTo(u32 dest) { return false; } - if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) { + if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT | REPFLAG_DISABLED)) { // If it's a hook, we can't replace the jal, we have to go inside the func. return false; } @@ -471,8 +471,9 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) return; } - // JIT goes first. - if (entry->jitReplaceFunc) { + if (entry->flags & REPFLAG_DISABLED) { + MIPSCompileOp(Memory::Read_Instruction(js.compilerPC, true)); + } else if (entry->jitReplaceFunc) { MIPSReplaceFunc repl = entry->jitReplaceFunc; int cycles = (this->*repl)(); From 5525c0a4853e2741b8275aa5708d9848ce663594 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 3 Aug 2014 13:17:55 -0700 Subject: [PATCH 3/4] Use a lookup for function replacements. Ought to be a tiny bit quicker. 
--- Core/HLE/ReplaceTables.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index 87d2b1997e..d93190f741 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -624,12 +624,20 @@ static const ReplacementTableEntry entries[] = { static std::map<u32, u32> replacedInstructions; +static std::map<std::string, int> replacementNameLookup; void Replacement_Init() { + for (int i = 0; i < (int)ARRAY_SIZE(entries); i++) { + const auto entry = &entries[i]; + if (!entry->name || (entry->flags & REPFLAG_DISABLED) != 0) + continue; + replacementNameLookup[entry->name] = i; + } } void Replacement_Shutdown() { replacedInstructions.clear(); + replacementNameLookup.clear(); } // TODO: Do something on load state? @@ -644,13 +652,9 @@ int GetReplacementFuncIndex(u64 hash, int funcSize) { return -1; } - // TODO: Build a lookup and keep it around - for (size_t i = 0; i < ARRAY_SIZE(entries); i++) { - if (!entries[i].name || (entries[i].flags & REPFLAG_DISABLED) != 0) - continue; - if (!strcmp(name, entries[i].name)) { - return (int)i; - } + auto index = replacementNameLookup.find(name); + if (index != replacementNameLookup.end()) { + return index->second; } return -1; } From 245a2a3be07fdbbfc4fe7fad21c9d05bcb0cb2e7 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 3 Aug 2014 13:22:30 -0700 Subject: [PATCH 4/4] Don't zero out downcount in replacements. It doesn't write out js.downcountAmount in any of these cases, so zeroing it is wrong. 
--- Core/MIPS/ARM/ArmJit.cpp | 4 +--- Core/MIPS/x86/Jit.cpp | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index b47af4cf7c..219c3c7147 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -388,7 +388,6 @@ bool Jit::ReplaceJalTo(u32 dest) { BL(R0); } WriteDownCountR(R0); - js.downcountAmount = 0; // we just subtracted most of it } js.compilerPC += 4; @@ -428,7 +427,7 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) FlushAll(); // Flushed, so R1 is safe. LDR(R1, CTXREG, MIPS_REG_RA * 4); - js.downcountAmount = cycles; + js.downcountAmount += cycles; WriteExitDestInR(R1); js.compiling = false; } @@ -452,7 +451,6 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) } else { LDR(R1, CTXREG, MIPS_REG_RA * 4); WriteDownCountR(R0); - js.downcountAmount = 0; // we just subtracted most of it WriteExitDestInR(R1); js.compiling = false; } diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index d9d92ee32f..74cfd97a6a 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -444,7 +444,6 @@ bool Jit::ReplaceJalTo(u32 dest) { MOV(32, M(&mips_->pc), Imm32(js.compilerPC)); ABI_CallFunction(entry->replaceFunc); SUB(32, M(&currentMIPS->downcount), R(EAX)); - js.downcountAmount = 0; // we just subtracted most of it } js.compilerPC += 4; @@ -483,7 +482,7 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) } else { FlushAll(); MOV(32, R(ECX), M(&currentMIPS->r[MIPS_REG_RA])); - js.downcountAmount = cycles; + js.downcountAmount += cycles; WriteExitDestInReg(ECX); js.compiling = false; } @@ -501,7 +500,6 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) } else { MOV(32, R(ECX), M(&currentMIPS->r[MIPS_REG_RA])); SUB(32, M(&currentMIPS->downcount), R(EAX)); - js.downcountAmount = 0; // we just subtracted most of it WriteExitDestInReg(ECX); js.compiling = false; }