diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index 58967b3a8f..e42abe3c14 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -113,11 +113,13 @@ static int Replace_memcpy() { if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); } - if (!skip && bytes != 0 && destPtr != 0) { - u8 *dst = Memory::GetPointerUnchecked(destPtr); - const u8 *src = Memory::GetPointerUnchecked(srcPtr); + if (!skip && bytes != 0) { + u8 *dst = Memory::GetPointer(destPtr); + const u8 *src = Memory::GetPointer(srcPtr); - if (std::min(destPtr, srcPtr) + bytes > std::max(destPtr, srcPtr)) { + if (!dst || !src) { + // Already logged. + } else if (std::min(destPtr, srcPtr) + bytes > std::max(destPtr, srcPtr)) { // Overlap. Star Ocean breaks if it's not handled in 16 bytes blocks. const u32 blocks = bytes & ~0x0f; for (u32 offset = 0; offset < blocks; offset += 0x10) { @@ -150,9 +152,11 @@ static int Replace_memcpy16() { skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); } if (!skip && bytes != 0) { - u8 *dst = Memory::GetPointerUnchecked(destPtr); - u8 *src = Memory::GetPointerUnchecked(srcPtr); - memmove(dst, src, bytes); + u8 *dst = Memory::GetPointer(destPtr); + const u8 *src = Memory::GetPointer(srcPtr); + if (dst && src) { + memmove(dst, src, bytes); + } } RETURN(destPtr); #ifndef MOBILE_DEVICE @@ -170,22 +174,24 @@ static int Replace_memcpy_swizzled() { if (Memory::IsVRAMAddress(srcPtr)) { gpu->PerformMemoryDownload(srcPtr, pitch * h); } - u8 *dstp = Memory::GetPointerUnchecked(destPtr); - const u8 *srcp = Memory::GetPointerUnchecked(srcPtr); + u8 *dstp = Memory::GetPointer(destPtr); + const u8 *srcp = Memory::GetPointer(srcPtr); - const u8 *ysrcp = srcp; - for (u32 y = 0; y < h; y += 8) { - const u8 *xsrcp = ysrcp; - for (u32 x = 0; x < pitch; x += 16) { - const u8 *src = xsrcp; - for (int n = 0; n < 8; ++n) { - memcpy(dstp, src, 16); - src += pitch; - dstp += 16; + if (dstp && srcp) { + const u8 *ysrcp = srcp; + for (u32 y = 0; y < h; y += 8) { + const u8 *xsrcp = ysrcp; + for (u32 x = 0; x < pitch; x += 16) { + const u8 *src = xsrcp; + for (int n = 0; n < 8; ++n) { + memcpy(dstp, src, 16); + src += pitch; + dstp += 16; + } + xsrcp += 16; } - xsrcp += 16; + ysrcp += 8 * pitch; } - ysrcp += 8 * pitch; } RETURN(0); @@ -208,9 +214,11 @@ static int Replace_memmove() { skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); } if (!skip && bytes != 0) { - u8 *dst = Memory::GetPointerUnchecked(destPtr); - u8 *src = Memory::GetPointerUnchecked(srcPtr); - memmove(dst, src, bytes); + u8 *dst = Memory::GetPointer(destPtr); + const u8 *src = Memory::GetPointer(srcPtr); + if (dst && src) { + memmove(dst, src, bytes); + } } RETURN(destPtr); #ifndef MOBILE_DEVICE @@ -222,15 +230,17 @@ static int Replace_memmove() { static int Replace_memset() { u32 destPtr = PARAM(0); - u8 *dst = Memory::GetPointerUnchecked(destPtr); u8 value = PARAM(1); u32 bytes = PARAM(2); bool skip = false; if (Memory::IsVRAMAddress(destPtr)) { skip = gpu->PerformMemorySet(destPtr, value, bytes); } - if (!skip) { - memset(dst, value, bytes); + if (!skip && bytes != 0) { + u8 *dst = Memory::GetPointer(destPtr); + if (dst) { + memset(dst, value, bytes); + } } RETURN(destPtr); #ifndef MOBILE_DEVICE @@ -241,43 +251,55 @@ static int Replace_memset() { static int Replace_strlen() { u32 srcPtr = PARAM(0); - const char *src = (const char *)Memory::GetPointerUnchecked(srcPtr); - u32 len = (u32)strlen(src); + const char *src = (const char *)Memory::GetPointer(srcPtr); + u32 len = src ? (u32)strlen(src) : 0UL; RETURN(len); return 7 + len * 4; // approximation } static int Replace_strcpy() { u32 destPtr = PARAM(0); - char *dst = (char *)Memory::GetPointerUnchecked(destPtr); - const char *src = (const char *)Memory::GetPointerUnchecked(PARAM(1)); - strcpy(dst, src); + char *dst = (char *)Memory::GetPointer(destPtr); + const char *src = (const char *)Memory::GetPointer(PARAM(1)); + if (dst && src) { + strcpy(dst, src); + } RETURN(destPtr); return 10; // approximation } static int Replace_strncpy() { u32 destPtr = PARAM(0); - char *dst = (char *)Memory::GetPointerUnchecked(destPtr); - const char *src = (const char *)Memory::GetPointerUnchecked(PARAM(1)); + char *dst = (char *)Memory::GetPointer(destPtr); + const char *src = (const char *)Memory::GetPointer(PARAM(1)); u32 bytes = PARAM(2); - strncpy(dst, src, bytes); + if (dst && src && bytes != 0) { + strncpy(dst, src, bytes); + } RETURN(destPtr); return 10; // approximation } static int Replace_strcmp() { - const char *a = (const char *)Memory::GetPointerUnchecked(PARAM(0)); - const char *b = (const char *)Memory::GetPointerUnchecked(PARAM(1)); - RETURN(strcmp(a, b)); + const char *a = (const char *)Memory::GetPointer(PARAM(0)); + const char *b = (const char *)Memory::GetPointer(PARAM(1)); + if (a && b) { + RETURN(strcmp(a, b)); + } else { + RETURN(0); + } return 10; // approximation } static int Replace_strncmp() { - const char *a = (const char *)Memory::GetPointerUnchecked(PARAM(0)); - const char *b = (const char *)Memory::GetPointerUnchecked(PARAM(1)); + const char *a = (const char *)Memory::GetPointer(PARAM(0)); + const char *b = (const char *)Memory::GetPointer(PARAM(1)); u32 bytes = PARAM(2); - RETURN(strncmp(a, b, bytes)); + if (a && b && bytes != 0) { + RETURN(strncmp(a, b, bytes)); + } else { + RETURN(0); + } return 10 + bytes / 4; // approximation } @@ -287,12 +309,14 @@ static int Replace_fabsf() { } static int Replace_vmmul_q_transp() { - float *out = (float *)Memory::GetPointerUnchecked(PARAM(0)); - const float *a = (const float *)Memory::GetPointerUnchecked(PARAM(1)); - const float *b = (const float *)Memory::GetPointerUnchecked(PARAM(2)); + float *out = (float *)Memory::GetPointer(PARAM(0)); + const float *a = (const float *)Memory::GetPointer(PARAM(1)); + const float *b = (const float *)Memory::GetPointer(PARAM(2)); // TODO: Actually use an optimized matrix multiply here... - Matrix4ByMatrix4(out, b, a); + if (out && b && a) { + Matrix4ByMatrix4(out, b, a); + } return 16; } @@ -300,46 +324,49 @@ static int Replace_vmmul_q_transp() { // a1 = matrix // a2 = source address static int Replace_gta_dl_write_matrix() { - u32 *ptr = (u32 *)Memory::GetPointerUnchecked(PARAM(0)); - u32 *dest = (u32_le *)Memory::GetPointerUnchecked(ptr[0]); - u32 *src = (u32_le *)Memory::GetPointerUnchecked(PARAM(2)); + u32 *ptr = (u32 *)Memory::GetPointer(PARAM(0)); + u32 *dest = (u32_le *)Memory::GetPointer(ptr[0]); + u32 *src = (u32_le *)Memory::GetPointer(PARAM(2)); u32 matrix = PARAM(1) << 24; + if (ptr && src && dest) { #if defined(_M_IX86) || defined(_M_X64) - __m128i topBytes = _mm_set1_epi32(matrix); - __m128i m0 = _mm_loadu_si128((const __m128i *)src); - __m128i m1 = _mm_loadu_si128((const __m128i *)(src + 4)); - __m128i m2 = _mm_loadu_si128((const __m128i *)(src + 8)); - __m128i m3 = _mm_loadu_si128((const __m128i *)(src + 12)); - m0 = _mm_or_si128(_mm_srli_epi32(m0, 8), topBytes); - m1 = _mm_or_si128(_mm_srli_epi32(m1, 8), topBytes); - m2 = _mm_or_si128(_mm_srli_epi32(m2, 8), topBytes); - m3 = _mm_or_si128(_mm_srli_epi32(m3, 8), topBytes); - // These three stores overlap by a word, due to the offsets. - _mm_storeu_si128((__m128i *)dest, m0); - _mm_storeu_si128((__m128i *)(dest + 3), m1); - _mm_storeu_si128((__m128i *)(dest + 6), m2); - // Store the last one in parts to not overwrite forwards (probably mostly risk free though) - _mm_storel_epi64((__m128i *)(dest + 9), m3); - m3 = _mm_srli_si128(m3, 8); - _mm_store_ss((float *)(dest + 11), _mm_castsi128_ps(m3)); + __m128i topBytes = _mm_set1_epi32(matrix); + __m128i m0 = _mm_loadu_si128((const __m128i *)src); + __m128i m1 = _mm_loadu_si128((const __m128i *)(src + 4)); + __m128i m2 = _mm_loadu_si128((const __m128i *)(src + 8)); + __m128i m3 = _mm_loadu_si128((const __m128i *)(src + 12)); + m0 = _mm_or_si128(_mm_srli_epi32(m0, 8), topBytes); + m1 = _mm_or_si128(_mm_srli_epi32(m1, 8), topBytes); + m2 = _mm_or_si128(_mm_srli_epi32(m2, 8), topBytes); + m3 = _mm_or_si128(_mm_srli_epi32(m3, 8), topBytes); + // These three stores overlap by a word, due to the offsets. + _mm_storeu_si128((__m128i *)dest, m0); + _mm_storeu_si128((__m128i *)(dest + 3), m1); + _mm_storeu_si128((__m128i *)(dest + 6), m2); + // Store the last one in parts to not overwrite forwards (probably mostly risk free though) + _mm_storel_epi64((__m128i *)(dest + 9), m3); + m3 = _mm_srli_si128(m3, 8); + _mm_store_ss((float *)(dest + 11), _mm_castsi128_ps(m3)); #else - // Bit tricky to SIMD (note the offsets) but should be doable if not perfect - dest[0] = matrix | (src[0] >> 8); - dest[1] = matrix | (src[1] >> 8); - dest[2] = matrix | (src[2] >> 8); - dest[3] = matrix | (src[4] >> 8); - dest[4] = matrix | (src[5] >> 8); - dest[5] = matrix | (src[6] >> 8); - dest[6] = matrix | (src[8] >> 8); - dest[7] = matrix | (src[9] >> 8); - dest[8] = matrix | (src[10] >> 8); - dest[9] = matrix | (src[12] >> 8); - dest[10] = matrix | (src[13] >> 8); - dest[11] = matrix | (src[14] >> 8); + // Bit tricky to SIMD (note the offsets) but should be doable if not perfect + dest[0] = matrix | (src[0] >> 8); + dest[1] = matrix | (src[1] >> 8); + dest[2] = matrix | (src[2] >> 8); + dest[3] = matrix | (src[4] >> 8); + dest[4] = matrix | (src[5] >> 8); + dest[5] = matrix | (src[6] >> 8); + dest[6] = matrix | (src[8] >> 8); + dest[7] = matrix | (src[9] >> 8); + dest[8] = matrix | (src[10] >> 8); + dest[9] = matrix | (src[12] >> 8); + dest[10] = matrix | (src[13] >> 8); + dest[11] = matrix | (src[14] >> 8); #endif - (*ptr) += 0x30; + (*ptr) += 0x30; + } + RETURN(0); return 38; } @@ -348,9 +375,14 @@ static int Replace_gta_dl_write_matrix() { // TODO: Inline into a few NEON or SSE instructions - especially if a1 is a known immediate! // Anyway, not sure if worth it. There's not that many matrices written per frame normally. static int Replace_dl_write_matrix() { - u32 *dlStruct = (u32 *)Memory::GetPointerUnchecked(PARAM(0)); - u32 *dest = (u32 *)Memory::GetPointerUnchecked(dlStruct[2]); - u32 *src = (u32 *)Memory::GetPointerUnchecked(PARAM(2)); + u32 *dlStruct = (u32 *)Memory::GetPointer(PARAM(0)); + u32 *dest = (u32 *)Memory::GetPointer(dlStruct[2]); + u32 *src = (u32 *)Memory::GetPointer(PARAM(2)); + + if (!dlStruct || !dest || !src) { + RETURN(0); + return 60; + } u32 matrix; int count = 12;