mirror of
https://github.com/grumpycoders/pcsx-redux.git
synced 2025-04-02 10:41:54 -04:00
464 lines
13 KiB
C++
464 lines
13 KiB
C++
///////////////////////////////////////////////////////////////////////////////
|
|
// Copyright (c) Electronic Arts Inc. All rights reserved.
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
#include <EASTL/internal/config.h>
|
|
#include <EASTL/string.h>
|
|
#include <EABase/eabase.h>
|
|
//#include <string.h>
|
|
|
|
|
|
namespace eastl
|
|
{
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// Converters for DecodePart
|
|
//
|
|
// For some decent documentation about conversions, see:
|
|
// http://tidy.sourceforge.net/cgi-bin/lxr/source/src/utf8.c
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Requires that pDest have a capacity of at least 6 chars.
|
|
// Sets pResult to '\1' in the case that c is an invalid UCS4 char.
|
|
inline bool UCS4ToUTF8(uint32_t c, char*& pResult)
|
|
{
|
|
if(c < 0x00000080)
|
|
*pResult++ = (char)(uint8_t)c;
|
|
else if(c < 0x0800)
|
|
{
|
|
*pResult++ = (char)(uint8_t)(0xC0 | (c >> 6));
|
|
*pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
|
|
}
|
|
else if(c <= 0x0000FFFF)
|
|
{
|
|
*pResult++ = (char)(uint8_t)(0xE0 | (c >> 12));
|
|
*pResult++ = (char)(uint8_t)(0x80 | ((c >> 6) & 0x3F));
|
|
*pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
|
|
}
|
|
else if(c <= 0x001FFFFF)
|
|
{
|
|
*pResult++ = (char)(uint8_t)(0xF0 | (c >> 18));
|
|
*pResult++ = (char)(uint8_t)(0x80 | ((c >> 12) & 0x3F));
|
|
*pResult++ = (char)(uint8_t)(0x80 | ((c >> 6) & 0x3F));
|
|
*pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
|
|
}
|
|
else if(c <= 0x003FFFFFF)
|
|
{
|
|
*pResult++ = (char)(uint8_t)(0xF8 | (c >> 24));
|
|
*pResult++ = (char)(uint8_t)(0x80 | (c >> 18));
|
|
*pResult++ = (char)(uint8_t)(0x80 | ((c >> 12) & 0x3F));
|
|
*pResult++ = (char)(uint8_t)(0x80 | ((c >> 6) & 0x3F));
|
|
*pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
|
|
}
|
|
else if(c <= 0x7FFFFFFF)
|
|
{
|
|
*pResult++ = (char)(uint8_t)(0xFC | (c >> 30));
|
|
*pResult++ = (char)(uint8_t)(0x80 | ((c >> 24) & 0x3F));
|
|
*pResult++ = (char)(uint8_t)(0x80 | ((c >> 18) & 0x3F));
|
|
*pResult++ = (char)(uint8_t)(0x80 | ((c >> 12) & 0x3F));
|
|
*pResult++ = (char)(uint8_t)(0x80 | ((c >> 6) & 0x3F));
|
|
*pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
|
|
}
|
|
else
|
|
{
|
|
// values >= 0x80000000 can't be converted to UTF8.
|
|
*pResult++ = '\1';
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
// Requires that pResult have a capacity of at least 3 chars.
|
|
// Sets pResult to '\1' in the case that c is an invalid UCS4 char.
|
|
inline bool UCS2ToUTF8(uint16_t c, char*& pResult)
|
|
{
|
|
return UCS4ToUTF8(c, pResult);
|
|
}
|
|
|
|
|
|
// Sets result to 0xffff in the case that the input UTF8 sequence is bad.
|
|
// 32 bit 0xffffffff is an invalid UCS4 code point, so we can't use that as an error return value.
|
|
inline bool UTF8ToUCS4(const char*& p, const char* pEnd, uint32_t& result)
|
|
{
|
|
// This could likely be implemented in a faster-executing way that uses tables.
|
|
|
|
bool success = true;
|
|
uint32_t c = 0xffff;
|
|
const char* pNext = NULL;
|
|
|
|
if(p < pEnd)
|
|
{
|
|
uint8_t cChar0((uint8_t)*p), cChar1, cChar2, cChar3;
|
|
|
|
// Asserts are disabled because we don't necessarily want to interrupt runtime execution due to this.
|
|
// EASTL_ASSERT((cChar0 != 0xFE) && (cChar0 != 0xFF)); // No byte can be 0xFE or 0xFF
|
|
// Code below will effectively catch this error as it goes.
|
|
|
|
if(cChar0 < 0x80)
|
|
{
|
|
pNext = p + 1;
|
|
c = cChar0;
|
|
}
|
|
else
|
|
{
|
|
//EASTL_ASSERT((cChar0 & 0xC0) == 0xC0); // The top two bits need to be equal to 1
|
|
if((cChar0 & 0xC0) != 0xC0)
|
|
{
|
|
success = false;
|
|
goto Failure;
|
|
}
|
|
|
|
if((cChar0 & 0xE0) == 0xC0)
|
|
{
|
|
pNext = p + 2;
|
|
|
|
if(pNext <= pEnd)
|
|
{
|
|
c = (uint32_t)((cChar0 & 0x1F) << 6);
|
|
cChar1 = static_cast<uint8_t>(p[1]);
|
|
c |= cChar1 & 0x3F;
|
|
|
|
//EASTL_ASSERT((cChar1 & 0xC0) == 0x80); // All subsequent code should be b10xxxxxx
|
|
//EASTL_ASSERT(c >= 0x0080 && c < 0x0800); // Check that we have the smallest coding
|
|
if(!((cChar1 & 0xC0) == 0x80) ||
|
|
!(c >= 0x0080 && c < 0x0800))
|
|
{
|
|
success = false;
|
|
goto Failure;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
success = false;
|
|
goto Failure;
|
|
}
|
|
}
|
|
else if((cChar0 & 0xF0) == 0xE0)
|
|
{
|
|
pNext = p + 3;
|
|
|
|
if(pNext <= pEnd)
|
|
{
|
|
c = (uint32_t)((cChar0 & 0xF) << 12);
|
|
cChar1 = static_cast<uint8_t>(p[1]);
|
|
c |= (cChar1 & 0x3F) << 6;
|
|
cChar2 = static_cast<uint8_t>(p[2]);
|
|
c |= cChar2 & 0x3F;
|
|
|
|
//EASTL_ASSERT((cChar1 & 0xC0) == 0x80); // All subsequent code should be b10xxxxxx
|
|
//EASTL_ASSERT((cChar2 & 0xC0) == 0x80); // All subsequent code should be b10xxxxxx
|
|
//EASTL_ASSERT(c >= 0x00000800 && c < 0x00010000); // Check that we have the smallest coding
|
|
if(!((cChar1 & 0xC0) == 0x80) ||
|
|
!((cChar2 & 0xC0) == 0x80) ||
|
|
!(c >= 0x00000800 && c < 0x00010000))
|
|
{
|
|
success = false;
|
|
goto Failure;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
success = false;
|
|
goto Failure;
|
|
}
|
|
}
|
|
else if((cChar0 & 0xF8) == 0xF0)
|
|
{
|
|
pNext = p + 4;
|
|
|
|
if(pNext <= pEnd)
|
|
{
|
|
c = (uint32_t)((cChar0 & 0x7) << 18);
|
|
cChar1 = static_cast<uint8_t>(p[1]);
|
|
c |= (uint32_t)((cChar1 & 0x3F) << 12);
|
|
cChar2 = static_cast<uint8_t>(p[2]);
|
|
c |= (cChar2 & 0x3F) << 6;
|
|
cChar3 = static_cast<uint8_t>(p[3]);
|
|
c |= cChar3 & 0x3F;
|
|
|
|
//EASTL_ASSERT((cChar0 & 0xf8) == 0xf0); // We handle the unicode but not UCS-4
|
|
//EASTL_ASSERT((cChar1 & 0xC0) == 0x80); // All subsequent code should be b10xxxxxx
|
|
//EASTL_ASSERT((cChar2 & 0xC0) == 0x80); // All subsequent code should be b10xxxxxx
|
|
//EASTL_ASSERT((cChar3 & 0xC0) == 0x80); // All subsequent code should be b10xxxxxx
|
|
//EASTL_ASSERT(c >= 0x00010000 && c <= 0x0010FFFF); // Check that we have the smallest coding, Unicode and not ucs-4
|
|
if(!((cChar0 & 0xf8) == 0xf0) ||
|
|
!((cChar1 & 0xC0) == 0x80) ||
|
|
!((cChar2 & 0xC0) == 0x80) ||
|
|
!(c >= 0x00010000 && c <= 0x0010FFFF))
|
|
{
|
|
success = false;
|
|
goto Failure;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
success = false;
|
|
goto Failure;
|
|
}
|
|
}
|
|
else if((cChar0 & 0xFC) == 0xF8)
|
|
{
|
|
pNext = p + 4;
|
|
|
|
if(pNext <= pEnd)
|
|
{
|
|
// To do. We don't currently support extended UCS4 characters.
|
|
}
|
|
else
|
|
{
|
|
success = false;
|
|
goto Failure;
|
|
}
|
|
}
|
|
else if((cChar0 & 0xFE) == 0xFC)
|
|
{
|
|
pNext = p + 5;
|
|
|
|
if(pNext <= pEnd)
|
|
{
|
|
// To do. We don't currently support extended UCS4 characters.
|
|
}
|
|
else
|
|
{
|
|
success = false;
|
|
goto Failure;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
success = false;
|
|
goto Failure;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
success = false;
|
|
|
|
Failure:
|
|
if(success)
|
|
{
|
|
p = pNext;
|
|
result = c;
|
|
}
|
|
else
|
|
{
|
|
p = p + 1;
|
|
result = 0xffff;
|
|
}
|
|
|
|
return success;
|
|
}
|
|
|
|
// Sets result to 0xffff in the case that the input UTF8 sequence is bad.
|
|
// The effect of converting UTF8 codepoints > 0xffff to UCS2 (char16_t) is to set all
|
|
// such codepoints to 0xffff. EASTL doesn't have a concept of setting or maintaining
|
|
// error state for string conversions, though it does have a policy of converting
|
|
// impossible values to something without generating invalid strings or throwing exceptions.
|
|
inline bool UTF8ToUCS2(const char*& p, const char* pEnd, uint16_t& result)
|
|
{
|
|
uint32_t u32;
|
|
|
|
if(UTF8ToUCS4(p, pEnd, u32))
|
|
{
|
|
if(u32 <= 0xffff)
|
|
{
|
|
result = (uint16_t)u32;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
result = 0xffff;
|
|
return false;
|
|
}
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
|
// DecodePart
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char*& pDest, char* pDestEnd)
|
|
{
|
|
size_t sourceSize = (size_t)(pSrcEnd - pSrc);
|
|
size_t destSize = (size_t)(pDestEnd - pDest);
|
|
|
|
if(sourceSize > destSize)
|
|
sourceSize = destSize;
|
|
|
|
__builtin_memmove(pDest, pSrc, sourceSize * sizeof(*pSrcEnd));
|
|
|
|
pSrc += sourceSize;
|
|
pDest += sourceSize; // Intentionally add sourceSize here.
|
|
|
|
return true;
|
|
}
|
|
|
|
EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd)
|
|
{
|
|
bool success = true;
|
|
|
|
while(success && (pSrc < pSrcEnd) && (pDest < pDestEnd))
|
|
success = UTF8ToUCS2(pSrc, pSrcEnd, (uint16_t&)*pDest++);
|
|
|
|
return success;
|
|
}
|
|
|
|
EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd)
|
|
{
|
|
bool success = true;
|
|
|
|
while(success && (pSrc < pSrcEnd) && (pDest < pDestEnd))
|
|
success = UTF8ToUCS4(pSrc, pSrcEnd, (uint32_t&)*pDest++);
|
|
|
|
return success;
|
|
}
|
|
|
|
|
|
EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char*& pDest, char* pDestEnd)
|
|
{
|
|
bool success = true;
|
|
|
|
EASTL_ASSERT((pDest + 6) < pDestEnd); // The user must provide ample buffer space, preferably 256 chars or more.
|
|
pDestEnd -= 6; // Do this so that we can avoid dest buffer size checking in the loop below and the function it calls.
|
|
|
|
while(success && (pSrc < pSrcEnd) && (pDest < pDestEnd))
|
|
success = UCS2ToUTF8(*pSrc++, pDest);
|
|
|
|
return success;
|
|
}
|
|
|
|
EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd)
|
|
{
|
|
size_t sourceSize = (size_t)(pSrcEnd - pSrc);
|
|
size_t destSize = (size_t)(pDestEnd - pDest);
|
|
|
|
if(sourceSize > destSize)
|
|
sourceSize = destSize;
|
|
|
|
__builtin_memmove(pDest, pSrc, sourceSize * sizeof(*pSrcEnd));
|
|
|
|
pSrc += sourceSize;
|
|
pDest += sourceSize; // Intentionally add sourceSize here.
|
|
|
|
return true;
|
|
}
|
|
|
|
EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd)
|
|
{
|
|
size_t sourceSize = (size_t)(pSrcEnd - pSrc);
|
|
size_t destSize = (size_t)(pDestEnd - pDest);
|
|
|
|
if(sourceSize > destSize)
|
|
pSrcEnd = pSrc + destSize;
|
|
|
|
while(pSrc != pSrcEnd) // To consider: Improve this by unrolling this loop. Other tricks can improve its speed as well.
|
|
*pDest++ = (char32_t)*pSrc++;
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char*& pDest, char* pDestEnd)
|
|
{
|
|
bool success = true;
|
|
|
|
EASTL_ASSERT((pDest + 6) < pDestEnd); // The user must provide ample buffer space, preferably 256 chars or more.
|
|
pDestEnd -= 6; // Do this so that we can avoid dest buffer size checking in the loop below and the function it calls.
|
|
|
|
while(success && (pSrc < pSrcEnd) && (pDest < pDestEnd))
|
|
success = UCS4ToUTF8(*pSrc++, pDest);
|
|
|
|
return success;
|
|
}
|
|
|
|
EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd)
|
|
{
|
|
size_t sourceSize = (size_t)(pSrcEnd - pSrc);
|
|
size_t destSize = (size_t)(pDestEnd - pDest);
|
|
|
|
if(sourceSize > destSize)
|
|
pSrcEnd = pSrc + destSize;
|
|
|
|
while(pSrc != pSrcEnd) // To consider: Improve this by unrolling this loop. Other tricks can improve its speed as well.
|
|
*pDest++ = (char16_t)*pSrc++; // This is potentially losing data. We are not converting to UTF16; we are converting to UCS2.
|
|
|
|
return true;
|
|
}
|
|
|
|
EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd)
|
|
{
|
|
size_t sourceSize = (size_t)(pSrcEnd - pSrc);
|
|
size_t destSize = (size_t)(pDestEnd - pDest);
|
|
|
|
if(sourceSize > destSize)
|
|
sourceSize = destSize;
|
|
|
|
__builtin_memmove(pDest, pSrc, sourceSize * sizeof(*pSrcEnd));
|
|
|
|
pSrc += sourceSize;
|
|
pDest += sourceSize; // Intentionally add sourceSize here.
|
|
|
|
return true;
|
|
}
|
|
|
|
EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char*& pDest, char* pDestEnd)
|
|
{
|
|
bool success = true;
|
|
|
|
EASTL_ASSERT((pDest + 6) < pDestEnd); // The user must provide ample buffer space, preferably 256 chars or more.
|
|
pDestEnd -= 6; // Do this so that we can avoid dest buffer size checking in the loop below and the function it calls.
|
|
|
|
while(success && (pSrc < pSrcEnd) && (pDest < pDestEnd))
|
|
success = UCS4ToUTF8((uint32_t)(unsigned)*pSrc++, pDest);
|
|
|
|
return success;
|
|
}
|
|
|
|
EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd)
|
|
{
|
|
size_t sourceSize = (size_t)(pSrcEnd - pSrc);
|
|
size_t destSize = (size_t)(pDestEnd - pDest);
|
|
|
|
if(sourceSize > destSize)
|
|
pSrcEnd = pSrc + destSize;
|
|
|
|
while(pSrc != pSrcEnd) // To consider: Improve this by unrolling this loop. Other tricks can improve its speed as well.
|
|
*pDest++ = (char16_t)*pSrc++; // This is potentially losing data. We are not converting to UTF16; we are converting to UCS2.
|
|
|
|
return true;
|
|
}
|
|
|
|
EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd)
|
|
{
|
|
size_t sourceSize = (size_t)(pSrcEnd - pSrc);
|
|
size_t destSize = (size_t)(pDestEnd - pDest);
|
|
|
|
if(sourceSize > destSize)
|
|
pSrcEnd = pSrc + destSize;
|
|
|
|
while(pSrc != pSrcEnd) // To consider: Improve this by unrolling this loop. Other tricks can improve its speed as well.
|
|
*pDest++ = (char32_t)*pSrc++; // This is potentially losing data. We are not converting to UTF16; we are converting to UCS2.
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
|
|
} // namespace eastl
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|