pcsx-redux/third_party/EASTL/source/string.cpp
2024-09-27 21:01:20 -07:00

464 lines
13 KiB
C++

///////////////////////////////////////////////////////////////////////////////
// Copyright (c) Electronic Arts Inc. All rights reserved.
///////////////////////////////////////////////////////////////////////////////
#include <EASTL/internal/config.h>
#include <EASTL/string.h>
#include <EABase/eabase.h>
//#include <string.h>
namespace eastl
{
///////////////////////////////////////////////////////////////////////////////
// Converters for DecodePart
//
// For some decent documentation about conversions, see:
// http://tidy.sourceforge.net/cgi-bin/lxr/source/src/utf8.c
//
///////////////////////////////////////////////////////////////////////////////
// Encodes the UCS4 value c as UTF-8 at pResult and advances pResult past the chars written.
// Requires that pResult have a capacity of at least 6 chars.
// Returns true on success. Values >= 0x80000000 cannot be encoded: in that case a single
// '\1' char is written and false is returned.
inline bool UCS4ToUTF8(uint32_t c, char*& pResult)
{
    if(c < 0x00000080)
    {
        // 1 byte: 0xxxxxxx
        *pResult++ = (char)(uint8_t)c;
    }
    else if(c < 0x00000800)
    {
        // 2 bytes: 110xxxxx 10xxxxxx
        *pResult++ = (char)(uint8_t)(0xC0 | (c >> 6));
        *pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
    }
    else if(c <= 0x0000FFFF)
    {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        *pResult++ = (char)(uint8_t)(0xE0 | (c >> 12));
        *pResult++ = (char)(uint8_t)(0x80 | ((c >> 6) & 0x3F));
        *pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
    }
    else if(c <= 0x001FFFFF)
    {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        *pResult++ = (char)(uint8_t)(0xF0 | (c >> 18));
        *pResult++ = (char)(uint8_t)(0x80 | ((c >> 12) & 0x3F));
        *pResult++ = (char)(uint8_t)(0x80 | ((c >> 6) & 0x3F));
        *pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
    }
    else if(c <= 0x03FFFFFF)
    {
        // 5 bytes: 111110xx followed by four 10xxxxxx bytes.
        // Every continuation byte must be masked to its 6 payload bits; without the mask
        // on (c >> 18), the bits that belong to the lead byte leak into the second byte.
        *pResult++ = (char)(uint8_t)(0xF8 | (c >> 24));
        *pResult++ = (char)(uint8_t)(0x80 | ((c >> 18) & 0x3F));
        *pResult++ = (char)(uint8_t)(0x80 | ((c >> 12) & 0x3F));
        *pResult++ = (char)(uint8_t)(0x80 | ((c >> 6) & 0x3F));
        *pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
    }
    else if(c <= 0x7FFFFFFF)
    {
        // 6 bytes: 1111110x followed by five 10xxxxxx bytes.
        *pResult++ = (char)(uint8_t)(0xFC | (c >> 30));
        *pResult++ = (char)(uint8_t)(0x80 | ((c >> 24) & 0x3F));
        *pResult++ = (char)(uint8_t)(0x80 | ((c >> 18) & 0x3F));
        *pResult++ = (char)(uint8_t)(0x80 | ((c >> 12) & 0x3F));
        *pResult++ = (char)(uint8_t)(0x80 | ((c >> 6) & 0x3F));
        *pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
    }
    else
    {
        // values >= 0x80000000 can't be converted to UTF8.
        *pResult++ = '\1';
        return false;
    }

    return true;
}
// Encodes the UCS2 value c as UTF-8 at pResult by widening it and delegating to UCS4ToUTF8.
// Requires that pResult have a capacity of at least 3 chars.
// pResult is advanced past the chars written; the return value is that of UCS4ToUTF8.
inline bool UCS2ToUTF8(uint16_t c, char*& pResult)
{
    const uint32_t widened = c;
    return UCS4ToUTF8(widened, pResult);
}
// Decodes one UTF-8 sequence from the range [p, pEnd) into a UCS4 code point.
//
// On success: result receives the code point, p is advanced past the whole sequence,
// and true is returned.
// On failure (stray continuation byte, truncated sequence, bad continuation byte,
// non-shortest encoding, or value above 0x0010FFFF): result is set to 0xffff, p is advanced
// by one byte so that callers looping over a buffer always make progress, and false is returned.
// If the range is empty (p >= pEnd), false is returned with result 0xffff and p is left
// untouched instead of being moved beyond pEnd.
//
// Five and six byte sequences (lead bytes 0xF8..0xFD) are not decoded. They are skipped as a
// whole (5 and 6 bytes respectively) and reported as 0xffff with a true return value.
inline bool UTF8ToUCS4(const char*& p, const char* pEnd, uint32_t& result)
{
    // This could likely be implemented in a faster-executing way that uses tables.
    const uint32_t kBadChar = 0xffff;

    if(p >= pEnd)
    {
        result = kBadChar;
        return false;
    }

    const uint8_t cChar0 = (uint8_t)*p;
    uint32_t      c      = kBadChar;
    int           length = 0; // Total size of the accepted sequence; stays 0 when the input is rejected.

    // Lengths are validated with (pEnd - p) so that no pointer beyond pEnd is ever formed.
    // No byte can be 0xFE or 0xFF; those, and stray continuation bytes (0x80..0xBF),
    // match none of the branches below and are therefore rejected.
    if(cChar0 < 0x80)
    {
        c      = cChar0;
        length = 1;
    }
    else if((cChar0 & 0xE0) == 0xC0) // 110xxxxx 10xxxxxx
    {
        if((pEnd - p) >= 2)
        {
            const uint8_t  cChar1 = (uint8_t)p[1];
            const uint32_t value  = ((uint32_t)(cChar0 & 0x1F) << 6) |
                                     (uint32_t)(cChar1 & 0x3F);

            if(((cChar1 & 0xC0) == 0x80) &&          // All subsequent bytes must be 10xxxxxx.
               (value >= 0x0080) && (value < 0x0800)) // Must be the smallest possible encoding.
            {
                c      = value;
                length = 2;
            }
        }
    }
    else if((cChar0 & 0xF0) == 0xE0) // 1110xxxx 10xxxxxx 10xxxxxx
    {
        if((pEnd - p) >= 3)
        {
            const uint8_t  cChar1 = (uint8_t)p[1];
            const uint8_t  cChar2 = (uint8_t)p[2];
            const uint32_t value  = ((uint32_t)(cChar0 & 0x0F) << 12) |
                                    ((uint32_t)(cChar1 & 0x3F) << 6)  |
                                     (uint32_t)(cChar2 & 0x3F);

            if(((cChar1 & 0xC0) == 0x80) &&
               ((cChar2 & 0xC0) == 0x80) &&
               (value >= 0x00000800) && (value < 0x00010000))
            {
                c      = value;
                length = 3;
            }
        }
    }
    else if((cChar0 & 0xF8) == 0xF0) // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    {
        if((pEnd - p) >= 4)
        {
            const uint8_t  cChar1 = (uint8_t)p[1];
            const uint8_t  cChar2 = (uint8_t)p[2];
            const uint8_t  cChar3 = (uint8_t)p[3];
            const uint32_t value  = ((uint32_t)(cChar0 & 0x07) << 18) |
                                    ((uint32_t)(cChar1 & 0x3F) << 12) |
                                    ((uint32_t)(cChar2 & 0x3F) << 6)  |
                                     (uint32_t)(cChar3 & 0x3F);

            // The last byte has to be validated like the others; otherwise an arbitrary
            // byte following a three-byte prefix would be silently absorbed.
            // The range check accepts Unicode only (<= 0x0010FFFF), not the wider UCS4 space.
            if(((cChar1 & 0xC0) == 0x80) &&
               ((cChar2 & 0xC0) == 0x80) &&
               ((cChar3 & 0xC0) == 0x80) &&
               (value >= 0x00010000) && (value <= 0x0010FFFF))
            {
                c      = value;
                length = 4;
            }
        }
    }
    else if((cChar0 & 0xFC) == 0xF8) // 111110xx + four continuation bytes
    {
        // To do. We don't currently support extended UCS4 characters.
        // The sequence is skipped in full and c remains 0xffff.
        if((pEnd - p) >= 5)
            length = 5;
    }
    else if((cChar0 & 0xFE) == 0xFC) // 1111110x + five continuation bytes
    {
        // To do. We don't currently support extended UCS4 characters.
        // The sequence is skipped in full and c remains 0xffff.
        if((pEnd - p) >= 6)
            length = 6;
    }

    if(length == 0)
    {
        p      = p + 1;
        result = kBadChar;
        return false;
    }

    p     += length;
    result = c;
    return true;
}
// Decodes one UTF-8 sequence via UTF8ToUCS4 and narrows the code point to UCS2 (char16_t range).
// Returns true and stores the code point only when decoding succeeded and the value fits in
// 16 bits. Otherwise result is set to 0xffff and false is returned; this covers both bad
// input and valid code points above 0xffff.
// EASTL doesn't have a concept of setting or maintaining error state for string conversions,
// though it does have a policy of converting impossible values to something without
// generating invalid strings or throwing exceptions.
inline bool UTF8ToUCS2(const char*& p, const char* pEnd, uint16_t& result)
{
    uint32_t   codePoint;
    const bool decoded       = UTF8ToUCS4(p, pEnd, codePoint);
    const bool representable = decoded && (codePoint <= 0xffff);

    result = representable ? (uint16_t)codePoint : (uint16_t)0xffff;
    return representable;
}
///////////////////////////////////////////////////////////////////////////
// DecodePart
///////////////////////////////////////////////////////////////////////////
// char -> char: no transcoding, just a bounded copy.
// Copies min(source length, destination capacity) chars, then advances pSrc and pDest
// past the copied chars. Always returns true.
EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char*& pDest, char* pDestEnd)
{
    const size_t srcCount  = (size_t)(pSrcEnd - pSrc);
    const size_t destCount = (size_t)(pDestEnd - pDest);
    const size_t copyCount = (srcCount > destCount) ? destCount : srcCount;

    __builtin_memmove(pDest, pSrc, copyCount * sizeof(char));

    pSrc  += copyCount;
    pDest += copyCount; // Both cursors move by the same clamped count.
    return true;
}
// char (UTF-8) -> char16_t (UCS2).
// Decodes sequences with UTF8ToUCS2 until the source is exhausted, the destination is full,
// or a sequence fails to convert. Every attempted conversion writes one char16_t (0xffff on
// failure) and advances pDest. Returns false if the last conversion failed, true otherwise.
EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd)
{
    bool success = true;

    while(success && (pSrc < pSrcEnd) && (pDest < pDestEnd))
    {
        // Decode into a real uint16_t and assign, rather than viewing the char16_t slot
        // through a uint16_t reference: char16_t is a distinct type, so that cast would
        // access it through an incompatible lvalue.
        uint16_t decoded = 0xffff;
        success = UTF8ToUCS2(pSrc, pSrcEnd, decoded);
        *pDest++ = (char16_t)decoded;
    }

    return success;
}
// char (UTF-8) -> char32_t (UCS4).
// Decodes sequences with UTF8ToUCS4 until the source is exhausted, the destination is full,
// or a sequence fails to convert. Every attempted conversion writes one char32_t (0xffff on
// failure) and advances pDest. Returns false if the last conversion failed, true otherwise.
EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd)
{
    bool success = true;

    while(success && (pSrc < pSrcEnd) && (pDest < pDestEnd))
    {
        // Decode into a real uint32_t and assign, rather than viewing the char32_t slot
        // through a uint32_t reference: char32_t is a distinct type, so that cast would
        // access it through an incompatible lvalue.
        uint32_t decoded = 0xffff;
        success = UTF8ToUCS4(pSrc, pSrcEnd, decoded);
        *pDest++ = (char32_t)decoded;
    }

    return success;
}
// char16_t (UCS2) -> char (UTF-8).
// Encodes source elements with UCS2ToUTF8 until the source is exhausted, the destination
// write cursor reaches the reserved tail, or an element fails to encode.
// Returns false only if an element failed to encode.
EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char*& pDest, char* pDestEnd)
{
    EASTL_ASSERT((pDest + 6) < pDestEnd); // The user must provide ample buffer space, preferably 256 chars or more.

    // Keep a 6 char tail in reserve so that neither this loop nor the encoder needs
    // a per-char destination bounds check.
    char* const pDestLimit = pDestEnd - 6;

    while((pSrc < pSrcEnd) && (pDest < pDestLimit))
    {
        if(!UCS2ToUTF8(*pSrc++, pDest))
            return false;
    }

    return true;
}
// char16_t -> char16_t: no transcoding, just a bounded copy.
// Copies min(source length, destination capacity) elements, then advances pSrc and pDest
// past the copied elements. Always returns true.
EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd)
{
    const size_t srcCount  = (size_t)(pSrcEnd - pSrc);
    const size_t destCount = (size_t)(pDestEnd - pDest);
    const size_t copyCount = (srcCount > destCount) ? destCount : srcCount;

    // The byte count is the element count scaled by the element size.
    __builtin_memmove(pDest, pSrc, copyCount * sizeof(char16_t));

    pSrc  += copyCount;
    pDest += copyCount; // Both cursors move by the same clamped count.
    return true;
}
// char16_t -> char32_t.
// Widens each source element to char32_t with a plain cast, copying at most as many
// elements as the destination can hold. pSrc and pDest are advanced past the elements
// converted. Always returns true.
EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd)
{
    const size_t srcCount  = (size_t)(pSrcEnd - pSrc);
    const size_t destCount = (size_t)(pDestEnd - pDest);

    // Stop early when the destination is the smaller of the two ranges.
    const char16_t* const pStop = (srcCount > destCount) ? (pSrc + destCount) : pSrcEnd;

    // To consider: Improve this by unrolling this loop. Other tricks can improve its speed as well.
    for(; pSrc != pStop; ++pSrc, ++pDest)
        *pDest = (char32_t)*pSrc;

    return true;
}
// char32_t (UCS4) -> char (UTF-8).
// Encodes source elements with UCS4ToUTF8 until the source is exhausted, the destination
// write cursor reaches the reserved tail, or an element fails to encode.
// Returns false only if an element failed to encode.
EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char*& pDest, char* pDestEnd)
{
    EASTL_ASSERT((pDest + 6) < pDestEnd); // The user must provide ample buffer space, preferably 256 chars or more.

    // Keep a 6 char tail in reserve so that neither this loop nor the encoder needs
    // a per-char destination bounds check.
    char* const pDestLimit = pDestEnd - 6;

    while((pSrc < pSrcEnd) && (pDest < pDestLimit))
    {
        if(!UCS4ToUTF8(*pSrc++, pDest))
            return false;
    }

    return true;
}
// char32_t -> char16_t.
// Narrows each source element to char16_t with a plain cast, copying at most as many
// elements as the destination can hold. pSrc and pDest are advanced past the elements
// converted. Always returns true.
EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd)
{
    const size_t srcCount  = (size_t)(pSrcEnd - pSrc);
    const size_t destCount = (size_t)(pDestEnd - pDest);

    // Stop early when the destination is the smaller of the two ranges.
    const char32_t* const pStop = (srcCount > destCount) ? (pSrc + destCount) : pSrcEnd;

    // To consider: Improve this by unrolling this loop. Other tricks can improve its speed as well.
    // This is potentially losing data. We are not converting to UTF16; we are converting to UCS2.
    for(; pSrc != pStop; ++pSrc, ++pDest)
        *pDest = (char16_t)*pSrc;

    return true;
}
// char32_t -> char32_t: no transcoding, just a bounded copy.
// Copies min(source length, destination capacity) elements, then advances pSrc and pDest
// past the copied elements. Always returns true.
EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd)
{
    const size_t srcCount  = (size_t)(pSrcEnd - pSrc);
    const size_t destCount = (size_t)(pDestEnd - pDest);
    const size_t copyCount = (srcCount > destCount) ? destCount : srcCount;

    // The byte count is the element count scaled by the element size.
    __builtin_memmove(pDest, pSrc, copyCount * sizeof(char32_t));

    pSrc  += copyCount;
    pDest += copyCount; // Both cursors move by the same clamped count.
    return true;
}
// int -> char (UTF-8).
// Each int is reinterpreted as an unsigned value and encoded with UCS4ToUTF8 until the
// source is exhausted, the destination write cursor reaches the reserved tail, or an
// element fails to encode. Returns false only if an element failed to encode.
EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char*& pDest, char* pDestEnd)
{
    EASTL_ASSERT((pDest + 6) < pDestEnd); // The user must provide ample buffer space, preferably 256 chars or more.

    // Keep a 6 char tail in reserve so that neither this loop nor the encoder needs
    // a per-char destination bounds check.
    char* const pDestLimit = pDestEnd - 6;

    while((pSrc < pSrcEnd) && (pDest < pDestLimit))
    {
        const unsigned value = (unsigned)*pSrc++;

        if(!UCS4ToUTF8((uint32_t)value, pDest))
            return false;
    }

    return true;
}
// int -> char16_t.
// Narrows each source element to char16_t with a plain cast, copying at most as many
// elements as the destination can hold. pSrc and pDest are advanced past the elements
// converted. Always returns true.
EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd)
{
    const size_t srcCount  = (size_t)(pSrcEnd - pSrc);
    const size_t destCount = (size_t)(pDestEnd - pDest);

    // Stop early when the destination is the smaller of the two ranges.
    const int* const pStop = (srcCount > destCount) ? (pSrc + destCount) : pSrcEnd;

    // To consider: Improve this by unrolling this loop. Other tricks can improve its speed as well.
    // This is potentially losing data. We are not converting to UTF16; we are converting to UCS2.
    for(; pSrc != pStop; ++pSrc, ++pDest)
        *pDest = (char16_t)*pSrc;

    return true;
}
// int -> char32_t.
// Converts each source element to char32_t with a plain cast and no validation, copying
// at most as many elements as the destination can hold. pSrc and pDest are advanced past
// the elements converted. Always returns true.
EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd)
{
    const size_t srcCount  = (size_t)(pSrcEnd - pSrc);
    const size_t destCount = (size_t)(pDestEnd - pDest);

    // Stop early when the destination is the smaller of the two ranges.
    const int* const pStop = (srcCount > destCount) ? (pSrc + destCount) : pSrcEnd;

    // To consider: Improve this by unrolling this loop. Other tricks can improve its speed as well.
    for(; pSrc != pStop; ++pSrc, ++pDest)
        *pDest = (char32_t)*pSrc;

    return true;
}
} // namespace eastl