Add string_view support to more UTF functions

This commit is contained in:
Henrik Rydgård 2024-01-12 12:34:47 +01:00
parent 9c9fe8bf8f
commit 0fc3e2b777
6 changed files with 43 additions and 43 deletions

View file

@ -383,7 +383,7 @@ bool AnyEmojiInString(std::string_view str, size_t byteCount) {
return false;
}
int UTF8StringNonASCIICount(const char *utf8string) {
int UTF8StringNonASCIICount(std::string_view utf8string) {
UTF8 utf(utf8string);
int count = 0;
while (!utf.end()) {
@ -394,7 +394,7 @@ int UTF8StringNonASCIICount(const char *utf8string) {
return count;
}
// Returns true when UTF8StringNonASCIICount(utf8string) is greater than zero.
// Diff view: the first signature line below is the pre-change (const char *) form and the
// second is its std::string_view replacement; only one of them exists in the real file.
bool UTF8StringHasNonASCII(const char *utf8string) {
bool UTF8StringHasNonASCII(std::string_view utf8string) {
return UTF8StringNonASCIICount(utf8string) > 0;
}
@ -422,25 +422,21 @@ std::string ConvertWStringToUTF8(const std::wstring &wstr) {
return s;
}
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const std::string &source) {
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, std::string_view source) {
int len = (int)source.size();
int size = (int)MultiByteToWideChar(CP_UTF8, 0, source.c_str(), len, NULL, 0);
MultiByteToWideChar(CP_UTF8, 0, source.c_str(), len, dest, std::min((int)destSize, size));
destSize -= 1; // account for the \0.
int size = (int)MultiByteToWideChar(CP_UTF8, 0, source.data(), len, NULL, 0);
MultiByteToWideChar(CP_UTF8, 0, source.data(), len, dest, std::min((int)destSize, size));
dest[size] = 0;
}
// const char * overload: converts the null-terminated UTF-8 string `source` into `dest`,
// whose capacity is `destSize` wchar_t units.
// NOTE(review): judging by the hunk line counts and the matching header hunk, this overload
// appears to be deleted by this commit in favor of the std::string_view one — confirm.
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const char *source) {
int len = (int)strlen(source) + 1; // include trailing zero
// Sizing call: no output buffer is passed, so only the required length is queried.
int size = (int)MultiByteToWideChar(CP_UTF8, 0, source, len, NULL, 0);
// Conversion call, offered at most destSize units of output.
// NOTE(review): if size exceeds destSize, nothing here writes a terminator into dest.
MultiByteToWideChar(CP_UTF8, 0, source, len, dest, std::min((int)destSize, size));
}
// Returns the UTF-8 text in `source` converted to a std::wstring via MultiByteToWideChar.
// Diff view: the `const std::string &` signature and the source.c_str() calls are the
// pre-change lines; the std::string_view signature and source.data() calls replace them.
std::wstring ConvertUTF8ToWString(const std::string &source) {
std::wstring ConvertUTF8ToWString(const std::string_view source) {
int len = (int)source.size();
// Sizing call: no output buffer is passed, so only the required length is queried.
int size = (int)MultiByteToWideChar(CP_UTF8, 0, source.c_str(), len, NULL, 0);
int size = (int)MultiByteToWideChar(CP_UTF8, 0, source.data(), len, NULL, 0);
std::wstring str;
str.resize(size);
// Only run the conversion when the sizing call asked for at least one unit.
if (size > 0) {
MultiByteToWideChar(CP_UTF8, 0, source.c_str(), len, &str[0], size);
// NOTE(review): passes source.size() (size_t) where the sizing call passed `len`;
// it is the same value, implicitly narrowed to the API's int parameter.
MultiByteToWideChar(CP_UTF8, 0, source.data(), source.size(), &str[0], size);
}
return str;
}
@ -477,11 +473,11 @@ std::string SanitizeUTF8(std::string_view utf8string) {
return s;
}
static size_t ConvertUTF8ToUCS2Internal(char16_t *dest, size_t destSize, const std::string &source) {
static size_t ConvertUTF8ToUCS2Internal(char16_t *dest, size_t destSize, std::string_view source) {
const char16_t *const orig = dest;
const char16_t *const destEnd = dest + destSize;
UTF8 utf(source.c_str());
UTF8 utf(source);
char16_t *destw = (char16_t *)dest;
const char16_t *const destwEnd = destw + destSize;
@ -494,7 +490,7 @@ static size_t ConvertUTF8ToUCS2Internal(char16_t *dest, size_t destSize, const s
destw += UTF16LE::encodeUCS2(destw, c);
}
// No ++ to not count the terminal in length.
// No ++ to not count the null-terminator in length.
if (destw < destEnd) {
*destw = 0;
}
@ -506,11 +502,11 @@ void ConvertUTF8ToUCS2(char16_t *dest, size_t destSize, const std::string &sourc
ConvertUTF8ToUCS2Internal(dest, destSize, source);
}
// Returns `source` (UTF-8) converted by ConvertUTF8ToUCS2Internal, as a std::u16string.
// Diff view: the lines sizing the buffer with source.size() + 1 are the pre-change side;
// the std::string_view version sizes it to exactly source.size() units.
std::u16string ConvertUTF8ToUCS2(const std::string &source) {
std::u16string ConvertUTF8ToUCS2(std::string_view source) {
std::u16string dst;
// utf-8 won't be less bytes than there are characters. But need +1 for terminator.
dst.resize(source.size() + 1, 0);
size_t realLen = ConvertUTF8ToUCS2Internal(&dst[0], source.size() + 1, source);
// utf-8 won't be less bytes than there are characters.
dst.resize(source.size(), 0);
size_t realLen = ConvertUTF8ToUCS2Internal(&dst[0], source.size(), source);
// Trim the over-allocated buffer down to the length the conversion reported.
dst.resize(realLen);
return dst;
}
@ -539,11 +535,11 @@ std::string ConvertWStringToUTF8(const std::wstring &wstr) {
return s;
}
static size_t ConvertUTF8ToWStringInternal(wchar_t *dest, size_t destSize, const std::string &source) {
static size_t ConvertUTF8ToWStringInternal(wchar_t *dest, size_t destSize, std::string_view source) {
const wchar_t *const orig = dest;
const wchar_t *const destEnd = dest + destSize;
UTF8 utf(source.c_str());
UTF8 utf(source);
if (sizeof(wchar_t) == 2) {
char16_t *destw = (char16_t *)dest;
@ -572,12 +568,13 @@ static size_t ConvertUTF8ToWStringInternal(wchar_t *dest, size_t destSize, const
return dest - orig;
}
// Returns `source` (UTF-8) converted by ConvertUTF8ToWStringInternal, as a std::wstring.
// Diff view: the lines sizing the buffer with source.size() + 1 are the pre-change side;
// the std::string_view version sizes it to exactly source.size() units.
std::wstring ConvertUTF8ToWString(const std::string &source) {
std::wstring ConvertUTF8ToWString(std::string_view source) {
std::wstring dst;
// utf-8 won't be less bytes than there are characters. But need +1 for terminator.
dst.resize(source.size() + 1, 0);
size_t realLen = ConvertUTF8ToWStringInternal(&dst[0], source.size() + 1, source);
// utf-8 won't be less bytes than there are characters.
dst.resize(source.size(), 0);
size_t realLen = ConvertUTF8ToWStringInternal(&dst[0], source.size(), source);
// Trim the over-allocated buffer down to the length the conversion reported.
dst.resize(realLen);
// NOTE(review): after resize(realLen), index realLen is the string's own terminator slot,
// so this stores 0 where a 0 already is; it looks redundant — confirm it can be dropped.
dst[realLen] = 0;
return dst;
}

View file

@ -93,9 +93,9 @@ private:
int size_;
};
// Diff view: each const char * declaration is the pre-change line, followed by the
// std::string_view declaration that replaces it.
// UTF8StringHasNonASCII is defined as UTF8StringNonASCIICount(utf8string) > 0.
int UTF8StringNonASCIICount(const char *utf8string);
int UTF8StringNonASCIICount(std::string_view utf8string);
bool UTF8StringHasNonASCII(const char *utf8string);
bool UTF8StringHasNonASCII(std::string_view utf8string);
// Removes overlong encodings and similar.
@ -109,14 +109,13 @@ std::string CodepointToUTF8(uint32_t codePoint);
std::string ConvertWStringToUTF8(const std::wstring &wstr);
std::string ConvertWStringToUTF8(const wchar_t *wstr);
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const std::string &source);
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const char *source);
std::wstring ConvertUTF8ToWString(const std::string &source);
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, std::string_view source);
std::wstring ConvertUTF8ToWString(std::string_view source);
#else
// Used by SymbolMap/assembler
std::wstring ConvertUTF8ToWString(const std::string &source);
std::wstring ConvertUTF8ToWString(std::string_view source);
std::string ConvertWStringToUTF8(const std::wstring &wstr);
#endif
@ -124,5 +123,5 @@ std::string ConvertWStringToUTF8(const std::wstring &wstr);
std::string ConvertUCS2ToUTF8(const std::u16string &wstr);
// Dest size in units, not bytes.
// Diff view: the two `const std::string &` declarations are the pre-change lines; the two
// std::string_view declarations replace them. The first overload writes into the caller's
// `dest` buffer, the second returns the converted text as a new std::u16string.
void ConvertUTF8ToUCS2(char16_t *dest, size_t destSize, const std::string &source);
std::u16string ConvertUTF8ToUCS2(const std::string &source);
void ConvertUTF8ToUCS2(char16_t *dest, size_t destSize, std::string_view source);
std::u16string ConvertUTF8ToUCS2(std::string_view source);

View file

@ -215,10 +215,12 @@ bool GetFilesInDir(const Path &directory, std::vector<FileInfo> *files, const ch
}
// Find the first file in the directory.
WIN32_FIND_DATA ffd;
std::wstring wpath = directory.ToWString();
wpath += L"\\*";
#if PPSSPP_PLATFORM(UWP)
HANDLE hFind = FindFirstFileExFromAppW((directory.ToWString() + L"\\*").c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
HANDLE hFind = FindFirstFileExFromAppW(wpath.c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
#else
HANDLE hFind = FindFirstFileEx((directory.ToWString() + L"\\*").c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
HANDLE hFind = FindFirstFileEx(wpath.c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
#endif
if (hFind == INVALID_HANDLE_VALUE) {
#if PPSSPP_PLATFORM(UWP)

View file

@ -136,7 +136,7 @@ void TextDrawerWin32::MeasureString(const char *str, size_t len, float *w, float
std::string toMeasure = ReplaceAll(std::string(str, len), "&&", "&");
std::vector<std::string> lines;
std::vector<std::string_view> lines;
SplitString(toMeasure, '\n', lines);
int extW = 0, extH = 0;
@ -177,20 +177,20 @@ void TextDrawerWin32::MeasureStringRect(const char *str, size_t len, const Bound
TEXTMETRIC metrics{};
GetTextMetrics(ctx_->hDC, &metrics);
std::vector<std::string> lines;
std::vector<std::string_view> lines;
SplitString(toMeasure, '\n', lines);
int total_w = 0;
int total_h = 0;
CacheKey key{ "", fontHash_};
for (size_t i = 0; i < lines.size(); i++) {
CacheKey key{ lines[i], fontHash_ };
key.text = lines[i];
TextMeasureEntry *entry;
auto iter = sizeCache_.find(key);
if (iter != sizeCache_.end()) {
entry = iter->second.get();
} else {
SIZE size;
std::wstring wstr = ConvertUTF8ToWString(lines[i].length() == 0 ? " " : ReplaceAll(lines[i], "&&", "&"));
std::wstring wstr = ConvertUTF8ToWString(lines[i].empty() ? " " : ReplaceAll(lines[i], "&&", "&"));
GetTextExtentPoint32(ctx_->hDC, wstr.c_str(), (int)wstr.size(), &size);
entry = new TextMeasureEntry();

View file

@ -284,6 +284,7 @@ std::string_view StripQuotes(std::string_view s) {
return s;
}
// NOTE: str must live at least as long as all uses of output.
void SplitString(std::string_view str, const char delim, std::vector<std::string_view> &output) {
size_t next = 0;
for (size_t pos = 0, len = str.length(); pos < len; ++pos) {

View file

@ -81,8 +81,9 @@ std::string StripQuotes(const std::string &s);
std::string_view StripSpaces(std::string_view s);
std::string_view StripQuotes(std::string_view s);
// TODO: Make this a lot more efficient by outputting string_views.
// NOTE: str must live at least as long as all uses of output.
void SplitString(std::string_view str, const char delim, std::vector<std::string_view> &output);
// Try to avoid this when possible, in favor of the string_view version.
void SplitString(std::string_view str, const char delim, std::vector<std::string> &output);
void GetQuotedStrings(const std::string& str, std::vector<std::string>& output);