Add string_view support to more UTF functions

2025-04-02 11:01:50 -04:00 · 2024-01-12 12:34:47 +01:00 · 2024-01-12 12:34:47 +01:00 · 0fc3e2b777
commit 0fc3e2b777
parent 9c9fe8bf8f
6 changed files with 43 additions and 43 deletions
--- a/Common/Data/Encoding/Utf8.cpp
+++ b/Common/Data/Encoding/Utf8.cpp
@ -383,7 +383,7 @@ bool AnyEmojiInString(std::string_view str, size_t byteCount) {
 	return false;
 }

-int UTF8StringNonASCIICount(const char *utf8string) {
+int UTF8StringNonASCIICount(std::string_view utf8string) {
 	UTF8 utf(utf8string);
 	int count = 0;
 	while (!utf.end()) {
@ -394,7 +394,7 @@ int UTF8StringNonASCIICount(const char *utf8string) {
 	return count;
 }

-bool UTF8StringHasNonASCII(const char *utf8string) {
+bool UTF8StringHasNonASCII(std::string_view utf8string) {
 	return UTF8StringNonASCIICount(utf8string) > 0;
 }

@ -422,25 +422,21 @@ std::string ConvertWStringToUTF8(const std::wstring &wstr) {
 	return s;
 }

-void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const std::string &source) {
+void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, std::string_view source) {
 	int len = (int)source.size();
-	int size = (int)MultiByteToWideChar(CP_UTF8, 0, source.c_str(), len, NULL, 0);
-	MultiByteToWideChar(CP_UTF8, 0, source.c_str(), len, dest, std::min((int)destSize, size));
+	if (destSize == 0) return;  // no room even for the \0.
+	// Reserve one unit for the \0; a capacity of 0 would turn the call into a size query.
+	int size = destSize > 1 ? MultiByteToWideChar(CP_UTF8, 0, source.data(), len, dest, (int)destSize - 1) : 0;
+	dest[size] = 0;  // size is 0 on failure (e.g. dest too small), giving an empty string.
 }

-void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const char *source) {
-	int len = (int)strlen(source) + 1;  // include trailing zero
-	int size = (int)MultiByteToWideChar(CP_UTF8, 0, source, len, NULL, 0);
-	MultiByteToWideChar(CP_UTF8, 0, source, len, dest, std::min((int)destSize, size));
-}
-
-std::wstring ConvertUTF8ToWString(const std::string &source) {
+std::wstring ConvertUTF8ToWString(const std::string_view source) {
 	int len = (int)source.size();
-	int size = (int)MultiByteToWideChar(CP_UTF8, 0, source.c_str(), len, NULL, 0);
+	int size = (int)MultiByteToWideChar(CP_UTF8, 0, source.data(), len, NULL, 0);
 	std::wstring str;
 	str.resize(size);
 	if (size > 0) {
-		MultiByteToWideChar(CP_UTF8, 0, source.c_str(), len, &str[0], size);
+		MultiByteToWideChar(CP_UTF8, 0, source.data(), len, &str[0], size);
 	}
 	return str;
 }
@ -477,11 +473,11 @@ std::string SanitizeUTF8(std::string_view utf8string) {
 	return s;
 }

-static size_t ConvertUTF8ToUCS2Internal(char16_t *dest, size_t destSize, const std::string &source) {
+static size_t ConvertUTF8ToUCS2Internal(char16_t *dest, size_t destSize, std::string_view source) {
 	const char16_t *const orig = dest;
 	const char16_t *const destEnd = dest + destSize;

-	UTF8 utf(source.c_str());
+	UTF8 utf(source);

 	char16_t *destw = (char16_t *)dest;
 	const char16_t *const destwEnd = destw + destSize;
@ -494,7 +490,7 @@ static size_t ConvertUTF8ToUCS2Internal(char16_t *dest, size_t destSize, const s
 		destw += UTF16LE::encodeUCS2(destw, c);
 	}

-	// No ++ to not count the terminal in length.
+	// No ++ to not count the null-terminator in length.
 	if (destw < destEnd) {
 		*destw = 0;
 	}
@ -506,11 +502,11 @@ void ConvertUTF8ToUCS2(char16_t *dest, size_t destSize, const std::string &sourc
 	ConvertUTF8ToUCS2Internal(dest, destSize, source);
 }

-std::u16string ConvertUTF8ToUCS2(const std::string &source) {
+std::u16string ConvertUTF8ToUCS2(std::string_view source) {
 	std::u16string dst;
-	// utf-8 won't be less bytes than there are characters.  But need +1 for terminator.
-	dst.resize(source.size() + 1, 0);
-	size_t realLen = ConvertUTF8ToUCS2Internal(&dst[0], source.size() + 1, source);
+	// utf-8 won't be less bytes than there are characters.
+	dst.resize(source.size(), 0);
+	size_t realLen = ConvertUTF8ToUCS2Internal(&dst[0], source.size(), source);
 	dst.resize(realLen);
 	return dst;
 }
@ -539,11 +535,11 @@ std::string ConvertWStringToUTF8(const std::wstring &wstr) {
 	return s;
 }

-static size_t ConvertUTF8ToWStringInternal(wchar_t *dest, size_t destSize, const std::string &source) {
+static size_t ConvertUTF8ToWStringInternal(wchar_t *dest, size_t destSize, std::string_view source) {
 	const wchar_t *const orig = dest;
 	const wchar_t *const destEnd = dest + destSize;

-	UTF8 utf(source.c_str());
+	UTF8 utf(source);

 	if (sizeof(wchar_t) == 2) {
 		char16_t *destw = (char16_t *)dest;
@ -572,12 +568,13 @@ static size_t ConvertUTF8ToWStringInternal(wchar_t *dest, size_t destSize, const
 	return dest - orig;
 }

-std::wstring ConvertUTF8ToWString(const std::string &source) {
+std::wstring ConvertUTF8ToWString(std::string_view source) {
 	std::wstring dst;
-	// utf-8 won't be less bytes than there are characters.  But need +1 for terminator.
-	dst.resize(source.size() + 1, 0);
-	size_t realLen = ConvertUTF8ToWStringInternal(&dst[0], source.size() + 1, source);
+	// utf-8 won't be less bytes than there are characters.
+	dst.resize(source.size(), 0);
+	size_t realLen = ConvertUTF8ToWStringInternal(&dst[0], source.size(), source);
 	dst.resize(realLen);
+	dst[realLen] = 0;
 	return dst;
 }

--- a/Common/Data/Encoding/Utf8.h
+++ b/Common/Data/Encoding/Utf8.h
@ -93,9 +93,9 @@ private:
 	int size_;
 };

-int UTF8StringNonASCIICount(const char *utf8string);
+int UTF8StringNonASCIICount(std::string_view utf8string);

-bool UTF8StringHasNonASCII(const char *utf8string);
+bool UTF8StringHasNonASCII(std::string_view utf8string);


 // Removes overlong encodings and similar.
@ -109,14 +109,13 @@ std::string CodepointToUTF8(uint32_t codePoint);

 std::string ConvertWStringToUTF8(const std::wstring &wstr);
 std::string ConvertWStringToUTF8(const wchar_t *wstr);
-void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const std::string &source);
-void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const char *source);
-std::wstring ConvertUTF8ToWString(const std::string &source);
+void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, std::string_view source);
+std::wstring ConvertUTF8ToWString(std::string_view source);

 #else

 // Used by SymbolMap/assembler
-std::wstring ConvertUTF8ToWString(const std::string &source);
+std::wstring ConvertUTF8ToWString(std::string_view source);
 std::string ConvertWStringToUTF8(const std::wstring &wstr);

 #endif
@ -124,5 +123,5 @@ std::string ConvertWStringToUTF8(const std::wstring &wstr);
 std::string ConvertUCS2ToUTF8(const std::u16string &wstr);

 // Dest size in units, not bytes.
-void ConvertUTF8ToUCS2(char16_t *dest, size_t destSize, const std::string &source);
-std::u16string ConvertUTF8ToUCS2(const std::string &source);
+void ConvertUTF8ToUCS2(char16_t *dest, size_t destSize, std::string_view source);
+std::u16string ConvertUTF8ToUCS2(std::string_view source);
--- a/Common/File/DirListing.cpp
+++ b/Common/File/DirListing.cpp
@ -215,10 +215,12 @@ bool GetFilesInDir(const Path &directory, std::vector<FileInfo> *files, const ch
 	}
 	// Find the first file in the directory.
 	WIN32_FIND_DATA ffd;
+	std::wstring wpath = directory.ToWString();
+	wpath += L"\\*";
 #if PPSSPP_PLATFORM(UWP)
-	HANDLE hFind = FindFirstFileExFromAppW((directory.ToWString() + L"\\*").c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
+	HANDLE hFind = FindFirstFileExFromAppW(wpath.c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
 #else
-	HANDLE hFind = FindFirstFileEx((directory.ToWString() + L"\\*").c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
+	HANDLE hFind = FindFirstFileEx(wpath.c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
 #endif
 	if (hFind == INVALID_HANDLE_VALUE) {
 #if PPSSPP_PLATFORM(UWP)
--- a/Common/Render/Text/draw_text_win.cpp
+++ b/Common/Render/Text/draw_text_win.cpp
@ -136,7 +136,7 @@ void TextDrawerWin32::MeasureString(const char *str, size_t len, float *w, float

 		std::string toMeasure = ReplaceAll(std::string(str, len), "&&", "&");

-		std::vector<std::string> lines;
+		std::vector<std::string_view> lines;
 		SplitString(toMeasure, '\n', lines);

 		int extW = 0, extH = 0;
@ -177,20 +177,20 @@ void TextDrawerWin32::MeasureStringRect(const char *str, size_t len, const Bound
 	TEXTMETRIC metrics{};
 	GetTextMetrics(ctx_->hDC, &metrics);

-	std::vector<std::string> lines;
+	std::vector<std::string_view> lines;
 	SplitString(toMeasure, '\n', lines);
 	int total_w = 0;
 	int total_h = 0;
+	CacheKey key{ "", fontHash_};
 	for (size_t i = 0; i < lines.size(); i++) {
-		CacheKey key{ lines[i], fontHash_ };
-
+		key.text = lines[i];
 		TextMeasureEntry *entry;
 		auto iter = sizeCache_.find(key);
 		if (iter != sizeCache_.end()) {
 			entry = iter->second.get();
 		} else {
 			SIZE size;
-			std::wstring wstr = ConvertUTF8ToWString(lines[i].length() == 0 ? " " : ReplaceAll(lines[i], "&&", "&"));
+			std::wstring wstr = ConvertUTF8ToWString(lines[i].empty() ? " " : ReplaceAll(lines[i], "&&", "&"));
 			GetTextExtentPoint32(ctx_->hDC, wstr.c_str(), (int)wstr.size(), &size);

 			entry = new TextMeasureEntry();
--- a/Common/StringUtils.cpp
+++ b/Common/StringUtils.cpp
@ -284,6 +284,7 @@ std::string_view StripQuotes(std::string_view s) {
 		return s;
 }

+// NOTE: str must live at least as long as all uses of output.
 void SplitString(std::string_view str, const char delim, std::vector<std::string_view> &output) {
 	size_t next = 0;
 	for (size_t pos = 0, len = str.length(); pos < len; ++pos) {
--- a/Common/StringUtils.h
+++ b/Common/StringUtils.h
@ -81,8 +81,9 @@ std::string StripQuotes(const std::string &s);
 std::string_view StripSpaces(std::string_view s);
 std::string_view StripQuotes(std::string_view s);

-// TODO: Make this a lot more efficient by outputting string_views.
+// NOTE: str must live at least as long as all uses of output.
 void SplitString(std::string_view str, const char delim, std::vector<std::string_view> &output);
+// Try to avoid this when possible, in favor of the string_view version.
 void SplitString(std::string_view str, const char delim, std::vector<std::string> &output);

 void GetQuotedStrings(const std::string& str, std::vector<std::string>& output);