mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Add string_view functionality to the core utf-8 parsing code
This commit is contained in:
parent
26db1cb05d
commit
9c9fe8bf8f
4 changed files with 21 additions and 75 deletions
|
@ -206,27 +206,15 @@ int u8_charnum(const char *s, int offset)
|
|||
return charnum;
|
||||
}
|
||||
|
||||
/* number of characters */
|
||||
int u8_strlen(const char *s)
|
||||
{
|
||||
int count = 0;
|
||||
int i = 0;
|
||||
|
||||
while (u8_nextchar(s, &i) != 0)
|
||||
count++;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/* reads the next utf-8 sequence out of a string, updating an index */
|
||||
uint32_t u8_nextchar(const char *s, int *index) {
|
||||
uint32_t u8_nextchar(const char *s, int *index, size_t size) {
|
||||
uint32_t ch = 0;
|
||||
int sz = 0;
|
||||
int i = *index;
|
||||
do {
|
||||
ch = (ch << 6) + (unsigned char)s[i++];
|
||||
sz++;
|
||||
} while (s[i] && ((s[i]) & 0xC0) == 0x80);
|
||||
} while (i < size && ((s[i]) & 0xC0) == 0x80);
|
||||
*index = i;
|
||||
return ch - offsetsFromUTF8[sz - 1];
|
||||
}
|
||||
|
@ -234,7 +222,6 @@ uint32_t u8_nextchar(const char *s, int *index) {
|
|||
uint32_t u8_nextchar_unsafe(const char *s, int *i) {
|
||||
uint32_t ch = (unsigned char)s[(*i)++];
|
||||
int sz = 1;
|
||||
|
||||
if (ch >= 0xF0) {
|
||||
sz++;
|
||||
ch &= ~0x10;
|
||||
|
@ -253,7 +240,6 @@ uint32_t u8_nextchar_unsafe(const char *s, int *i) {
|
|||
ch <<= 6;
|
||||
ch += ((unsigned char)s[(*i)++]) & 0x3F;
|
||||
}
|
||||
|
||||
return ch;
|
||||
}
|
||||
|
||||
|
@ -367,48 +353,6 @@ int u8_unescape(char *buf, int sz, char *src)
|
|||
return c;
|
||||
}
|
||||
|
||||
const char *u8_strchr(const char *s, uint32_t ch, int *charn)
|
||||
{
|
||||
int i = 0, lasti=0;
|
||||
uint32_t c;
|
||||
|
||||
*charn = 0;
|
||||
while (s[i]) {
|
||||
c = u8_nextchar(s, &i);
|
||||
if (c == ch) {
|
||||
return &s[lasti];
|
||||
}
|
||||
lasti = i;
|
||||
(*charn)++;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const char *u8_memchr(const char *s, uint32_t ch, size_t sz, int *charn)
|
||||
{
|
||||
size_t i = 0, lasti=0;
|
||||
uint32_t c;
|
||||
int csz;
|
||||
|
||||
*charn = 0;
|
||||
while (i < sz) {
|
||||
c = csz = 0;
|
||||
do {
|
||||
c <<= 6;
|
||||
c += (unsigned char)s[i++];
|
||||
csz++;
|
||||
} while (i < sz && !isutf(s[i]));
|
||||
c -= offsetsFromUTF8[csz-1];
|
||||
|
||||
if (c == ch) {
|
||||
return &s[lasti];
|
||||
}
|
||||
lasti = i;
|
||||
(*charn)++;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int u8_is_locale_utf8(const char *locale)
|
||||
{
|
||||
/* this code based on libutf8 */
|
||||
|
@ -428,10 +372,10 @@ int u8_is_locale_utf8(const char *locale)
|
|||
return 0;
|
||||
}
|
||||
|
||||
bool AnyEmojiInString(const char *s, size_t byteCount) {
|
||||
bool AnyEmojiInString(std::string_view str, size_t byteCount) {
|
||||
int i = 0;
|
||||
while (i < byteCount) {
|
||||
uint32_t c = u8_nextchar(s, &i);
|
||||
uint32_t c = u8_nextchar(str.data(), &i, str.size());
|
||||
if (CodepointIsProbablyEmoji(c)) {
|
||||
return true;
|
||||
}
|
||||
|
@ -517,8 +461,8 @@ std::string ConvertUCS2ToUTF8(const std::u16string &wstr) {
|
|||
return s;
|
||||
}
|
||||
|
||||
std::string SanitizeUTF8(const std::string &utf8string) {
|
||||
UTF8 utf(utf8string.c_str());
|
||||
std::string SanitizeUTF8(std::string_view utf8string) {
|
||||
UTF8 utf(utf8string);
|
||||
std::string s;
|
||||
// Worst case.
|
||||
s.resize(utf8string.size() * 4);
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
uint32_t u8_nextchar(const char *s, int *i);
|
||||
uint32_t u8_nextchar(const char *s, int *i, size_t size);
|
||||
uint32_t u8_nextchar_unsafe(const char *s, int *i);
|
||||
int u8_wc_toutf8(char *dest, uint32_t ch);
|
||||
int u8_strlen(const char *s);
|
||||
void u8_inc(const char *s, int *i);
|
||||
void u8_dec(const char *s, int *i);
|
||||
|
||||
|
@ -33,21 +33,23 @@ inline bool CodepointIsProbablyEmoji(uint32_t c) {
|
|||
return c > 0xFFFF;
|
||||
}
|
||||
|
||||
bool AnyEmojiInString(const char *s, size_t byteCount);
|
||||
bool AnyEmojiInString(std::string_view str, size_t byteCount);
|
||||
|
||||
class UTF8 {
|
||||
public:
|
||||
static const uint32_t INVALID = (uint32_t)-1;
|
||||
UTF8(const char *c) : c_(c), index_(0) {}
|
||||
UTF8(const char *c, int index) : c_(c), index_(index) {}
|
||||
bool end() const { return c_[index_] == 0; }
|
||||
// TODO: Try to get rid of this constructor.
|
||||
explicit UTF8(const char *c) : c_(c), size_((int)strlen(c)), index_(0) {}
|
||||
explicit UTF8(std::string_view view) : c_(view.data()), size_((int)view.size()), index_(0) {}
|
||||
explicit UTF8(std::string_view view, int index) : c_(view.data()), size_((int)view.size()), index_(index) {}
|
||||
bool end() const { return index_ == size_; }
|
||||
// Returns true if the next character is outside BMP and Planes 1 - 16.
|
||||
bool invalid() const {
|
||||
unsigned char c = (unsigned char)c_[index_];
|
||||
return (c >= 0x80 && c <= 0xC1) || c >= 0xF5;
|
||||
}
|
||||
uint32_t next() {
|
||||
return u8_nextchar(c_, &index_);
|
||||
return u8_nextchar(c_, &index_, size_);
|
||||
}
|
||||
// Allow invalid continuation bytes.
|
||||
uint32_t next_unsafe() {
|
||||
|
@ -55,7 +57,7 @@ public:
|
|||
}
|
||||
uint32_t peek() const {
|
||||
int tempIndex = index_;
|
||||
return u8_nextchar(c_, &tempIndex);
|
||||
return u8_nextchar(c_, &tempIndex, size_);
|
||||
}
|
||||
void fwd() {
|
||||
u8_inc(c_, &index_);
|
||||
|
@ -64,7 +66,7 @@ public:
|
|||
u8_dec(c_, &index_);
|
||||
}
|
||||
int length() const {
|
||||
return u8_strlen(c_);
|
||||
return size_;
|
||||
}
|
||||
int byteIndex() const {
|
||||
return index_;
|
||||
|
@ -88,6 +90,7 @@ public:
|
|||
private:
|
||||
const char *c_;
|
||||
int index_;
|
||||
int size_;
|
||||
};
|
||||
|
||||
int UTF8StringNonASCIICount(const char *utf8string);
|
||||
|
@ -96,8 +99,7 @@ bool UTF8StringHasNonASCII(const char *utf8string);
|
|||
|
||||
|
||||
// Removes overlong encodings and similar.
|
||||
std::string SanitizeUTF8(const std::string &utf8string);
|
||||
|
||||
std::string SanitizeUTF8(std::string_view utf8string);
|
||||
std::string CodepointToUTF8(uint32_t codePoint);
|
||||
|
||||
|
||||
|
|
|
@ -658,7 +658,7 @@ bool MainUI::event(QEvent *e) {
|
|||
default:
|
||||
if (str.size()) {
|
||||
int pos = 0;
|
||||
int unicode = u8_nextchar(str.c_str(), &pos);
|
||||
int unicode = u8_nextchar(str.c_str(), &pos, str.size());
|
||||
NativeKey(KeyInput(DEVICE_ID_KEYBOARD, unicode));
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -867,7 +867,7 @@ static void ProcessSDLEvent(SDL_Window *window, const SDL_Event &event, InputSta
|
|||
case SDL_TEXTINPUT:
|
||||
{
|
||||
int pos = 0;
|
||||
int c = u8_nextchar(event.text.text, &pos);
|
||||
int c = u8_nextchar(event.text.text, &pos, strlen(event.text.text));
|
||||
KeyInput key;
|
||||
key.flags = KEY_CHAR;
|
||||
key.unicodeChar = c;
|
||||
|
|
Loading…
Add table
Reference in a new issue