diff options
author | yinsimei | 2017-07-06 22:05:13 +0200 |
---|---|---|
committer | Eugene Sandulenko | 2017-07-13 18:27:45 +0200 |
commit | 302c94627930e9965f7607497a01201fff3551e3 (patch) | |
tree | f2301bea76f41412199c16bdb5e1bcee4fdbace9 /engines/sludge/utf8.cpp | |
parent | fe773c1bebf353bdba2a4a279b19a85ea4d7be5a (diff) | |
download | scummvm-rg350-302c94627930e9965f7607497a01201fff3551e3.tar.gz scummvm-rg350-302c94627930e9965f7607497a01201fff3551e3.tar.bz2 scummvm-rg350-302c94627930e9965f7607497a01201fff3551e3.zip |
SLUDGE: use U32String to replace sludge utf8 library
Diffstat (limited to 'engines/sludge/utf8.cpp')
-rw-r--r-- | engines/sludge/utf8.cpp | 101 |
1 files changed, 101 insertions, 0 deletions
diff --git a/engines/sludge/utf8.cpp b/engines/sludge/utf8.cpp new file mode 100644 index 0000000000..95f9e91f8a --- /dev/null +++ b/engines/sludge/utf8.cpp @@ -0,0 +1,101 @@ +/* ScummVM - Graphic Adventure Engine + * + * ScummVM is the legal property of its developers, whose names + * are too numerous to list here. Please refer to the COPYRIGHT + * file distributed with this source distribution. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ +/* + Basic UTF-8 manipulation routines + by Jeff Bezanson + placed in the public domain Fall 2005 + + This code is designed to provide the utilities you need to manipulate + UTF-8 as an internal string encoding. These functions do not perform the + error checking normally needed when handling UTF-8 data, so if you happen + to be from the Unicode Consortium you will want to flay me alive. + I do this because error checking can be performed at the boundaries (I/O), + with these routines reserved for higher performance on data known to be + valid. + */ + +#include "common/debug.h" + +#include "sludge/utf8.h" + +namespace Sludge { + +const uint32 UTF8Converter::offsetsFromUTF8[6] = { + 0x00000000UL, 0x00003080UL, + 0x000E2080UL, 0x03C82080UL, + 0xFA082080UL, 0x82082080UL }; + +/* reads the next utf-8 sequence out of a string, updating an index */ +uint32 UTF8Converter::nextchar(const char *s, int *i) { + uint32 ch = 0; + int sz = 0; + + do { + ch <<= 6; + ch += (unsigned char)s[(*i)++]; + sz++; + } while (s[*i] && !isutf(s[*i])); + ch -= offsetsFromUTF8[sz - 1]; + + return ch; +} + +Common::U32String UTF8Converter::convertUtf8ToUtf32(const Common::String &str) { + // we assume one character in a Common::String is one byte + // but in this case it's actually an UTF-8 string + // with up to 4 bytes per character. To work around this, + // convert it to an U32String before any further operation + Common::U32String u32str; + int i = 0; + while (i < (int)str.size()) { + uint32 chr = nextchar(str.c_str(), &i); + u32str += chr; + } + return u32str; +} + +/* utf32 index => original byte offset */ +int UTF8Converter::getOriginOffset(int origIdx) { + int offs = 0; + + while (origIdx > 0 && _str[offs]) { + // increment if it's not the start of a utf8 sequence + (void)(isutf(_str[++offs]) || isutf(_str[++offs]) || isutf(_str[++offs]) || ++offs); + origIdx--; + } + return offs; +} + +/** Construct a UTF8String with original char array to convert */ +UTF8Converter::UTF8Converter(const char *str) { + setUTF8String(str); +} + +/** set a utf8 string to convert */ +void UTF8Converter::setUTF8String(Common::String str) { + _str32.clear(); + _str32 = convertUtf8ToUtf32(str); + _str.clear(); + _str = str; +} + +} // End of namespace Sludge |