aboutsummaryrefslogtreecommitdiff
path: root/engines/sludge/utf8.cpp
diff options
context:
space:
mode:
author yinsimei 2017-07-06 22:05:13 +0200
committer Eugene Sandulenko 2017-07-13 18:27:45 +0200
commit 302c94627930e9965f7607497a01201fff3551e3 (patch)
tree f2301bea76f41412199c16bdb5e1bcee4fdbace9 /engines/sludge/utf8.cpp
parent fe773c1bebf353bdba2a4a279b19a85ea4d7be5a (diff)
download scummvm-rg350-302c94627930e9965f7607497a01201fff3551e3.tar.gz
scummvm-rg350-302c94627930e9965f7607497a01201fff3551e3.tar.bz2
scummvm-rg350-302c94627930e9965f7607497a01201fff3551e3.zip
SLUDGE: use U32String to replace sludge utf8 library
Diffstat (limited to 'engines/sludge/utf8.cpp')
-rw-r--r-- engines/sludge/utf8.cpp 101
1 files changed, 101 insertions, 0 deletions
diff --git a/engines/sludge/utf8.cpp b/engines/sludge/utf8.cpp
new file mode 100644
index 0000000000..95f9e91f8a
--- /dev/null
+++ b/engines/sludge/utf8.cpp
@@ -0,0 +1,101 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+/*
+ Basic UTF-8 manipulation routines
+ by Jeff Bezanson
+ placed in the public domain Fall 2005
+
+ This code is designed to provide the utilities you need to manipulate
+ UTF-8 as an internal string encoding. These functions do not perform the
+ error checking normally needed when handling UTF-8 data, so if you happen
+ to be from the Unicode Consortium you will want to flay me alive.
+ I do this because error checking can be performed at the boundaries (I/O),
+ with these routines reserved for higher performance on data known to be
+ valid.
+ */
+
+#include "common/debug.h"
+
+#include "sludge/utf8.h"
+
+namespace Sludge {
+
+/*
+ * Correction constants used by nextchar(): entry [n - 1] is subtracted from
+ * the value accumulated over an n-byte sequence (each byte added after
+ * shifting the running total left by 6), which strips the marker bits of
+ * the lead byte and of every following byte in one step.
+ */
+const uint32 UTF8Converter::offsetsFromUTF8[6] = {
+	0x00000000UL, // 1 byte:  nothing to strip
+	0x00003080UL, // 2 bytes: (0xC0 << 6) + 0x80
+	0x000E2080UL, // 3 bytes: (0xE0 << 12) + (0x80 << 6) + 0x80
+	0x03C82080UL, // 4 bytes: (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80
+	0xFA082080UL, // 5 bytes
+	0x82082080UL  // 6 bytes
+};
+
+/*
+ * Reads the next utf-8 sequence out of a string, updating an index.
+ *
+ * s: NUL-terminated buffer holding the bytes to decode
+ * i: in/out byte index into s; on return it points just past the bytes
+ *    that were consumed
+ *
+ * Returns the accumulated bytes minus the offsetsFromUTF8 entry for the
+ * number of bytes consumed. The encoding itself is not validated.
+ */
+uint32 UTF8Converter::nextchar(const char *s, int *i) {
+	// offsetsFromUTF8 holds one entry per sequence length, 1 through 6
+	const int maxSequenceLength = 6;
+	uint32 ch = 0;
+	int sz = 0;
+
+	// Always consume one byte, then keep consuming while the next byte is
+	// not the start of a utf8 sequence. The length cap keeps the table
+	// lookup below in bounds when malformed input carries an overlong run
+	// of such bytes; whatever is left over is picked up by the next call.
+	do {
+		ch <<= 6;
+		ch += (unsigned char)s[(*i)++];
+		sz++;
+	} while (sz < maxSequenceLength && s[*i] && !isutf(s[*i]));
+	ch -= offsetsFromUTF8[sz - 1];
+
+	return ch;
+}
+
+/*
+ * Decodes a UTF-8 string into a Common::U32String.
+ *
+ * A Common::String is treated as one byte per character, but the text held
+ * here is UTF-8 and may spend several bytes on a single character. Decoding
+ * it into a U32String first lets later code work with whole characters.
+ *
+ * str: the UTF-8 encoded text
+ *
+ * Returns one U32String element per nextchar() call needed to walk str.
+ */
+Common::U32String UTF8Converter::convertUtf8ToUtf32(const Common::String &str) {
+	const char *bytes = str.c_str();
+	const int byteCount = (int)str.size();
+	Common::U32String decoded;
+
+	// nextchar() advances pos itself, so the loop has no increment step
+	for (int pos = 0; pos < byteCount;) {
+		const uint32 codePoint = nextchar(bytes, &pos);
+		decoded += codePoint;
+	}
+	return decoded;
+}
+
+/*
+ * utf32 index => original byte offset
+ *
+ * origIdx: index of a character in the decoded string
+ *
+ * Returns the byte offset in _str reached after stepping over origIdx
+ * sequences. The walk stops at the first NUL byte, so an index at or past
+ * the number of characters yields the offset of that NUL. A zero or
+ * negative index yields 0.
+ */
+int UTF8Converter::getOriginOffset(int origIdx) {
+	// Scan the NUL-terminated buffer instead of indexing _str directly:
+	// the walk has to look at the byte right after the last character,
+	// which is the terminator in this buffer but lies outside the index
+	// range Common::String::operator[] accepts.
+	const char *bytes = _str.c_str();
+	int offs = 0;
+
+	while (origIdx > 0 && bytes[offs]) {
+		// step over the first byte, then over at most three more bytes
+		// that are not the start of a utf8 sequence
+		++offs;
+		for (int extra = 0; extra < 3 && !isutf(bytes[offs]); ++extra) {
+			++offs;
+		}
+		origIdx--;
+	}
+	return offs;
+}
+
+/** Build a converter from a raw char array holding the text to convert */
+UTF8Converter::UTF8Converter(const char *str) {
+	// setUTF8String() takes a Common::String, so wrap the array explicitly
+	setUTF8String(Common::String(str));
+}
+
+/**
+ * Set a utf8 string to convert.
+ *
+ * str: the UTF-8 encoded text. It is stored in _str and its decoded form
+ *      in _str32, both replacing whatever was held before.
+ */
+void UTF8Converter::setUTF8String(Common::String str) {
+	// assignment already replaces the previous contents, so no clear() first
+	_str32 = convertUtf8ToUtf32(str);
+	_str = str;
+}
+
+} // End of namespace Sludge