aboutsummaryrefslogtreecommitdiff
path: root/common
diff options
context:
space:
mode:
authorLe Philousophe2019-11-16 20:58:45 +0100
committerFilippos Karapetis2019-11-30 13:34:33 +0200
commit6aaec02c57cd04906242d2014ba9242422bb2258 (patch)
tree1647e56e8688e5626a0426e268a3e44894b788e3 /common
parent22a2a392d3234139acd02fab99da6f2959ae1893 (diff)
downloadscummvm-rg350-6aaec02c57cd04906242d2014ba9242422bb2258.tar.gz
scummvm-rg350-6aaec02c57cd04906242d2014ba9242422bb2258.tar.bz2
scummvm-rg350-6aaec02c57cd04906242d2014ba9242422bb2258.zip
COMMON: Add CJK encodings to Common::CodePage
Use Encoding::convert to do the conversion first and try old method if it fails
Diffstat (limited to 'common')
-rw-r--r--common/str-enc.cpp53
-rw-r--r--common/str-enc.h8
2 files changed, 53 insertions, 8 deletions
diff --git a/common/str-enc.cpp b/common/str-enc.cpp
index 2a52070810..3aba06d58f 100644
--- a/common/str-enc.cpp
+++ b/common/str-enc.cpp
@@ -20,8 +20,10 @@
*
*/
+#include "common/encoding.h"
#include "common/str.h"
#include "common/ustr.h"
+#include "common/util.h"
namespace Common {
@@ -265,6 +267,21 @@ static const uint32 g_windows1257ConversionTable[] = {0x20AC, 0x0081, 0x201A, 0x
0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9};
+/* This array must match the enum defined in str-enc.h */
+static char const *const g_codePageMap[] = {
+ "UTF-8", /* kUtf8 */
+ "WINDOWS-1250", /* kWindows1250 */
+ "WINDOWS-1251", /* kWindows1251 */
+ "WINDOWS-1252", /* kWindows1252 */
+ "WINDOWS-1253", /* kWindows1253 */
+ "WINDOWS-1254", /* kWindows1254 */
+ "WINDOWS-1255", /* kWindows1255 */
+ "WINDOWS-1257", /* kWindows1257 */
+ "MS932", /* kWindows932 */
+ "MSCP949", /* kWindows949 */
+ "CP950" /* kWindows950 */
+};
+
void String::decodeOneByte(U32String &dst, CodePage page) const {
for (uint i = 0; i < _size; ++i) {
if ((byte)_str[i] <= 0x7F) {
@@ -303,11 +320,22 @@ void String::decodeOneByte(U32String &dst, CodePage page) const {
}
U32String String::decode(CodePage page) const {
- U32String unicodeString;
- if (page == kUtf8) {
+ if (page == kCodePageInvalid ||
+ page >= ARRAYSIZE(g_codePageMap)) {
+ error("Invalid codepage");
+ }
+ char *result = Encoding::convert("UTF-32", g_codePageMap[page], _str, _size);
+ if (result) {
+ U32String unicodeString((uint32 *)result);
+ free(result);
+ return unicodeString;
+ }
+
+ U32String unicodeString;
+ if (page == kUtf8) {
decodeUTF8(unicodeString);
} else {
- decodeOneByte(unicodeString, page);
+ decodeOneByte(unicodeString, page);
}
return unicodeString;
}
@@ -363,11 +391,24 @@ void U32String::encodeOneByte(String &dst, CodePage page) const {
String U32String::encode(CodePage page) const {
- String string;
- if (page == kUtf8) {
+ if (page == kCodePageInvalid ||
+ page >= ARRAYSIZE(g_codePageMap)) {
+ error("Invalid codepage");
+ }
+ char *result = Encoding::convert(g_codePageMap[page], "UTF-32", (const char *)_str, _size);
+ if (result) {
+ // Encodings in CodePage all use '\0' as string ending
+ // That would be problematic if CodePage has UTF-16 or UTF-32
+ String string(result);
+ free(result);
+ return string;
+ }
+
+ String string;
+ if (page == kUtf8) {
encodeUTF8(string);
} else {
- encodeOneByte(string, page);
+ encodeOneByte(string, page);
}
return string;
}
diff --git a/common/str-enc.h b/common/str-enc.h
index 12ffd389f8..796cf963f0 100644
--- a/common/str-enc.h
+++ b/common/str-enc.h
@@ -29,14 +29,18 @@ class String;
class U32String;
enum CodePage {
- kUtf8,
+ kCodePageInvalid = -1,
+ kUtf8 = 0,
kWindows1250,
kWindows1251,
kWindows1252,
kWindows1253,
kWindows1254,
kWindows1255,
- kWindows1257
+ kWindows1257,
+ kWindows932,
+ kWindows949,
+ kWindows950
};
U32String convertUtf8ToUtf32(const String &str);