COMMON: Add CP850 conversion.

CP850 is used by the mortevielle engine (and apparently by other engines too). Any time an engine using the CP850 encoding wants to use the TTS, the encoding has to be converted, so this is a pretty important encoding conversion to support. Unfortunately, SDL (when compiled without iconv) doesn't support this encoding (which means there might not be a way to convert this encoding on some platforms), so I added a conversion table for this.
author: Jaromir Wysoglad 2019-08-26 16:48:50 +0200
committer: Filippos Karapetis 2019-09-01 22:47:55 +0300
commit: 291360a280bef756f2733515a7bd532856572831 (patch)
tree: 08b5ecce4e6a5d5dbc356d676a17646781765125
parent: 55c399c7c0ebe4084c1bdf31e647cfa50cc01c09 (diff)
download: scummvm-rg350-291360a280bef756f2733515a7bd532856572831.tar.gz
scummvm-rg350-291360a280bef756f2733515a7bd532856572831.tar.bz2
scummvm-rg350-291360a280bef756f2733515a7bd532856572831.zip
2 files changed, 100 insertions, 0 deletions
diff --git a/common/encoding.cpp b/common/encoding.cpp
index 0fe490bc78..fd4f40adc7 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -167,6 +167,10 @@ char *Encoding::conversion(const String &to, const String &from, const char *str
 		result = convertTransManMapping(addUtfEndianness(to).c_str(), addUtfEndianness(from).c_str(), string, length);
 	}
 
+	if (result == nullptr) {
+		result = convertConversionTable(addUtfEndianness(to).c_str(), addUtfEndianness(from).c_str(), string, length);
+	}
+
 	return result;
 }
 
@@ -317,6 +321,86 @@ char *Encoding::convertTransManMapping(const char *to, const char *from, const c
 #endif // USE_TRANSLATION
 }
 
+// Lookup table from a CP850 byte value (the array index, 0x00-0xFF) to the
+// Unicode code point stored for it.
+//
+// Bytes 0x01-0x1F and 0x7F are mapped to symbol code points (for example
+// 0x0A maps to U+25D9), not to the control code points of the same value.
+// U+00B6 and U+00A7 occur twice (indices 0x14/0xF4 and 0x15/0xF5), so a
+// reverse lookup that scans from index 0 finds the lower index first.
+//
+// The table is only ever read, hence const.
+static const uint32 g_cp850ConversionTable[] = {
+	0x0000, 0x263A, 0x263B, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
+	0x25D8, 0x25CB, 0x25D9, 0x2642, 0x2640, 0x266A, 0x266B, 0x263C,
+	0x25BA, 0x25C4, 0x2195, 0x203C, 0x00B6, 0x00A7, 0x25AC, 0x21A8,
+	0x2191, 0x2193, 0x2192, 0x2190, 0x221F, 0x2194, 0x25B2, 0x25BC,
+	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+	0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+	0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+	0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+	0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+	0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x2302,
+
+	0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
+	0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
+	0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
+	0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
+	0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
+	0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
+	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
+	0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
+	0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
+	0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
+	0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
+	0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
+	0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
+	0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
+	0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
+	0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
+};
+
+// Converts between CP850 and another encoding by way of
+// g_cp850ConversionTable, using UTF-32 as the intermediate form.
+//
+// Returns the converted buffer (the caller frees it), or nullptr when
+// neither `to` nor `from` is "cp850", when an allocation fails, or when the
+// intermediate conversion to UTF-32 yields nullptr.
+char *Encoding::convertConversionTable(const char *to, const char *from, const char *string, size_t length) {
+	if (String(from).equalsIgnoreCase("cp850")) {
+		// One UTF-32 code unit per CP850 input byte; calloc leaves one
+		// trailing zero element after the converted characters.
+		uint32 *utf32Result = (uint32 *) calloc(length + 1, sizeof(uint32));
+		if (!utf32Result) {
+			warning("Could not allocate memory for encoding conversion");
+			return nullptr;
+		}
+		for (size_t i = 0; i < length; i++) {
+			// Index through unsigned char so that bytes >= 0x80 cannot
+			// turn into negative indices where char is signed.
+			utf32Result[i] = g_cp850ConversionTable[(unsigned char) string[i]];
+		}
+		char *finalResult = convert(to, "utf-32", (char *)utf32Result, length * 4);
+		free(utf32Result);
+		return finalResult;
+	}
+	if (String(to).equalsIgnoreCase("cp850")) {
+		uint32 *utf32Result = (uint32 *) convert("utf-32", from, string, length);
+		if (!utf32Result) {
+			// The intermediate conversion produced nothing to map; bail
+			// out instead of dereferencing a null pointer below.
+			return nullptr;
+		}
+		// `length` is the source size in bytes and is only rescaled to a
+		// character count for utf-16 and utf-32 sources.
+		// NOTE(review): for other multi-byte sources (e.g. utf-8), or for
+		// utf-16 input containing surrogate pairs, the UTF-32 buffer may
+		// hold fewer code units than this count - confirm that convert()
+		// returns a buffer large enough for the loop below.
+		if (String(from).hasPrefixIgnoreCase("utf-16"))
+			length /= 2;
+		if (String(from).hasPrefixIgnoreCase("utf-32"))
+			length /= 4;
+		char *finalResult = (char *) calloc(length + 1, sizeof(char));
+		if (!finalResult) {
+			warning("Could not allocate memory for encoding conversion");
+			// Don't leak the intermediate buffer on the error path.
+			free(utf32Result);
+			return nullptr;
+		}
+		for (size_t i = 0; i < length; i++) {
+			// Start from '?', which is what a character that isn't part of
+			// cp850 is replaced with, to remain consistent with iconv and
+			// SDL; overwrite it if the table contains the code point.
+			finalResult[i] = '?';
+			for (unsigned j = 0; j < 256; j++) {
+				if (utf32Result[i] == g_cp850ConversionTable[j]) {
+					finalResult[i] = (char) j;
+					break;
+				}
+			}
+		}
+		free(utf32Result);
+		return finalResult;
+	}
+	return nullptr;
+}
+
 static char g_cyrillicTransliterationTable[] = {
 	' ', 'E', 'D', 'G', 'E', 'Z', 'I', 'I', 'J', 'L', 'N', 'C', 'K', '-', 'U', 'D',
 	'A', 'B', 'V', 'G', 'D', 'E', 'Z', 'Z', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
diff --git a/common/encoding.h b/common/encoding.h
index 8245157a95..8a77c81b08 100644
--- a/common/encoding.h
+++ b/common/encoding.h
@@ -173,6 +173,22 @@ class Encoding {
 		static char *convertTransManMapping(const char *to, const char *from, const char *string, size_t length);
 
 		/**
+		 * Uses a conversion table to convert the string to Unicode and from
+		 * that to the final encoding. Important encodings that aren't
+		 * supported by all backends should go here.
+		 *
+		 * Only CP850 is handled: either @p to or @p from has to be "cp850"
+		 * (compared case-insensitively), otherwise nullptr is returned.
+		 * When converting to CP850, characters that have no entry in the
+		 * table are replaced with '?'.
+		 *
+		 * The result has to be freed after use.
+		 *
+		 * @param to Name of the encoding the string will be converted to.
+		 * @param from Name of the encoding the string will be converted from.
+		 * @param string String that should be converted.
+		 * @param length Length of the string to convert, in bytes.
+		 *
+		 * @return Converted string (must be freed) or nullptr if the conversion failed
+		 */
+		static char *convertConversionTable(const char *to, const char *from, const char *string, size_t length);
+
+		/**
 		 * Transliterates cyrillic string in iso-8859-5 encoding and returns
 		 * it's ASCII (latin) form.
 		 *
author	Jaromir Wysoglad	2019-08-26 16:48:50 +0200
committer	Filippos Karapetis	2019-09-01 22:47:55 +0300
commit	291360a280bef756f2733515a7bd532856572831 (patch)
tree	08b5ecce4e6a5d5dbc356d676a17646781765125
parent	55c399c7c0ebe4084c1bdf31e647cfa50cc01c09 (diff)
download	scummvm-rg350-291360a280bef756f2733515a7bd532856572831.tar.gz scummvm-rg350-291360a280bef756f2733515a7bd532856572831.tar.bz2 scummvm-rg350-291360a280bef756f2733515a7bd532856572831.zip