diff options
| -rw-r--r-- | common/encoding.cpp | 84 | ||||
| -rw-r--r-- | common/encoding.h | 16 | 
2 files changed, 100 insertions, 0 deletions
diff --git a/common/encoding.cpp b/common/encoding.cpp
index 0fe490bc78..fd4f40adc7 100644
--- a/common/encoding.cpp
+++ b/common/encoding.cpp
@@ -167,6 +167,10 @@ char *Encoding::conversion(const String &to, const String &from, const char *str
 		result = convertTransManMapping(addUtfEndianness(to).c_str(), addUtfEndianness(from).c_str(), string, length);
 	}
 
+	if (result == nullptr) {
+		result = convertConversionTable(addUtfEndianness(to).c_str(), addUtfEndianness(from).c_str(), string, length);
+	}
+
 	return result;
 }
 
@@ -317,6 +321,86 @@ char *Encoding::convertTransManMapping(const char *to, const char *from, const c
 #endif // USE_TRANSLATION
 }
 
+// Unicode code point of every cp850 byte value (0x01-0x1F and 0x7F use their graphical glyphs).
+static const uint32 g_cp850ConversionTable[] = {
+	0x0000, 0x263A, 0x263B, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
+	0x25D8, 0x25CB, 0x25D9, 0x2642, 0x2640, 0x266A, 0x266B, 0x263C,
+	0x25BA, 0x25C4, 0x2195, 0x203C, 0x00B6, 0x00A7, 0x25AC, 0x21A8,
+	0x2191, 0x2193, 0x2192, 0x2190, 0x221F, 0x2194, 0x25B2, 0x25BC,
+	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+	0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+	0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+	0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+	0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+	0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x2302,
+	0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
+	0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
+	0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
+	0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
+	0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
+	0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
+	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
+	0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
+	0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
+	0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
+	0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
+	0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
+	0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
+	0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
+	0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
+	0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
+};
+
+char *Encoding::convertConversionTable(const char *to, const char *from, const char *string, size_t length) {
+	if (String(from).equalsIgnoreCase("cp850")) {
+		// Widen every cp850 byte to a code point; calloc zero-terminates it.
+		uint32 *utf32Result = (uint32 *)calloc(length + 1, sizeof(uint32));
+		if (!utf32Result) {
+			warning("Could not allocate memory for encoding conversion");
+			return nullptr;
+		}
+		for (size_t i = 0; i < length; i++)
+			utf32Result[i] = g_cp850ConversionTable[(unsigned char)string[i]];
+		char *finalResult = convert(to, "utf-32", (char *)utf32Result, length * 4);
+		free(utf32Result);
+		return finalResult;
+	}
+	if (String(to).equalsIgnoreCase("cp850")) {
+		uint32 *utf32Result = (uint32 *)convert("utf-32", from, string, length);
+		if (!utf32Result) // no utf-32 buffer was produced, nothing to map
+			return nullptr;
+		if (String(from).hasPrefixIgnoreCase("utf-16"))
+			length /= 2;
+		if (String(from).hasPrefixIgnoreCase("utf-32"))
+			length /= 4;
+		char *finalResult = (char *)calloc(length + 1, sizeof(char));
+		if (!finalResult) {
+			warning("Could not allocate memory for encoding conversion");
+			free(utf32Result); // don't leak the intermediate buffer
+			return nullptr;
+		}
+		// length is only an upper bound: variable-width input (utf-8, surrogate
+		// pairs) decodes to fewer code points, so also stop at the terminator.
+		// NOTE(review): assumes convert() zero-terminates its result.
+		for (size_t i = 0; i < length && utf32Result[i] != 0; i++) {
+			unsigned j = 0;
+			while (j < 256 && utf32Result[i] != g_cp850ConversionTable[j])
+				j++;
+			// Not part of cp850: use '?' to stay consistent with iconv and SDL
+			finalResult[i] = (j < 256) ? (char)j : '?';
+		}
+		free(utf32Result);
+		return finalResult;
+	}
+	return nullptr;
+}
+
 static char g_cyrillicTransliterationTable[] = {
 	' ', 'E', 'D', 'G', 'E', 'Z', 'I', 'I', 'J', 'L', 'N', 'C', 'K', '-', 'U', 'D',
 	'A', 'B', 'V', 'G', 'D', 'E', 'Z', 'Z', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
diff --git a/common/encoding.h b/common/encoding.h
index 8245157a95..8a77c81b08 100644
--- a/common/encoding.h
+++ b/common/encoding.h
@@ -173,6 +173,22 @@ class Encoding {
 		static char *convertTransManMapping(const char *to, const char *from, const char *string, size_t length);
 
 		/**
+		 * Uses conversion table to convert the string to unicode and from that
+		 * to the final encoding. Important encodings that aren't supported by
+		 * all backends should go here (currently only cp850 is handled).
+		 *
+		 * The result has to be freed after use.
+		 *
+		 * @param to Name of the encoding the strings will be converted to
+		 * @param from Name of the encoding the strings will be converted from
+		 * @param string String that should be converted.
+		 * @param length Length of the string to convert in bytes.
+		 *
+		 * @return Converted string (must be freed) or nullptr if the conversion failed
+		 */
+		static char *convertConversionTable(const char *to, const char *from, const char *string, size_t length);
+
+		/**
 		 * Transliterates cyrillic string in iso-8859-5 encoding and returns
 		 * it's ASCII (latin) form.
 		 *
