From 747ace78fc1767a549560c46d7689f1f8f1628d9 Mon Sep 17 00:00:00 2001
From: lolbot-iichan
Date: Sun, 30 Jun 2019 16:45:46 +0300
Subject: COMMON & WINTERMUTE: Use non-1252 for 125X games (PR 1698)

* WINTERMUTE: Add detection for "The Driller Incident"

"The Driller Incident" is a small freeware game for Wintermute,
available in English and Russian: http://questzone.ru/enzi/files/1645

* WINTERMUTE: Add detection table for "One Helluva Day" demo

"One Helluva Day" is a point-and-click adventure demo in English / Czech
/ Russian.
Free download:
https://store.steampowered.com/app/603680/One_helluva_day/

* WINTERMUTE: Support CHARSET property for TT fonts

"// we don't need this anymore" was a mistake =)

Surely, most Wintermute games are either designed for a 1252 language
(DE_DEU, EN_ANY, ES_ESP, FR_FRA, IT_ITA, PT_BRA), or don't use TrueType
fonts (5ma, deadcity, grotter1, grotter2, thekite, tib), or use
CHARSET=1 with UTF strings (dirtysplit, reversion1, reversion2, twc),
which means this conversion is not needed for those games.

However, there are some games that explicitly state CHARSET=10 (driller
(RU_RUS), oknytt (RU_RUS), onehelluvaday (UNK_LANG when playing as
Russian)) and there are some games with CHARSET=1 with non-1252 in mind
(bookofgron (RU_RUS expects 1251), carolreed4 (RU_RUS expects 1251),
kulivocko (CZ_CZE expects 1250)).

This fixes text in some games: bookofgron, carolreed4, driller, kulivocko,
oknytt, onehelluvaday.

* WINTERMUTE: Break savegame compatibility

sizeof(BaseFontTT) was changed, so let's break savegame compatibility

* COMMON: Add conversion tables for win1253 and win1257

* COMMON: Add string conversion from U32String back to Common::String

convertUtf32ToUtf8 code is based on Wintermute::ConvertUTF32toUTF8.
convertFromU32String uses convertUtf32ToUtf8 for UTF8, or a lookup through
the conversion table for single-byte encodings.

* WINTERMUTE: Use Common::convert functions for non-UTF charsets

* WINTERMUTE: Fix whitespaces at detection tables

* WINTERMUTE: Add TODO comments
---
 engines/wintermute/utils/string_util.cpp | 137 +++++++++++++++++++++----------
 engines/wintermute/utils/string_util.h   |   6 +-
 2 files changed, 97 insertions(+), 46 deletions(-)

(limited to 'engines/wintermute/utils')

diff --git a/engines/wintermute/utils/string_util.cpp b/engines/wintermute/utils/string_util.cpp
index 82d4fe6902..d842b468db 100644
--- a/engines/wintermute/utils/string_util.cpp
+++ b/engines/wintermute/utils/string_util.cpp
@@ -26,7 +26,9 @@
  * Copyright (c) 2011 Jan Nedoma
  */
 
+#include "common/language.h"
 #include "common/tokenizer.h"
+#include "engines/wintermute/base/base_engine.h"
 #include "engines/wintermute/utils/string_util.h"
 #include "engines/wintermute/utils/convert_utf.h"
 
@@ -96,48 +98,103 @@ Utf8String StringUtil::wideToUtf8(const WideString &WideStr) {
 }
 
 //////////////////////////////////////////////////////////////////////////
-WideString StringUtil::ansiToWide(const AnsiString &str) {
-	WideString result;
-	for (AnsiString::const_iterator i = str.begin(), end = str.end(); i != end; ++i) {
-		const byte c = *i;
-		if (c < 0x80 || c >= 0xA0) {
-			result += c;
-		} else {
-			uint32 utf32 = _ansiToUTF32[c - 0x80];
-			if (utf32) {
-				result += utf32;
-			} else {
-				// It's an invalid CP1252 character...
-			}
+Common::CodePage StringUtil::mapCodePage(TTextCharset charset) { // translate a TTextCharset value into the Common::CodePage used for string conversion
+	switch (charset) {
+	case CHARSET_EASTEUROPE:
+		return Common::kWindows1250;
+
+	case CHARSET_RUSSIAN:
+		return Common::kWindows1251;
+
+	case CHARSET_ANSI:
+		return Common::kWindows1252;
+
+	case CHARSET_GREEK:
+		return Common::kWindows1253;
+
+	case CHARSET_HEBREW:
+		return Common::kWindows1255;
+
+	case CHARSET_BALTIC:
+		return Common::kWindows1257;
+
+	case CHARSET_DEFAULT: // the charset itself names no code page: choose one from the language reported by BaseEngine
+		switch (BaseEngine::instance().getLanguage()) {
+
+		// cp1250: Central European languages
+		case Common::CZ_CZE:
+		case Common::HR_HRV:
+		case Common::HU_HUN:
+		case Common::PL_POL:
+		case Common::SK_SVK:
+			return Common::kWindows1250;
+
+		// cp1251: Cyrillic languages
+		case Common::RU_RUS:
+		case Common::UA_UKR:
+			return Common::kWindows1251;
+
+		// cp1252: Western European languages (an unknown language is handled the same way)
+		case Common::DA_DAN:
+		case Common::DE_DEU:
+		case Common::EN_ANY:
+		case Common::EN_GRB:
+		case Common::EN_USA:
+		case Common::ES_ESP:
+		case Common::FI_FIN:
+		case Common::FR_FRA:
+		case Common::IT_ITA:
+		case Common::NB_NOR:
+		case Common::NL_NLD:
+		case Common::PT_BRA:
+		case Common::PT_POR:
+		case Common::SE_SWE:
+		case Common::UNK_LANG:
+			return Common::kWindows1252;
+
+		// cp1253: Greek
+		case Common::GR_GRE:
+			return Common::kWindows1253;
+
+		// cp1255: Hebrew
+		case Common::HE_ISR:
+			return Common::kWindows1255;
+
+		// cp1257: Baltic languages
+		case Common::ET_EST:
+		case Common::LV_LAT:
+			return Common::kWindows1257;
+
+		default: // any language not listed above uses cp1252; every inner case returns, so control never falls through to CHARSET_OEM below
+			return Common::kWindows1252;
 		}
+
+	case CHARSET_OEM: // this charset and the ones listed after it have no mapping here and share the warning + cp1252 fallback
+	case CHARSET_CHINESEBIG5:
+	case CHARSET_GB2312:
+	case CHARSET_HANGUL:
+	case CHARSET_MAC:
+	case CHARSET_SHIFTJIS:
+	case CHARSET_SYMBOL:
+	case CHARSET_TURKISH:
+	case CHARSET_VIETNAMESE:
+	case CHARSET_JOHAB:
+	case CHARSET_ARABIC:
+	case CHARSET_THAI:
+	default: // also reached for any charset value that has no case label in this switch
+		warning("Unsupported charset: %d", charset);
+		return Common::kWindows1252;
 	}
-	return result;
 }
 
 //////////////////////////////////////////////////////////////////////////
-AnsiString StringUtil::wideToAnsi(const WideString &wstr) {
-	AnsiString result;
-	for (WideString::const_iterator i = wstr.begin(), end = wstr.end(); i != end; ++i) {
-		const uint32 c = *i;
-		if (c < 0x80 || (c >= 0xA0 && c <= 0xFF)) {
-			result += c;
-		} else {
-			uint32 ansi = 0xFFFFFFFF;
-			for (uint j = 0; j < ARRAYSIZE(_ansiToUTF32); ++j) {
-				if (_ansiToUTF32[j] == c) {
-					ansi = j + 0x80;
-					break;
-				}
-			}
-
-			if (ansi != 0xFFFFFFFF) {
-				result += ansi;
-			} else {
-				// There's no valid CP1252 code for this character...
-			}
-		}
-	}
-	return result;
+WideString StringUtil::ansiToWide(const AnsiString &str, TTextCharset charset) { // convert str to a WideString using the code page that mapCodePage() selects for charset
+	return Common::convertToU32String(str.c_str(), mapCodePage(charset));
+}
+
+//////////////////////////////////////////////////////////////////////////
+AnsiString StringUtil::wideToAnsi(const WideString &wstr, TTextCharset charset) { // convert wstr to an AnsiString using the code page that mapCodePage() selects for charset
+	return Common::convertFromU32String(wstr, mapCodePage(charset));
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -172,10 +229,4 @@ AnsiString StringUtil::toString(int val) {
 	return Common::String::format("%d", val);
 }
 
-// Mapping of CP1252 characters 0x80...0x9F into UTF-32
-uint32 StringUtil::_ansiToUTF32[32] = {
-	0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000,
-	0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178
-};
-
 } // End of namespace Wintermute
diff --git a/engines/wintermute/utils/string_util.h b/engines/wintermute/utils/string_util.h
index 431d401d96..4657c66766 100644
--- a/engines/wintermute/utils/string_util.h
+++ b/engines/wintermute/utils/string_util.h
@@ -39,8 +39,8 @@ public:
 	//static bool compareNoCase(const WideString &str1, const WideString &str2);
 	static WideString utf8ToWide(const Utf8String &Utf8Str);
 	static Utf8String wideToUtf8(const WideString &WideStr);
-	static WideString ansiToWide(const AnsiString &str);
-	static AnsiString wideToAnsi(const WideString &str);
+	static WideString ansiToWide(const AnsiString &str, TTextCharset charset = CHARSET_ANSI);
+	static AnsiString wideToAnsi(const WideString &str, TTextCharset charset = CHARSET_ANSI);
 
 	static bool isUtf8BOM(const byte *buffer, uint32 bufferSize);
 	static int indexOf(const WideString &str, const WideString &toFind, size_t startFrom);
@@ -51,7 +51,7 @@ public:
 	static AnsiString toString(int val);
 
 private:
-	static uint32 _ansiToUTF32[32];
+	static Common::CodePage mapCodePage(TTextCharset charset);
 };
 
 } // End of namespace Wintermute
-- 
cgit v1.2.3