From 747ace78fc1767a549560c46d7689f1f8f1628d9 Mon Sep 17 00:00:00 2001 From: lolbot-iichan Date: Sun, 30 Jun 2019 16:45:46 +0300 Subject: COMMON & WINTERMUTE: Use non-1252 for 125X games (PR 1698) * WINTERMUTE: Add detection for "The Driller Incident" "The Driller Incident" is a small freeware game for Wintermute, available in English and Russian: http://questzone.ru/enzi/files/1645 * WINTERMUTE: Add detection table for "One Helluva Day" demo "One Helluva Day" is a point-and-click adventure demo in English / Czech / Russian. Free download: https://store.steampowered.com/app/603680/One_helluva_day/ * WINTERMUTE: Support CHARSET property for TT fonts "// we don't need this anymore" was a mistake =) Surely, most Wintermute games are either designed for a 1252 language (DE_DEU, EN_ANY, ES_ESP, FR_FRA, IT_ITA, PT_BRA), or don't use TrueType fonts (5ma, deadcity, grotter1, grotter2, thekite, tib), or use CHARSET=1 with UTF strings (dirtysplit, reversion1, reversion2, twc), which means this conversion is not needed for those games. However, there are some games that explicitly state CHARSET=10 (driller (RU_RUS), oknytt (RU_RUS), onehelluvaday (UNK_LANG when playing as Russian)) and there are some games with CHARSET=1 with non-1252 in mind (bookofgron (RU_RUS expects 1251), carolreed4 (RU_RUS expects 1251), kulivocko (CZ_CZE expects 1250)). This fixes text in some games: bookofgron, carolreed4, driller, kulivocko, oknytt, onehelluvaday. 
* WINTERMUTE: Break savegame compatibility sizeof(BaseFontTT) was changed, so let's break savegame compatibility * COMMON: Add conversion tables for win1253 and win1257 * COMMON: Add string conversion from U32String back to Common::String convertUtf32ToUtf8 code is based on Wintermute::ConvertUTF32toUTF8 convertFromU32String use convertUtf32ToUtf8 for UTF8 or lookup through conversion table for single-byte encodings * WINTERMUTE: Use Common::convert functions for non-UTF charsets * WINTERMUTE: Fix whitespaces at detection tables * WINTERMUTE: Add TODO comments --- engines/wintermute/utils/string_util.cpp | 137 +++++++++++++++++++++---------- engines/wintermute/utils/string_util.h | 6 +- 2 files changed, 97 insertions(+), 46 deletions(-) (limited to 'engines/wintermute/utils') diff --git a/engines/wintermute/utils/string_util.cpp b/engines/wintermute/utils/string_util.cpp index 82d4fe6902..d842b468db 100644 --- a/engines/wintermute/utils/string_util.cpp +++ b/engines/wintermute/utils/string_util.cpp @@ -26,7 +26,9 @@ * Copyright (c) 2011 Jan Nedoma */ +#include "common/language.h" #include "common/tokenizer.h" +#include "engines/wintermute/base/base_engine.h" #include "engines/wintermute/utils/string_util.h" #include "engines/wintermute/utils/convert_utf.h" @@ -96,48 +98,103 @@ Utf8String StringUtil::wideToUtf8(const WideString &WideStr) { } ////////////////////////////////////////////////////////////////////////// -WideString StringUtil::ansiToWide(const AnsiString &str) { - WideString result; - for (AnsiString::const_iterator i = str.begin(), end = str.end(); i != end; ++i) { - const byte c = *i; - if (c < 0x80 || c >= 0xA0) { - result += c; - } else { - uint32 utf32 = _ansiToUTF32[c - 0x80]; - if (utf32) { - result += utf32; - } else { - // It's an invalid CP1252 character... 
- } +Common::CodePage StringUtil::mapCodePage(TTextCharset charset) { + switch (charset) { + case CHARSET_EASTEUROPE: + return Common::kWindows1250; + + case CHARSET_RUSSIAN: + return Common::kWindows1251; + + case CHARSET_ANSI: + return Common::kWindows1252; + + case CHARSET_GREEK: + return Common::kWindows1253; + + case CHARSET_HEBREW: + return Common::kWindows1255; + + case CHARSET_BALTIC: + return Common::kWindows1257; + + case CHARSET_DEFAULT: + switch (BaseEngine::instance().getLanguage()) { + + //cp1250: Central Europe + case Common::CZ_CZE: + case Common::HR_HRV: + case Common::HU_HUN: + case Common::PL_POL: + case Common::SK_SVK: + return Common::kWindows1250; + + //cp1251: Cyrillic + case Common::RU_RUS: + case Common::UA_UKR: + return Common::kWindows1251; + + //cp1252: Western Europe + case Common::DA_DAN: + case Common::DE_DEU: + case Common::EN_ANY: + case Common::EN_GRB: + case Common::EN_USA: + case Common::ES_ESP: + case Common::FI_FIN: + case Common::FR_FRA: + case Common::IT_ITA: + case Common::NB_NOR: + case Common::NL_NLD: + case Common::PT_BRA: + case Common::PT_POR: + case Common::SE_SWE: + case Common::UNK_LANG: + return Common::kWindows1252; + + //cp1253: Greek + case Common::GR_GRE: + return Common::kWindows1253; + + //cp1255: Hebrew + case Common::HE_ISR: + return Common::kWindows1255; + + //cp1257: Baltic + case Common::ET_EST: + case Common::LV_LAT: + return Common::kWindows1257; + + default: + return Common::kWindows1252; } + + case CHARSET_OEM: + case CHARSET_CHINESEBIG5: + case CHARSET_GB2312: + case CHARSET_HANGUL: + case CHARSET_MAC: + case CHARSET_SHIFTJIS: + case CHARSET_SYMBOL: + case CHARSET_TURKISH: + case CHARSET_VIETNAMESE: + case CHARSET_JOHAB: + case CHARSET_ARABIC: + case CHARSET_THAI: + default: + warning("Unsupported charset: %d", charset); + return Common::kWindows1252; } - return result; } ////////////////////////////////////////////////////////////////////////// -AnsiString StringUtil::wideToAnsi(const WideString 
&wstr) { - AnsiString result; - for (WideString::const_iterator i = wstr.begin(), end = wstr.end(); i != end; ++i) { - const uint32 c = *i; - if (c < 0x80 || (c >= 0xA0 && c <= 0xFF)) { - result += c; - } else { - uint32 ansi = 0xFFFFFFFF; - for (uint j = 0; j < ARRAYSIZE(_ansiToUTF32); ++j) { - if (_ansiToUTF32[j] == c) { - ansi = j + 0x80; - break; - } - } - - if (ansi != 0xFFFFFFFF) { - result += ansi; - } else { - // There's no valid CP1252 code for this character... - } - } - } - return result; +WideString StringUtil::ansiToWide(const AnsiString &str, TTextCharset charset) { + return Common::convertToU32String(str.c_str(), mapCodePage(charset)); +} + +////////////////////////////////////////////////////////////////////////// +AnsiString StringUtil::wideToAnsi(const WideString &wstr, TTextCharset charset) { + return Common::convertFromU32String(wstr, mapCodePage(charset)); } ////////////////////////////////////////////////////////////////////////// @@ -172,10 +229,4 @@ AnsiString StringUtil::toString(int val) { return Common::String::format("%d", val); } -// Mapping of CP1252 characters 0x80...0x9F into UTF-32 -uint32 StringUtil::_ansiToUTF32[32] = { - 0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000, - 0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178 -}; - } // End of namespace Wintermute diff --git a/engines/wintermute/utils/string_util.h b/engines/wintermute/utils/string_util.h index 431d401d96..4657c66766 100644 --- a/engines/wintermute/utils/string_util.h +++ b/engines/wintermute/utils/string_util.h @@ -39,8 +39,8 @@ public: //static bool compareNoCase(const WideString &str1, const WideString &str2); static WideString utf8ToWide(const Utf8String &Utf8Str); static Utf8String wideToUtf8(const WideString &WideStr); - static WideString ansiToWide(const AnsiString &str); - static AnsiString wideToAnsi(const 
WideString &str); + static WideString ansiToWide(const AnsiString &str, TTextCharset charset = CHARSET_ANSI); + static AnsiString wideToAnsi(const WideString &str, TTextCharset charset = CHARSET_ANSI); static bool isUtf8BOM(const byte *buffer, uint32 bufferSize); static int indexOf(const WideString &str, const WideString &toFind, size_t startFrom); @@ -51,7 +51,7 @@ public: static AnsiString toString(int val); private: - static uint32 _ansiToUTF32[32]; + static Common::CodePage mapCodePage(TTextCharset charset); }; } // End of namespace Wintermute -- cgit v1.2.3