6 files changed, 94 insertions, 205 deletions
diff --git a/engines/wintermute/base/font/base_font_truetype.cpp b/engines/wintermute/base/font/base_font_truetype.cpp
index d6f09141c9..b879e789e3 100644
--- a/engines/wintermute/base/font/base_font_truetype.cpp
+++ b/engines/wintermute/base/font/base_font_truetype.cpp
@@ -121,7 +121,7 @@ int BaseFontTT::getTextWidth(const byte *text, int maxLength) {
 	}
 
 	if (maxLength >= 0 && textStr.size() > (uint32)maxLength) {
-		textStr = Common::String(textStr.c_str(), (uint32)maxLength);
+		textStr = WideString(textStr.c_str(), (uint32)maxLength);
 	}
 	//text = text.substr(0, MaxLength); // TODO: Remove
 
@@ -155,19 +155,19 @@ void BaseFontTT::drawText(const byte *text, int x, int y, int width, TTextAlign
 		return;
 	}
 
-	WideString textStr = (const char *)text;
+	WideString textStr;
 
 	// TODO: Why do we still insist on Widestrings everywhere?
-	/*  if (_gameRef->_textEncoding == TEXT_UTF8) text = StringUtil::Utf8ToWide((char *)Text);
-	        else text = StringUtil::AnsiToWide((char *)Text);*/
 	// HACK: J.U.L.I.A. uses CP1252, we need to fix that,
 	// And we still don't have any UTF8-support.
-	if (_gameRef->_textEncoding != TEXT_UTF8) {
+	if (_gameRef->_textEncoding == TEXT_UTF8) {
+		textStr = StringUtil::utf8ToWide((const char *)text);
+	} else {
 		textStr = StringUtil::ansiToWide((const char *)text);
 	}
 
 	if (maxLength >= 0 && textStr.size() > (uint32)maxLength) {
-		textStr = Common::String(textStr.c_str(), (uint32)maxLength);
+		textStr = WideString(textStr.c_str(), (uint32)maxLength);
 	}
 	//text = text.substr(0, MaxLength); // TODO: Remove
 
@@ -248,7 +248,7 @@ BaseSurface *BaseFontTT::renderTextToTexture(const WideString &text, int width,
 	//TextLineList lines;
 	// TODO: Use WideString-conversion here.
 	//WrapText(text, width, maxHeight, lines);
-	Common::Array<Common::String> lines;
+	Common::Array<WideString> lines;
 	_font->wordWrapText(text, width, lines);
 
 	while (maxHeight > 0 && lines.size() * _lineHeight > maxHeight) {
@@ -267,7 +267,8 @@ BaseSurface *BaseFontTT::renderTextToTexture(const WideString &text, int width,
 		alignment = Graphics::kTextAlignRight;
 	}
 
-	debugC(kWintermuteDebugFont, "%s %d %d %d %d", text.c_str(), RGBCOLGetR(_layers[0]->_color), RGBCOLGetG(_layers[0]->_color), RGBCOLGetB(_layers[0]->_color), RGBCOLGetA(_layers[0]->_color));
+	// TODO: This debug call does not work with WideString because text.c_str() returns a uint32 array, which "%s" cannot print.
+	//debugC(kWintermuteDebugFont, "%s %d %d %d %d", text.c_str(), RGBCOLGetR(_layers[0]->_color), RGBCOLGetG(_layers[0]->_color), RGBCOLGetB(_layers[0]->_color), RGBCOLGetA(_layers[0]->_color));
 //	void drawString(Surface *dst, const Common::String &str, int x, int y, int w, uint32 color, TextAlign align = kTextAlignLeft, int deltax = 0, bool useEllipsis = true) const;
 	Graphics::Surface *surface = new Graphics::Surface();
 	if (_deletableFont) { // We actually have a TTF
@@ -276,7 +277,7 @@ BaseSurface *BaseFontTT::renderTextToTexture(const WideString &text, int width,
 		surface->create((uint16)width, (uint16)(_lineHeight * lines.size()), Graphics::PixelFormat(2, 5, 5, 5, 1, 11, 6, 1, 0));
 	}
 	uint32 useColor = 0xffffffff;
-	Common::Array<Common::String>::iterator it;
+	Common::Array<WideString>::iterator it;
 	int heightOffset = 0;
 	for (it = lines.begin(); it != lines.end(); ++it) {
 		_font->drawString(surface, *it, 0, heightOffset, width, useColor, alignment);
@@ -647,9 +648,9 @@ void BaseFontTT::measureText(const WideString &text, int maxWidth, int maxHeight
 	//TextLineList lines;
 
 	if (maxWidth >= 0) {
-		Common::Array<Common::String> lines;
+		Common::Array<WideString> lines;
 		_font->wordWrapText(text, maxWidth, lines);
-		Common::Array<Common::String>::iterator it;
+		Common::Array<WideString>::iterator it;
 		textWidth = 0;
 		for (it = lines.begin(); it != lines.end(); ++it) {
 			textWidth = MAX(textWidth, _font->getStringWidth(*it));
diff --git a/engines/wintermute/base/font/base_font_truetype.h b/engines/wintermute/base/font/base_font_truetype.h
index 7a96cdf1b7..edb41a155f 100644
--- a/engines/wintermute/base/font/base_font_truetype.h
+++ b/engines/wintermute/base/font/base_font_truetype.h
@@ -56,9 +56,8 @@ private:
 		bool _marked;
 		uint32 _lastUsed;
 
-		BaseCachedTTFontText() {
+		BaseCachedTTFontText() : _text() {
 			//_text = L"";
-			_text = "";
 			_width = _maxHeight = _maxLength = -1;
 			_align = TAL_LEFT;
 			_surface = nullptr;
diff --git a/engines/wintermute/base/scriptables/script_ext_string.cpp b/engines/wintermute/base/scriptables/script_ext_string.cpp
index b6d284442d..65bec03bc1 100644
--- a/engines/wintermute/base/scriptables/script_ext_string.cpp
+++ b/engines/wintermute/base/scriptables/script_ext_string.cpp
@@ -298,21 +298,13 @@ bool SXString::scCallMethod(ScScript *script, ScStack *stack, ScStack *thisStack
 
 		uint32 start = 0;
 		for(uint32 i = 0; i < str.size() + 1; i++) {
-			char ch = str.c_str()[i];
-			if(ch=='\0' || delims.contains(ch))
-			{
-				char *part = new char[i - start + 1];
-				if(i != start) {
-					Common::strlcpy(part, str.c_str() + start, i - start + 1);
-					part[i - start] = '\0';
+			uint32 ch = (i < str.size()) ? str[i] : '\0'; // i == size() stands for the terminator; operator[] must not be used to index it
+			if (ch =='\0' || delims.contains(ch)) {
+				if (i != start) {
+					parts.push_back(WideString(str.c_str() + start, i - start)); // exactly i - start characters, so the delimiter/terminator at i is excluded
 				} else {
-					part[0] = '\0';
+					parts.push_back(WideString());
 				}
-				val = new ScValue(_gameRef, part);
-				array->push(val);
-				delete[] part;
-				delete val;
-				val = nullptr;
 				start = i + 1;
 			}
 		}
diff --git a/engines/wintermute/dctypes.h b/engines/wintermute/dctypes.h
index b40322147f..4371ee4889 100644
--- a/engines/wintermute/dctypes.h
+++ b/engines/wintermute/dctypes.h
@@ -31,6 +31,7 @@
 
 
 #include "common/str.h"
+#include "common/ustr.h"
 #include "common/list.h"
 #include "common/array.h"
 
@@ -41,7 +42,7 @@ namespace Wintermute {
 //typedef std::wstring WideString;
 typedef Common::String AnsiString;
 typedef Common::String Utf8String;
-typedef Common::String WideString; // NB: Not actually true I presume.
+typedef Common::U32String WideString;
 
 typedef Common::List<WideString> WideStringList;
 typedef Common::List<AnsiString> AnsiStringList;
diff --git a/engines/wintermute/utils/string_util.cpp b/engines/wintermute/utils/string_util.cpp
index d5d6c7f702..702dd04c27 100644
--- a/engines/wintermute/utils/string_util.cpp
+++ b/engines/wintermute/utils/string_util.cpp
@@ -48,201 +48,96 @@ bool StringUtil::compareNoCase(const AnsiString &str1, const AnsiString &str2) {
     return (str1lc == str2lc);
 }*/
 
-Common::String StringUtil::substituteUtf8Characters(Common::String &str) {
-	uint strSize = str.size();
-	Common::String punctuation("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
-
-	if (isAscii(str))
-		return str;
-
-	for (uint32 i = 0; i < strSize; i++) {
-		if (!Common::isAlnum(str[i]) && str[i] != ' ' && !punctuation.contains(str[i])) {
-			// Replace some UTF-8 characters with (almost) equivalent ANSII ones
-			if ((byte)str[i] == 0xc2 && i + 1 < str.size() && (byte)str[i + 1] == 0xa9) {
-				// UTF-8 copyright character, substitute with 'c'
-				str.deleteChar(i);
-				str.setChar('c', i);
-				strSize--;
-			}
-		}
-	}
-
-	return str;
-}
-
-bool StringUtil::isAscii(const Common::String &str) {
-	Common::String punctuation("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
-
-	for (uint32 i = 0; i < str.size(); i++) {
-		if (!Common::isAlnum(str[i]) && str[i] != ' ' && !punctuation.contains(str[i]))
-			return false;
-	}
-
-	return true;
-}
-
 //////////////////////////////////////////////////////////////////////////
 WideString StringUtil::utf8ToWide(const Utf8String &Utf8Str) {
-	// WORKAROUND: Since wide strings aren't supported yet, we make this function
-	// work at least with ASCII strings. This should cover all English versions.
-	Common::String asciiString = Utf8Str;
-	asciiString = substituteUtf8Characters(asciiString);
-	if (isAscii(asciiString)) {
-		// No special (UTF-8) characters found, just return the string
-		return asciiString;
-	} else {
-		warning("String contains special (UTF-8) characters: '%s'", Utf8Str.c_str());
-	}
-
-	error("StringUtil::Utf8ToWide - WideString not supported yet for UTF-8 characters");
-
-	/*  size_t WideSize = Utf8Str.size();
-
-	    if (sizeof(wchar_t) == 2) {
-	        wchar_t *WideStringNative = new wchar_t[WideSize + 1];
-
-	        const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(Utf8Str.c_str());
-	        const UTF8 *SourceEnd = SourceStart + WideSize;
+	size_t wideSize = Utf8Str.size();
 
-	        UTF16 *TargetStart = reinterpret_cast<UTF16 *>(WideStringNative);
-	        UTF16 *TargetEnd = TargetStart + WideSize + 1;
+	uint32 *wideStringNative = new uint32[wideSize + 1];
 
-	        ConversionResult res = ConvertUTF8toUTF16(&SourceStart, SourceEnd, &TargetStart, TargetEnd, strictConversion);
-	        if (res != conversionOK) {
-	            delete[] WideStringNative;
-	            return L"";
-	        }
-	        *TargetStart = 0;
-	        WideString ResultString(WideStringNative);
-	        delete[] WideStringNative;
+	const UTF8 *sourceStart = reinterpret_cast<const UTF8 *>(Utf8Str.c_str());
+	const UTF8 *sourceEnd = sourceStart + wideSize;
 
-	        return ResultString;
-	    } else if (sizeof(wchar_t) == 4) {
-	        wchar_t *WideStringNative = new wchar_t[WideSize + 1];
+	UTF32 *targetStart = reinterpret_cast<UTF32 *>(wideStringNative);
+	UTF32 *targetEnd = targetStart + wideSize;
 
-	        const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(Utf8Str.c_str());
-	        const UTF8 *SourceEnd = SourceStart + WideSize;
-
-	        UTF32 *TargetStart = reinterpret_cast<UTF32 *>(WideStringNative);
-	        UTF32 *TargetEnd = TargetStart + WideSize;
-
-	        ConversionResult res = ConvertUTF8toUTF32(&SourceStart, SourceEnd, &TargetStart, TargetEnd, strictConversion);
-	        if (res != conversionOK) {
-	            delete[] WideStringNative;
-	            return L"";
-	        }
-	        *TargetStart = 0;
-	        WideString ResultString(WideStringNative);
-	        delete[] WideStringNative;
-
-	        return ResultString;
-	    } else {
-	        return L"";
-	    }*/
-	return "";
+	ConversionResult res = ConvertUTF8toUTF32(&sourceStart, sourceEnd, &targetStart, targetEnd, strictConversion);
+	if (res != conversionOK) {
+		delete[] wideStringNative;
+		return WideString();
+	}
+	*targetStart = 0;
+	WideString resultString(wideStringNative);
+	delete[] wideStringNative;
+	return resultString;
 }
 
 //////////////////////////////////////////////////////////////////////////
 Utf8String StringUtil::wideToUtf8(const WideString &WideStr) {
-	// WORKAROUND: Since UTF-8 strings aren't supported yet, we make this function
-	// work at least with ASCII strings. This should cover all English versions.
-	Common::String asciiString = WideStr;
-	asciiString = substituteUtf8Characters(asciiString);
-	if (isAscii(asciiString)) {
-		// No special (UTF-8) characters found, just return the string
-		return asciiString;
-	} else {
-		warning("String contains special (UTF-8) characters: '%s'", WideStr.c_str());
-	}
+	size_t wideSize = WideStr.size();
 
-	error("StringUtil::wideToUtf8 - WideString not supported yet for UTF-8 characters");
-	
-	/*  size_t WideSize = WideStr.length();
+	size_t utf8Size = 4 * wideSize + 1;
+	char *utf8StringNative = new char[utf8Size];
 
-	    if (sizeof(wchar_t) == 2) {
-	        size_t utf8Size = 3 * WideSize + 1;
-	        char *utf8StringNative = new char[Utf8Size];
+	const UTF32 *sourceStart = reinterpret_cast<const UTF32 *>(WideStr.c_str());
+	const UTF32 *sourceEnd = sourceStart + wideSize;
 
-	        const UTF16 *SourceStart = reinterpret_cast<const UTF16 *>(WideStr.c_str());
-	        const UTF16 *SourceEnd = SourceStart + WideSize;
+	UTF8 *targetStart = reinterpret_cast<UTF8 *>(utf8StringNative);
+	UTF8 *targetEnd = targetStart + utf8Size;
 
-	        UTF8 *TargetStart = reinterpret_cast<UTF8 *>(Utf8StringNative);
-	        UTF8 *TargetEnd = TargetStart + Utf8Size;
-
-	        ConversionResult res = ConvertUTF16toUTF8(&SourceStart, SourceEnd, &TargetStart, TargetEnd, strictConversion);
-	        if (res != conversionOK) {
-	            delete[] Utf8StringNative;
-	            return (Utf8String)"";
-	        }
-	        *TargetStart = 0;
-	        Utf8String ResultString(Utf8StringNative);
-	        delete[] Utf8StringNative;
-	        return ResultString;
-	    } else if (sizeof(wchar_t) == 4) {
-	        size_t utf8Size = 4 * WideSize + 1;
-	        char *utf8StringNative = new char[Utf8Size];
-
-	        const UTF32 *SourceStart = reinterpret_cast<const UTF32 *>(WideStr.c_str());
-	        const UTF32 *SourceEnd = SourceStart + WideSize;
-
-	        UTF8 *TargetStart = reinterpret_cast<UTF8 *>(Utf8StringNative);
-	        UTF8 *TargetEnd = TargetStart + Utf8Size;
-
-	        ConversionResult res = ConvertUTF32toUTF8(&SourceStart, SourceEnd, &TargetStart, TargetEnd, strictConversion);
-	        if (res != conversionOK) {
-	            delete[] Utf8StringNative;
-	            return (Utf8String)"";
-	        }
-	        *TargetStart = 0;
-	        Utf8String ResultString(Utf8StringNative);
-	        delete[] Utf8StringNative;
-	        return ResultString;
-	    } else {
-	        return (Utf8String)"";
-	    }*/
-	return "";
+	ConversionResult res = ConvertUTF32toUTF8(&sourceStart, sourceEnd, &targetStart, targetEnd, strictConversion);
+	if (res != conversionOK) {
+		delete[] utf8StringNative;
+		return Utf8String();
+	}
+	*targetStart = 0;
+	Utf8String resultString(utf8StringNative);
+	delete[] utf8StringNative;
+	return resultString;
 }
 
 //////////////////////////////////////////////////////////////////////////
 WideString StringUtil::ansiToWide(const AnsiString &str) {
-	// TODO: This function gets called a lot, so warnings like these drown out the usefull information
-	Common::String converted = "";
-	uint32 index = 0;
-	while (index != str.size()) {
-		byte c = str[index];
-		if (c == 146) {
-			converted += (char)39;  // Replace right-quote with apostrophe
-		} else if (c == 133) {
-			converted += Common::String("..."); // Replace ...-symbol with ...
+	WideString result; // CP1252 -> UTF-32: at most one code point is appended per input byte
+	for (AnsiString::const_iterator i = str.begin(), end = str.end(); i != end; ++i) {
+		const byte c = *i;
+		if (c < 0x80 || c >= 0xA0) { // outside 0x80..0x9F the byte value is appended unchanged as the code point
+			result += c;
 		} else {
-			converted += c;
+			uint32 utf32 = _ansiToUTF32[c - 0x80]; // table index 0 corresponds to byte 0x80
+			if (utf32) { // a zero table entry means there is no mapping; such bytes are skipped
+				result += utf32;
+			} else {
+				// It's an invalid CP1252 character...
+			}
 		}
-		index++;
 	}
-	// using default os locale!
-
-	/*  setlocale(LC_CTYPE, "");
-	    size_t wideSize = mbstowcs(NULL, str.c_str(), 0) + 1;
-	    wchar_t *wstr = new wchar_t[WideSize];
-	    mbstowcs(wstr, str.c_str(), WideSize);
-	    WideString ResultString(wstr);
-	    delete[] wstr;
-	    return ResultString;*/
-	return WideString(converted);
+	return result;
 }
 
 //////////////////////////////////////////////////////////////////////////
 AnsiString StringUtil::wideToAnsi(const WideString &wstr) {
-	// using default os locale!
-	// TODO: This function gets called a lot, so warnings like these drown out the usefull information
-	/*  setlocale(LC_CTYPE, "");
-	    size_t wideSize = wcstombs(NULL, wstr.c_str(), 0) + 1;
-	    char *str = new char[WideSize];
-	    wcstombs(str, wstr.c_str(), WideSize);
-	    AnsiString ResultString(str);
-	    delete[] str;
-	    return ResultString;*/
-	return AnsiString(wstr);
+	AnsiString result; // UTF-32 -> CP1252: code points with no CP1252 byte are silently dropped
+	for (WideString::const_iterator i = wstr.begin(), end = wstr.end(); i != end; ++i) {
+		const uint32 c = *i;
+		if (c < 0x80 || (c >= 0xA0 && c <= 0xFF)) { // these code points are appended unchanged as a single byte
+			result += c;
+		} else {
+			uint32 ansi = 0xFFFFFFFF; // sentinel: stays at this value if the table has no entry for c
+			for (uint j = 0; j < ARRAYSIZE(_ansiToUTF32); ++j) { // reverse lookup in the 0x80..0x9F mapping table
+				if (_ansiToUTF32[j] == c) {
+					ansi = j + 0x80; // table index 0 corresponds to byte 0x80
+					break;
+				}
+			}
+
+			if (ansi != 0xFFFFFFFF) {
+				result += ansi;
+			} else {
+				// There's no valid CP1252 code for this character...
+			}
+		}
+	}
+	return result;
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -256,12 +151,7 @@ bool StringUtil::isUtf8BOM(const byte *buffer, uint32 bufferSize) {
 
 //////////////////////////////////////////////////////////////////////////
 int StringUtil::indexOf(const WideString &str, const WideString &toFind, size_t startFrom) {
-	const char *index = strstr(str.c_str(), toFind.c_str());
-	if (index == nullptr) {
-		return -1;
-	} else {
-		return index - str.c_str();
-	}
+	return str.find(toFind, startFrom); // NOTE(review): presumably still yields -1 when nothing is found (find's "no match" value converted to int) - confirm against the wide string's find()
 }
 
 Common::String StringUtil::encodeSetting(const Common::String &str) {
@@ -282,5 +172,10 @@ AnsiString StringUtil::toString(int val) {
 	return Common::String::format("%d", val);
 }
 
+// Mapping of CP1252 characters 0x80...0x9F into UTF-32
+uint32 StringUtil::_ansiToUTF32[32] = {
+	0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000,
+	0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178
+};
 
 } // End of namespace Wintermute
diff --git a/engines/wintermute/utils/string_util.h b/engines/wintermute/utils/string_util.h
index 05931beb79..14c40fcb2b 100644
--- a/engines/wintermute/utils/string_util.h
+++ b/engines/wintermute/utils/string_util.h
@@ -37,8 +37,6 @@ class StringUtil {
 public:
 	static bool compareNoCase(const AnsiString &str1, const AnsiString &str2);
 	//static bool compareNoCase(const WideString &str1, const WideString &str2);
-	static bool isAscii(const Common::String &str);
-	static Common::String substituteUtf8Characters(Common::String &str);
 	static WideString utf8ToWide(const Utf8String &Utf8Str);
 	static Utf8String wideToUtf8(const WideString &WideStr);
 	static WideString ansiToWide(const AnsiString &str);
@@ -51,6 +49,9 @@ public:
 	static Common::String decodeSetting(const Common::String &str);
 
 	static AnsiString toString(int val);
+
+private:
+	static uint32 _ansiToUTF32[32];
 };
 
 } // End of namespace Wintermute