From 4c153039715b4a2e5be7c10009c0392754e3c30f Mon Sep 17 00:00:00 2001
From: Paul Gilbert
Date: Thu, 22 Jun 2017 20:52:39 -0400
Subject: TITANIC: Implement German parser preprocessing

---
 engines/titanic/true_talk/tt_parser.cpp | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 engines/titanic/true_talk/tt_parser.h   |  7 +++++++
 2 files changed, 56 insertions(+)

(limited to 'engines')

diff --git a/engines/titanic/true_talk/tt_parser.cpp b/engines/titanic/true_talk/tt_parser.cpp
index 5ace6ec1e0..2893c50a8d 100644
--- a/engines/titanic/true_talk/tt_parser.cpp
+++ b/engines/titanic/true_talk/tt_parser.cpp
@@ -62,6 +62,8 @@ void TTparser::loadArrays() {
 	loadArray(_replacements1, "TEXT/REPLACEMENTS1");
 	loadArray(_replacements2, "TEXT/REPLACEMENTS2");
 	loadArray(_replacements3, "TEXT/REPLACEMENTS3");
+	if (g_vm->isGerman())
+		loadArray(_replacements4, "TEXT/REPLACEMENTS4");
 	loadArray(_phrases, "TEXT/PHRASES");
 	loadArray(_pronouns, "TEXT/PRONOUNS");
 
@@ -81,6 +83,9 @@ int TTparser::preprocess(TTsentence *sentence) {
 	if (normalize(sentence))
 		return 0;
 
+	if (g_vm->isGerman())
+		preprocessGerman(sentence->_normalizedLine);
+
 	// Scan for and replace common slang and contractions with verbose versions
 	searchAndReplace(sentence->_normalizedLine, _replacements1);
 	searchAndReplace(sentence->_normalizedLine, _replacements2);
@@ -1718,4 +1723,48 @@ int TTparser::processModifiers(int modifier, TTword *word) {
 	return 0;
 }
 
+void TTparser::preprocessGerman(TTstring &line) {
+	// Endings, each terminated by a space, that may directly follow a
+	// matched word for the replacement to be applied
+	static const char *const SUFFIXES[12] = {
+		" ", "est ", "em ", "en ", "er ", "es ",
+		"et ", "st ", "s ", "e ", "n ", "t "
+	};
+
+	// _replacements4 is read as consecutive triples: the word to look for,
+	// its replacement, and the text the line has to end with. Stop before
+	// any incomplete trailing triple so idx + 2 is never out of range
+	for (uint idx = 0; idx + 2 < _replacements4.size(); idx += 3) {
+		if (!line.hasSuffix(_replacements4[idx + 2]))
+			continue;
+
+		// Only the first occurrence is considered, and it has to be
+		// preceded by a space
+		const char *lineP = line.c_str();
+		const char *p = strstr(lineP, _replacements4[idx].c_str());
+		if (!p || p == lineP || *(p - 1) != ' ')
+			continue;
+
+		const char *wordEndP = p + _replacements4[idx].size();
+
+		for (int sIdx = 0; sIdx < 12; ++sIdx) {
+			const char *suffixP = SUFFIXES[sIdx];
+			const size_t suffixLen = strlen(suffixP);
+
+			if (!strncmp(wordEndP, suffixP, suffixLen)) {
+				// Form a new line with the replacement word. The pointers
+				// into the old line stay valid while the new string is
+				// being built, since it is only assigned afterwards
+				const char *nextWordP = wordEndP + suffixLen;
+				line = Common::String::format("%s %s %s",
+					Common::String(lineP, p).c_str(),
+					_replacements4[idx + 1].c_str(),
+					nextWordP
+				);
+				return;
+			}
+		}
+	}
+}
+
 } // End of namespace Titanic
diff --git a/engines/titanic/true_talk/tt_parser.h b/engines/titanic/true_talk/tt_parser.h
index 458a719e1f..d39cf50c33 100644
--- a/engines/titanic/true_talk/tt_parser.h
+++ b/engines/titanic/true_talk/tt_parser.h
@@ -69,6 +69,7 @@ private:
 	StringArray _replacements1;
 	StringArray _replacements2;
 	StringArray _replacements3;
+	StringArray _replacements4;
 	StringArray _phrases;
 	NumberArray _numbers;
 	TTparserNode *_nodesP;
@@ -169,6 +170,12 @@ private:
 	 */
 	void removeNode(TTparserNode *node);
 
+	/**
+	 * Handles any preprocessing for the German version
+	 * @param line		Line to check and update
+	 */
+	void preprocessGerman(TTstring &line);
+
 	int processModifiers(int modifier, TTword *word);
 
 	int checkForAction();
-- 
cgit v1.2.3