Diffstat (limited to 'engines/sci')
-rw-r--r--   engines/sci/console.cpp           |  25
-rw-r--r--   engines/sci/engine/kparse.cpp     |  15
-rw-r--r--   engines/sci/parser/grammar.cpp    |  79
-rw-r--r--   engines/sci/parser/vocabulary.cpp | 105
-rw-r--r--   engines/sci/parser/vocabulary.h   |  17
5 files changed, 173 insertions(+), 68 deletions(-)
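
Every hunk below revolves around the same data-model change: a parsed token no longer resolves to a single class/group pair but to a list of candidate readings, and a tokenized sentence therefore becomes a list of lists. A rough standalone sketch of that shape (plain STL containers instead of the engine's Common::List/Common::HashMap, and made-up vocabulary entries and class/group values) could look like this:

#include <cstdio>
#include <list>
#include <map>
#include <string>

// Simplified stand-ins for the engine types (see the vocabulary.h hunks below).
struct ResultWord {
	int _class;	// word class bitmask
	int _group;	// word group id
};
typedef std::list<ResultWord> ResultWordList;		// all readings of one token
typedef std::list<ResultWordList> ResultWordListList;	// one entry per token in the sentence
typedef std::map<std::string, ResultWordList> WordMap;	// was: one ResultWord per word

int main() {
	// Hypothetical vocabulary: "water" carries two class/group pairs under
	// one key, which the old one-value-per-word map could not represent.
	WordMap words;
	words["water"] = { { 0x100, 0x8a1 }, { 0x004, 0x8a1 } };
	words["get"]   = { { 0x004, 0x07f } };

	// A tokenized sentence is now a list of candidate lists.
	ResultWordListList sentence = { words["get"], words["water"] };

	// Same nested iteration the patch adds to cmdParse()/cmdSaid()/kParse().
	for (const ResultWordList &token : sentence) {
		printf(" ");
		bool first = true;
		for (const ResultWord &rw : token) {
			printf("%sType[%04x] Group[%04x]", first ? "" : " / ", rw._class, rw._group);
			first = false;
		}
		printf("\n");
	}
	return 0;
}

SCI versions before SCI01 only keep a single reading per word, which is why the loader and lookupWord() in the vocabulary.cpp hunks short-circuit on getSciVersion() < SCI_VERSION_01.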
diff --git a/engines/sci/console.cpp b/engines/sci/console.cpp
index c26cd60a25..51ec9cfe9e 100644
--- a/engines/sci/console.cpp
+++ b/engines/sci/console.cpp
@@ -1204,7 +1204,6 @@ bool Console::cmdParse(int argc, const char **argv) {
 		return true;
 	}
 
-	ResultWordList words;
 	char *error;
 	char string[1000];
 
@@ -1216,6 +1215,8 @@ bool Console::cmdParse(int argc, const char **argv) {
 	}
 
 	DebugPrintf("Parsing '%s'\n", string);
+
+	ResultWordListList words;
 	bool res = _engine->getVocabulary()->tokenizeString(words, string, &error);
 	if (res && !words.empty()) {
 		int syntax_fail = 0;
@@ -1224,8 +1225,13 @@ bool Console::cmdParse(int argc, const char **argv) {
 
 		DebugPrintf("Parsed to the following blocks:\n");
 
-		for (ResultWordList::const_iterator i = words.begin(); i != words.end(); ++i)
-			DebugPrintf(" Type[%04x] Group[%04x]\n", i->_class, i->_group);
+		for (ResultWordListList::const_iterator i = words.begin(); i != words.end(); ++i) {
+			DebugPrintf(" ");
+			for (ResultWordList::const_iterator j = i->begin(); j != i->end(); ++j) {
+				DebugPrintf("%sType[%04x] Group[%04x]", j == i->begin() ? "" : " / ", j->_class, j->_group);
+			}
+			DebugPrintf("\n");
+		}
 
 		if (_engine->getVocabulary()->parseGNF(words, true))
 			syntax_fail = 1; // Building a tree failed
@@ -1252,7 +1258,6 @@ bool Console::cmdSaid(int argc, const char **argv) {
 		return true;
 	}
 
-	ResultWordList words;
 	char *error;
 	char string[1000];
 	byte spec[1000];
@@ -1326,6 +1331,7 @@ bool Console::cmdSaid(int argc, const char **argv) {
 	_engine->getVocabulary()->debugDecipherSaidBlock(spec);
 	printf("\n");
 
+	ResultWordListList words;
 	bool res = _engine->getVocabulary()->tokenizeString(words, string, &error);
 	if (res && !words.empty()) {
 		int syntax_fail = 0;
@@ -1334,8 +1340,15 @@ bool Console::cmdSaid(int argc, const char **argv) {
 
 		DebugPrintf("Parsed to the following blocks:\n");
 
-		for (ResultWordList::const_iterator i = words.begin(); i != words.end(); ++i)
-			DebugPrintf(" Type[%04x] Group[%04x]\n", i->_class, i->_group);
+		for (ResultWordListList::const_iterator i = words.begin(); i != words.end(); ++i) {
+			DebugPrintf(" ");
+			for (ResultWordList::const_iterator j = i->begin(); j != i->end(); ++j) {
+				DebugPrintf("%sType[%04x] Group[%04x]", j == i->begin() ? "" : " / ", j->_class, j->_group);
+			}
+			DebugPrintf("\n");
+		}
+
+
 		if (_engine->getVocabulary()->parseGNF(words, true))
 			syntax_fail = 1; // Building a tree failed
 
diff --git a/engines/sci/engine/kparse.cpp b/engines/sci/engine/kparse.cpp
index be32b340bb..6a052a582d 100644
--- a/engines/sci/engine/kparse.cpp
+++ b/engines/sci/engine/kparse.cpp
@@ -93,13 +93,13 @@ reg_t kParse(EngineState *s, int argc, reg_t *argv) {
 	reg_t stringpos = argv[0];
 	Common::String string = s->_segMan->getString(stringpos);
 	char *error;
-	ResultWordList words;
 	reg_t event = argv[1];
 	g_sci->checkVocabularySwitch();
 	Vocabulary *voc = g_sci->getVocabulary();
 	voc->parser_event = event;
 	reg_t params[2] = { voc->parser_base, stringpos };
 
+	ResultWordListList words;
 	bool res = voc->tokenizeString(words, string.c_str(), &error);
 	voc->parserIsValid = false; /* not valid */
 
@@ -109,10 +109,15 @@ reg_t kParse(EngineState *s, int argc, reg_t *argv) {
 		s->r_acc = make_reg(0, 1);
 
 #ifdef DEBUG_PARSER
-		debugC(2, kDebugLevelParser, "Parsed to the following blocks:");
-
-		for (ResultWordList::const_iterator i = words.begin(); i != words.end(); ++i)
-			debugC(2, kDebugLevelParser, " Type[%04x] Group[%04x]", i->_class, i->_group);
+		debugC(2, kDebugLevelParser, "Parsed to the following blocks:");
+
+		for (ResultWordListList::const_iterator i = words.begin(); i != words.end(); ++i) {
+			debugCN(2, kDebugLevelParser, " ");
+			for (ResultWordList::const_iterator j = i->begin(); j != i->end(); ++j) {
+				debugCN(2, kDebugLevelParser, "%sType[%04x] Group[%04x]", j == i->begin() ? "" : " / ", j->_class, j->_group);
+			}
+			debugCN(2, kDebugLevelParser, "\n");
+		}
 #endif
 
 		int syntax_fail = voc->parseGNF(words);
diff --git a/engines/sci/parser/grammar.cpp b/engines/sci/parser/grammar.cpp
index ae5ff81045..03e9d29660 100644
--- a/engines/sci/parser/grammar.cpp
+++ b/engines/sci/parser/grammar.cpp
@@ -38,8 +38,9 @@ namespace Sci {
 #define TOKEN_CPAREN 0xfe000000
 #define TOKEN_TERMINAL_CLASS 0x10000
 #define TOKEN_TERMINAL_GROUP 0x20000
-#define TOKEN_STUFFING_WORD 0x40000
-#define TOKEN_NON_NT (TOKEN_OPAREN | TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP | TOKEN_STUFFING_WORD)
+#define TOKEN_STUFFING_LEAF 0x40000
+#define TOKEN_STUFFING_WORD 0x80000
+#define TOKEN_NON_NT (TOKEN_OPAREN | TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP | TOKEN_STUFFING_LEAF | TOKEN_STUFFING_WORD)
 #define TOKEN_TERMINAL (TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP)
 
 static int _allocd_rules = 0;	// FIXME: Avoid non-const global vars
@@ -122,8 +123,10 @@ static void vocab_print_rule(ParseRule *rule) {
 				printf("C(%04x)", token & 0xffff);
 			else if (token & TOKEN_TERMINAL_GROUP)
 				printf("G(%04x)", token & 0xffff);
-			else if (token & TOKEN_STUFFING_WORD)
+			else if (token & TOKEN_STUFFING_LEAF)
 				printf("%03x", token & 0xffff);
+			else if (token & TOKEN_STUFFING_WORD)
+				printf("{%03x}", token & 0xffff);
 			else
 				printf("[%03x]", token); /* non-terminal */
 			wspace = 1;
@@ -206,8 +209,8 @@ static ParseRule *_vbuild_rule(const parse_tree_branch_t *branch) {
 			rule->_data[tokens++] = value | TOKEN_STUFFING_WORD;
 		else { // normal inductive rule
 			rule->_data[tokens++] = TOKEN_OPAREN;
-			rule->_data[tokens++] = type | TOKEN_STUFFING_WORD;
-			rule->_data[tokens++] = value | TOKEN_STUFFING_WORD;
+			rule->_data[tokens++] = type | TOKEN_STUFFING_LEAF;
+			rule->_data[tokens++] = value | TOKEN_STUFFING_LEAF;
 
 			if (i == 0)
 				rule->_firstSpecial = tokens;
@@ -220,7 +223,7 @@ static ParseRule *_vbuild_rule(const parse_tree_branch_t *branch) {
 	return rule;
 }
 
-static ParseRule *_vsatisfy_rule(ParseRule *rule, const ResultWord &input) {
+static ParseRule *_vsatisfy_rule(ParseRule *rule, const ResultWordList &input) {
 	int dep;
 
 	if (!rule->_numSpecials)
@@ -228,11 +231,32 @@ static ParseRule *_vsatisfy_rule(ParseRule *rule, const ResultWord &input) {
 
 	dep = rule->_data[rule->_firstSpecial];
 
-	if (((dep & TOKEN_TERMINAL_CLASS) && ((dep & 0xffff) & input._class)) ||
-	    ((dep & TOKEN_TERMINAL_GROUP) && ((dep & 0xffff) & input._group))) {
+	int count = 0;
+	int match = 0;
+	ResultWordList::const_iterator iter;
+	// TODO: Inserting an array in the middle of another array is slow
+	Common::Array<int> matches;
+	matches.reserve(input.size());
+
+	// We store the first match in 'match', and any subsequent matches in
+	// 'matches'. 'match' replaces the special in the rule, and 'matches' gets
+	// inserted after it.
+	for (iter = input.begin(); iter != input.end(); ++iter)
+		if (((dep & TOKEN_TERMINAL_CLASS) && ((dep & 0xffff) & iter->_class)) ||
+		    ((dep & TOKEN_TERMINAL_GROUP) && ((dep & 0xffff) & iter->_group))) {
+			if (count == 0)
+				match = TOKEN_STUFFING_WORD | iter->_group;
+			else
+				matches.push_back(TOKEN_STUFFING_WORD | iter->_group);
+			count++;
+		}
+
+	if (count) {
 		ParseRule *retval = new ParseRule(*rule);
 		++_allocd_rules;
-		retval->_data[rule->_firstSpecial] = TOKEN_STUFFING_WORD | input._group;
+		retval->_data[rule->_firstSpecial] = match;
+		if (count > 1)
+			retval->_data.insert_at(rule->_firstSpecial+1, matches);
 		retval->_numSpecials--;
 		retval->_firstSpecial = 0;
 
@@ -458,6 +482,25 @@ static int _vbpt_terminate(ParseTreeNode *nodes, int *pos, int base, int value)
 	nodes[base].right = 0;
 	return *pos;
 }
 
+static int _vbpt_append_word(ParseTreeNode *nodes, int *pos, int base, int value) {
+	// writes one value to an existing node and creates a sibling for writing
+	nodes[base].type = kParseTreeWordNode;
+	nodes[base].value = value;
+	nodes[base].right = &nodes[++(*pos)];
+	nodes[*pos].type = kParseTreeBranchNode;
+	nodes[*pos].left = 0;
+	nodes[*pos].right = 0;
+	return *pos;
+}
+
+static int _vbpt_terminate_word(ParseTreeNode *nodes, int *pos, int base, int value) {
+	// Terminates, overwriting a nextwrite forknode
+	nodes[base].type = kParseTreeWordNode;
+	nodes[base].value = value;
+	nodes[base].right = 0;
+	return *pos;
+}
+
 static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *rule, uint rulepos, int writepos) {
 	uint token;
 
@@ -470,11 +513,16 @@ static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *
 			nexttoken = (rulepos < rule->_data.size()) ? rule->_data[rulepos] : TOKEN_CPAREN;
 			if (nexttoken != TOKEN_CPAREN)
 				writepos = _vbpt_parenc(nodes, pos, writepos);
-		} else if (token & TOKEN_STUFFING_WORD) {
+		} else if (token & TOKEN_STUFFING_LEAF) {
 			if (nexttoken == TOKEN_CPAREN)
 				writepos = _vbpt_terminate(nodes, pos, writepos, token & 0xffff);
 			else
 				writepos = _vbpt_append(nodes, pos, writepos, token & 0xffff);
+		} else if (token & TOKEN_STUFFING_WORD) {
+			if (nexttoken == TOKEN_CPAREN)
+				writepos = _vbpt_terminate_word(nodes, pos, writepos, token & 0xffff);
+			else
+				writepos = _vbpt_append_word(nodes, pos, writepos, token & 0xffff);
 		} else {
 			printf("\nError in parser (grammar.cpp, _vbpt_write_subexpression()): Rule data broken in rule ");
 			vocab_print_rule(rule);
@@ -486,16 +534,16 @@ static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *
 	return rulepos;
 }
 
-int Vocabulary::parseGNF(const ResultWordList &words, bool verbose) {
+int Vocabulary::parseGNF(const ResultWordListList &words, bool verbose) {
 	Console *con = g_sci->getSciDebugger();
 	// Get the start rules:
 	ParseRuleList *work = _vocab_clone_rule_list_by_id(_parserRules, _parserBranches[0].data[1]);
 	ParseRuleList *results = NULL;
 	uint word = 0;
 	const uint words_nr = words.size();
-	ResultWordList::const_iterator word_iter = words.begin();
+	ResultWordListList::const_iterator words_iter;
 
-	for (word_iter = words.begin(); word_iter != words.end(); ++word_iter, ++word) {
+	for (words_iter = words.begin(); words_iter != words.end(); ++words_iter, ++word) {
 		ParseRuleList *new_work = NULL;
 		ParseRuleList *reduced_rules = NULL;
 		ParseRuleList *seeker, *subseeker;
@@ -505,8 +553,9 @@ int Vocabulary::parseGNF(const ResultWordList &words, bool verbose) {
 		seeker = work;
 
 		while (seeker) {
-			if (seeker->rule->_numSpecials <= (words_nr - word))
-				reduced_rules = _vocab_add_rule(reduced_rules, _vsatisfy_rule(seeker->rule, *word_iter));
+			if (seeker->rule->_numSpecials <= (words_nr - word)) {
+				reduced_rules = _vocab_add_rule(reduced_rules, _vsatisfy_rule(seeker->rule, *words_iter));
+			}
 
 			seeker = seeker->next;
 		}
diff --git a/engines/sci/parser/vocabulary.cpp b/engines/sci/parser/vocabulary.cpp
index 139787eefc..7159e941d5 100644
--- a/engines/sci/parser/vocabulary.cpp
+++ b/engines/sci/parser/vocabulary.cpp
@@ -166,8 +166,14 @@ bool Vocabulary::loadParserWords() {
 			newWord._class = ((resource->data[seeker]) << 4) | ((c & 0xf0) >> 4);
 			newWord._group = (resource->data[seeker + 2]) | ((c & 0x0f) << 8);
 
-			// Add the word to the list
-			_parserWords[currentWord] = newWord;
+			// SCI01 was the first version to support multiple class/group pairs
+			// per word, so we clear the list in earlier versions
+			// in earlier versions.
+			if (getSciVersion() < SCI_VERSION_01)
+				_parserWords[currentWord].clear();
+
+			// Add this to the list of possible class,group pairs for this word
+			_parserWords[currentWord].push_back(newWord);
 
 			seeker += 3;
 		}
@@ -182,8 +188,9 @@ const char *Vocabulary::getAnyWordFromGroup(int group) {
 		return "{nothing}";
 
 	for (WordMap::const_iterator i = _parserWords.begin(); i != _parserWords.end(); ++i) {
-		if (i->_value._group == group)
-			return i->_key.c_str();
+		for (ResultWordList::const_iterator j = i->_value.begin(); j != i->_value.end(); ++j)
+			if (j->_group == group)
+				return i->_key.c_str();
 	}
 
 	return "{invalid}";
@@ -266,7 +273,9 @@ bool Vocabulary::loadBranches() {
 }
 
 // we assume that *word points to an already lowercased word
-ResultWord Vocabulary::lookupWord(const char *word, int word_len) {
+void Vocabulary::lookupWord(ResultWordList& retval, const char *word, int word_len) {
+	retval.clear();
+
 	Common::String tempword(word, word_len);
 
 	// Remove all dashes from tempword
@@ -278,15 +287,22 @@ ResultWord Vocabulary::lookupWord(const char *word, int word_len) {
 	}
 
 	// Look it up:
-	WordMap::iterator dict_word = _parserWords.find(tempword);
+	WordMap::iterator dict_words = _parserWords.find(tempword);
 
 	// Match found? Return it!
-	if (dict_word != _parserWords.end()) {
-		return dict_word->_value;
+	if (dict_words != _parserWords.end()) {
+		retval = dict_words->_value;
+
+		// SCI01 was the first version to support
+		// multiple matches, so no need to look further
+		// in earlier versions.
+		if (getSciVersion() < SCI_VERSION_01)
+			return;
+
 	}
 
 	// Now try all suffixes
-	for (SuffixList::const_iterator suffix = _parserSuffixes.begin(); suffix != _parserSuffixes.end(); ++suffix)
+	for (SuffixList::const_iterator suffix = _parserSuffixes.begin(); suffix != _parserSuffixes.end(); ++suffix) {
 		if (suffix->alt_suffix_length <= word_len) {
 			int suff_index = word_len - suffix->alt_suffix_length;
 
@@ -299,27 +315,38 @@ ResultWord Vocabulary::lookupWord(const char *word, int word_len) {
 
 				// ...and append "correct" suffix
 				tempword2 += Common::String(suffix->word_suffix, suffix->word_suffix_length);
 
-				dict_word = _parserWords.find(tempword2);
-
-				if ((dict_word != _parserWords.end()) && (dict_word->_value._class & suffix->class_mask)) { // Found it?
-					// Use suffix class
-					ResultWord tmp = dict_word->_value;
-					tmp._class = suffix->result_class;
-					return tmp;
+				dict_words = _parserWords.find(tempword2);
+
+				if (dict_words != _parserWords.end()) {
+					for (ResultWordList::const_iterator j = dict_words->_value.begin(); j != dict_words->_value.end(); ++j) {
+						if (j->_class & suffix->class_mask) { // Found it?
+							// Use suffix class
+							ResultWord tmp = *j;
+							tmp._class = suffix->result_class;
+							retval.push_back(tmp);
+
+							// SCI01 was the first version to support
+							// multiple matches, so no need to look further
+							// in earlier versions.
+							if (getSciVersion() < SCI_VERSION_01)
+								return;
+						}
+					}
 				}
 			}
 		}
+	}
+
+	if (!retval.empty())
+		return;
 
 	// No match so far? Check if it's a number.
-	ResultWord retval = { -1, -1 };
 	char *tester;
 	if ((strtol(tempword.c_str(), &tester, 10) >= 0) && (*tester == '\0')) { // Do we have a complete number here?
 		ResultWord tmp = { VOCAB_CLASS_NUMBER, VOCAB_MAGIC_NUMBER_GROUP };
-		retval = tmp;
+		retval.push_back(tmp);
 	}
-
-	return retval;
 }
 
 void Vocabulary::debugDecipherSaidBlock(const byte *addr) {
@@ -398,7 +425,7 @@ static const byte lowerCaseMap[256] = {
 	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff // 0xf0
 };
 
-bool Vocabulary::tokenizeString(ResultWordList &retval, const char *sentence, char **error) {
+bool Vocabulary::tokenizeString(ResultWordListList &retval, const char *sentence, char **error) {
 	char currentWord[VOCAB_MAX_WORDLENGTH] = "";
 	int pos_in_sentence = 0;
 	unsigned char c;
@@ -419,10 +446,12 @@ bool Vocabulary::tokenizeString(ResultWordList &retval, const char *sentence, ch
 
 		else {
 			if (wordLen) { // Finished a word?
 
-				ResultWord lookup_result = lookupWord(currentWord, wordLen);
+				ResultWordList lookup_result;
+				// Look it up
+				lookupWord(lookup_result, currentWord, wordLen);
 
-				if (lookup_result._class == -1) { // Not found?
+				if (lookup_result.empty()) { // Not found?
 					*error = (char *)calloc(wordLen + 1, 1);
 					strncpy(*error, currentWord, wordLen); // Set the offending word
 					retval.clear();
@@ -460,12 +489,14 @@ void Vocabulary::printSuffixes() const {
 void Vocabulary::printParserWords() const {
 	Console *con = g_sci->getSciDebugger();
 
-	int j = 0;
+	int n = 0;
 	for (WordMap::iterator i = _parserWords.begin(); i != _parserWords.end(); ++i) {
-		con->DebugPrintf("%4d: %03x [%03x] %20s |", j, i->_value._class, i->_value._group, i->_key.c_str());
-		if (j % 3 == 0)
-			con->DebugPrintf("\n");
-		j++;
+		for (ResultWordList::iterator j = i->_value.begin(); j != i->_value.end(); ++j) {
+			con->DebugPrintf("%4d: %03x [%03x] %20s |", n, j->_class, j->_group, i->_key.c_str());
+			if (n % 3 == 0)
+				con->DebugPrintf("\n");
+			n++;
+		}
 	}
 
 	con->DebugPrintf("\n");
@@ -527,8 +558,13 @@ void _vocab_recursive_ptree_dump(ParseTreeNode *tree, int blanks) {
 	if (rbranch) {
 		if (rbranch->type == kParseTreeBranchNode)
 			_vocab_recursive_ptree_dump(rbranch, blanks);
-		else
+		else {
 			printf("%x", rbranch->value);
+			while (rbranch->right) {
+				rbranch = rbranch->right;
+				printf("/%x", rbranch->value);
+			}
+		}
 	}/* else printf("nil");*/
 }
 
@@ -546,14 +582,15 @@ void Vocabulary::dumpParseTree() {
 	printf("))\n");
 }
 
-void Vocabulary::synonymizeTokens(ResultWordList &words) {
+void Vocabulary::synonymizeTokens(ResultWordListList &words) {
 	if (_synonyms.empty())
 		return; // No synonyms: Nothing to check
 
-	for (ResultWordList::iterator i = words.begin(); i != words.end(); ++i)
-		for (SynonymList::const_iterator sync = _synonyms.begin(); sync != _synonyms.end(); ++sync)
-			if (i->_group == sync->replaceant)
-				i->_group = sync->replacement;
+	for (ResultWordListList::iterator i = words.begin(); i != words.end(); ++i)
+		for (ResultWordList::iterator j = i->begin(); j != i->end(); ++j)
+			for (SynonymList::const_iterator sync = _synonyms.begin(); sync != _synonyms.end(); ++sync)
+				if (j->_group == sync->replaceant)
+					j->_group = sync->replacement;
 }
 
 void Vocabulary::printParserNodes(int num) {
diff --git a/engines/sci/parser/vocabulary.h b/engines/sci/parser/vocabulary.h
index d4df8af715..1b37219bac 100644
--- a/engines/sci/parser/vocabulary.h
+++ b/engines/sci/parser/vocabulary.h
@@ -117,8 +117,9 @@ struct ResultWord {
 };
 
 typedef Common::List<ResultWord> ResultWordList;
+typedef Common::List<ResultWordList> ResultWordListList;
 
-typedef Common::HashMap<Common::String, ResultWord, Common::CaseSensitiveString_Hash, Common::CaseSensitiveString_EqualTo> WordMap;
+typedef Common::HashMap<Common::String, ResultWordList, Common::CaseSensitiveString_Hash, Common::CaseSensitiveString_EqualTo> WordMap;
 
 struct ParseRuleList;
 
@@ -161,7 +162,7 @@ struct ParseTreeNode {
 	ParseTypes type;	/**< leaf or branch */
 	int value;	/**< For leaves */
 	ParseTreeNode* left;	/**< Left child, for branches */
-	ParseTreeNode* right;	/**< Right child, for branches */
+	ParseTreeNode* right;	/**< Right child, for branches (and word leaves) */
 };
 
 enum VocabularyVersions {
@@ -186,11 +187,11 @@ public:
 
 	/**
 	 * Looks up a single word in the words and suffixes list.
+	 * @param retval	the list of matches
 	 * @param word		pointer to the word to look up
 	 * @param word_len	length of the word to look up
-	 * @return the matching word (or (-1,-1) if there was no match)
 	 */
-	ResultWord lookupWord(const char *word, int word_len);
+	void lookupWord(ResultWordList &retval, const char *word, int word_len);
 
 
 	/**
@@ -204,7 +205,7 @@ public:
 	 * contain any useful words; if not, *error points to a malloc'd copy of
 	 * the offending word. The returned list may contain anywords.
	 */
-	bool tokenizeString(ResultWordList &retval, const char *sentence, char **error);
+	bool tokenizeString(ResultWordListList &retval, const char *sentence, char **error);
 
 	/**
 	 * Builds a parse tree from a list of words, using a set of Greibach Normal
 	 * nodes or if the sentence structure in 'words' is not part of the
 	 * language described by the grammar passed in 'rules'.
 	 */
-	int parseGNF(const ResultWordList &words, bool verbose = false);
+	int parseGNF(const ResultWordListList &words, bool verbose = false);
 
 	/**
 	 * Constructs the Greibach Normal Form of the grammar supplied in 'branches'.
@@ -262,9 +263,9 @@ public:
 
 	/**
 	 * Synonymizes a token list
-	 * Parameters: (ResultWordList &) words: The word list to synonymize
+	 * Parameters: (ResultWordListList &) words: The word list to synonymize
 	 */
-	void synonymizeTokens(ResultWordList &words);
+	void synonymizeTokens(ResultWordListList &words);
 
 	void printParserNodes(int num);
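
On the grammar side, the key change is in _vsatisfy_rule(): a rule's next terminal is now tested against every candidate reading of the current token, and each matching group becomes a stuffing-word token, with the first match replacing the terminal in the rule and the remaining ones inserted right after it. A minimal sketch of just that matching step, assuming the token flag values from the grammar.cpp hunk and using std::vector in place of the engine's Common::Array, could look like this:

#include <list>
#include <vector>

// Flag values taken from the grammar.cpp hunk above.
#define TOKEN_TERMINAL_CLASS	0x10000
#define TOKEN_TERMINAL_GROUP	0x20000
#define TOKEN_STUFFING_WORD	0x80000

struct ResultWord { int _class; int _group; };
typedef std::list<ResultWord> ResultWordList;

// Collect a stuffing-word token for every reading of 'input' that satisfies
// the terminal 'dep'. An empty result means the rule is not satisfied; in the
// real code the first element replaces the special slot in the rule's data
// array and the rest are inserted after it.
static std::vector<int> matchTerminal(int dep, const ResultWordList &input) {
	std::vector<int> matches;
	for (ResultWordList::const_iterator iter = input.begin(); iter != input.end(); ++iter) {
		if (((dep & TOKEN_TERMINAL_CLASS) && ((dep & 0xffff) & iter->_class)) ||
		    ((dep & TOKEN_TERMINAL_GROUP) && ((dep & 0xffff) & iter->_group)))
			matches.push_back(TOKEN_STUFFING_WORD | iter->_group);
	}
	return matches;
}

int main() {
	// Hypothetical terminal: accepts any reading whose class intersects 0x004.
	ResultWordList token = { { 0x100, 0x8a1 }, { 0x004, 0x8a1 } };
	std::vector<int> m = matchTerminal(TOKEN_TERMINAL_CLASS | 0x004, token);
	return m.empty() ? 1 : 0;
}

Parse-tree writing then distinguishes the two stuffing kinds: TOKEN_STUFFING_LEAF still becomes an ordinary value node, while TOKEN_STUFFING_WORD nodes are chained through the node's right pointer, which is what the new _vbpt_append_word()/_vbpt_terminate_word() helpers and the '/'-separated dump output in _vocab_recursive_ptree_dump() handle.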