Diffstat (limited to 'engines/sci')
-rw-r--r--   engines/sci/console.cpp            |  25
-rw-r--r--   engines/sci/engine/kparse.cpp      |  15
-rw-r--r--   engines/sci/parser/grammar.cpp     |  79
-rw-r--r--   engines/sci/parser/vocabulary.cpp  | 105
-rw-r--r--   engines/sci/parser/vocabulary.h    |  17
5 files changed, 173 insertions, 68 deletions
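
In short: word lookup is generalized from a single match per token to a list of matches per token, so one input word can carry several class/group pairs (supported from SCI01 on). A minimal sketch of the shape of the change, using std::list in place of ScummVM's Common::List purely for illustration:

#include <list>

// Simplified stand-ins for the types in engines/sci/parser/vocabulary.h.
struct ResultWord {
    int _class;   // word class bitmask
    int _group;   // word group id
};

// One entry per recognized dictionary match (old code: exactly one per token).
typedef std::list<ResultWord> ResultWordList;

// New: the tokenizer produces one inner list per word of the sentence, and
// each inner list keeps every class/group pair that word can stand for.
typedef std::list<ResultWordList> ResultWordListList;
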
diff --git a/engines/sci/console.cpp b/engines/sci/console.cpp
index c26cd60a25..51ec9cfe9e 100644
--- a/engines/sci/console.cpp
+++ b/engines/sci/console.cpp
@@ -1204,7 +1204,6 @@ bool Console::cmdParse(int argc, const char **argv) {
return true;
}
- ResultWordList words;
char *error;
char string[1000];
@@ -1216,6 +1215,8 @@ bool Console::cmdParse(int argc, const char **argv) {
}
DebugPrintf("Parsing '%s'\n", string);
+
+ ResultWordListList words;
bool res = _engine->getVocabulary()->tokenizeString(words, string, &error);
if (res && !words.empty()) {
int syntax_fail = 0;
@@ -1224,8 +1225,13 @@ bool Console::cmdParse(int argc, const char **argv) {
DebugPrintf("Parsed to the following blocks:\n");
- for (ResultWordList::const_iterator i = words.begin(); i != words.end(); ++i)
- DebugPrintf(" Type[%04x] Group[%04x]\n", i->_class, i->_group);
+ for (ResultWordListList::const_iterator i = words.begin(); i != words.end(); ++i) {
+ DebugPrintf(" ");
+ for (ResultWordList::const_iterator j = i->begin(); j != i->end(); ++j) {
+ DebugPrintf("%sType[%04x] Group[%04x]", j == i->begin() ? "" : " / ", j->_class, j->_group);
+ }
+ DebugPrintf("\n");
+ }
if (_engine->getVocabulary()->parseGNF(words, true))
syntax_fail = 1; // Building a tree failed
@@ -1252,7 +1258,6 @@ bool Console::cmdSaid(int argc, const char **argv) {
return true;
}
- ResultWordList words;
char *error;
char string[1000];
byte spec[1000];
@@ -1326,6 +1331,7 @@ bool Console::cmdSaid(int argc, const char **argv) {
_engine->getVocabulary()->debugDecipherSaidBlock(spec);
printf("\n");
+ ResultWordListList words;
bool res = _engine->getVocabulary()->tokenizeString(words, string, &error);
if (res && !words.empty()) {
int syntax_fail = 0;
@@ -1334,8 +1340,15 @@ bool Console::cmdSaid(int argc, const char **argv) {
DebugPrintf("Parsed to the following blocks:\n");
- for (ResultWordList::const_iterator i = words.begin(); i != words.end(); ++i)
- DebugPrintf(" Type[%04x] Group[%04x]\n", i->_class, i->_group);
+ for (ResultWordListList::const_iterator i = words.begin(); i != words.end(); ++i) {
+ DebugPrintf(" ");
+ for (ResultWordList::const_iterator j = i->begin(); j != i->end(); ++j) {
+ DebugPrintf("%sType[%04x] Group[%04x]", j == i->begin() ? "" : " / ", j->_class, j->_group);
+ }
+ DebugPrintf("\n");
+ }
+
+
if (_engine->getVocabulary()->parseGNF(words, true))
syntax_fail = 1; // Building a tree failed
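
The same nested-iteration pattern is used in cmdParse(), cmdSaid() and, in kparse.cpp below, kParse(): the outer loop walks the tokens, the inner loop prints every alternative for a token separated by " / ". A self-contained sketch of that loop, with std::printf standing in for DebugPrintf()/debugCN():

#include <cstdio>
#include <list>

struct ResultWord { int _class; int _group; };
typedef std::list<ResultWord> ResultWordList;
typedef std::list<ResultWordList> ResultWordListList;

// One output line per token; alternatives for the same token joined by " / ".
static void printBlocks(const ResultWordListList &words) {
    for (ResultWordListList::const_iterator i = words.begin(); i != words.end(); ++i) {
        std::printf("   ");
        for (ResultWordList::const_iterator j = i->begin(); j != i->end(); ++j)
            std::printf("%sType[%04x] Group[%04x]",
                        j == i->begin() ? "" : " / ", j->_class, j->_group);
        std::printf("\n");
    }
}
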
diff --git a/engines/sci/engine/kparse.cpp b/engines/sci/engine/kparse.cpp
index be32b340bb..6a052a582d 100644
--- a/engines/sci/engine/kparse.cpp
+++ b/engines/sci/engine/kparse.cpp
@@ -93,13 +93,13 @@ reg_t kParse(EngineState *s, int argc, reg_t *argv) {
reg_t stringpos = argv[0];
Common::String string = s->_segMan->getString(stringpos);
char *error;
- ResultWordList words;
reg_t event = argv[1];
g_sci->checkVocabularySwitch();
Vocabulary *voc = g_sci->getVocabulary();
voc->parser_event = event;
reg_t params[2] = { voc->parser_base, stringpos };
+ ResultWordListList words;
bool res = voc->tokenizeString(words, string.c_str(), &error);
voc->parserIsValid = false; /* not valid */
@@ -109,10 +109,15 @@ reg_t kParse(EngineState *s, int argc, reg_t *argv) {
s->r_acc = make_reg(0, 1);
#ifdef DEBUG_PARSER
- debugC(2, kDebugLevelParser, "Parsed to the following blocks:");
-
- for (ResultWordList::const_iterator i = words.begin(); i != words.end(); ++i)
- debugC(2, kDebugLevelParser, " Type[%04x] Group[%04x]", i->_class, i->_group);
+ debugC(2, kDebugLevelParser, "Parsed to the following blocks:");
+
+ for (ResultWordListList::const_iterator i = words.begin(); i != words.end(); ++i) {
+ debugCN(2, kDebugLevelParser, " ");
+ for (ResultWordList::const_iterator j = i->begin(); j != i->end(); ++j) {
+ debugCN(2, kDebugLevelParser, "%sType[%04x] Group[%04x]", j == i->begin() ? "" : " / ", j->_class, j->_group);
+ }
+ debugCN(2, kDebugLevelParser, "\n");
+ }
#endif
int syntax_fail = voc->parseGNF(words);
diff --git a/engines/sci/parser/grammar.cpp b/engines/sci/parser/grammar.cpp
index ae5ff81045..03e9d29660 100644
--- a/engines/sci/parser/grammar.cpp
+++ b/engines/sci/parser/grammar.cpp
@@ -38,8 +38,9 @@ namespace Sci {
#define TOKEN_CPAREN 0xfe000000
#define TOKEN_TERMINAL_CLASS 0x10000
#define TOKEN_TERMINAL_GROUP 0x20000
-#define TOKEN_STUFFING_WORD 0x40000
-#define TOKEN_NON_NT (TOKEN_OPAREN | TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP | TOKEN_STUFFING_WORD)
+#define TOKEN_STUFFING_LEAF 0x40000
+#define TOKEN_STUFFING_WORD 0x80000
+#define TOKEN_NON_NT (TOKEN_OPAREN | TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP | TOKEN_STUFFING_LEAF | TOKEN_STUFFING_WORD)
#define TOKEN_TERMINAL (TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP)
static int _allocd_rules = 0; // FIXME: Avoid non-const global vars
@@ -122,8 +123,10 @@ static void vocab_print_rule(ParseRule *rule) {
printf("C(%04x)", token & 0xffff);
else if (token & TOKEN_TERMINAL_GROUP)
printf("G(%04x)", token & 0xffff);
- else if (token & TOKEN_STUFFING_WORD)
+ else if (token & TOKEN_STUFFING_LEAF)
printf("%03x", token & 0xffff);
+ else if (token & TOKEN_STUFFING_WORD)
+ printf("{%03x}", token & 0xffff);
else
printf("[%03x]", token); /* non-terminal */
wspace = 1;
@@ -206,8 +209,8 @@ static ParseRule *_vbuild_rule(const parse_tree_branch_t *branch) {
rule->_data[tokens++] = value | TOKEN_STUFFING_WORD;
else { // normal inductive rule
rule->_data[tokens++] = TOKEN_OPAREN;
- rule->_data[tokens++] = type | TOKEN_STUFFING_WORD;
- rule->_data[tokens++] = value | TOKEN_STUFFING_WORD;
+ rule->_data[tokens++] = type | TOKEN_STUFFING_LEAF;
+ rule->_data[tokens++] = value | TOKEN_STUFFING_LEAF;
if (i == 0)
rule->_firstSpecial = tokens;
@@ -220,7 +223,7 @@ static ParseRule *_vbuild_rule(const parse_tree_branch_t *branch) {
return rule;
}
-static ParseRule *_vsatisfy_rule(ParseRule *rule, const ResultWord &input) {
+static ParseRule *_vsatisfy_rule(ParseRule *rule, const ResultWordList &input) {
int dep;
if (!rule->_numSpecials)
@@ -228,11 +231,32 @@ static ParseRule *_vsatisfy_rule(ParseRule *rule, const ResultWord &input) {
dep = rule->_data[rule->_firstSpecial];
- if (((dep & TOKEN_TERMINAL_CLASS) && ((dep & 0xffff) & input._class)) ||
- ((dep & TOKEN_TERMINAL_GROUP) && ((dep & 0xffff) & input._group))) {
+ int count = 0;
+ int match = 0;
+ ResultWordList::const_iterator iter;
+ // TODO: Inserting an array in the middle of another array is slow
+ Common::Array<int> matches;
+ matches.reserve(input.size());
+
+ // We store the first match in 'match', and any subsequent matches in
+ // 'matches'. 'match' replaces the special in the rule, and 'matches' gets
+ // inserted after it.
+ for (iter = input.begin(); iter != input.end(); ++iter)
+ if (((dep & TOKEN_TERMINAL_CLASS) && ((dep & 0xffff) & iter->_class)) ||
+ ((dep & TOKEN_TERMINAL_GROUP) && ((dep & 0xffff) & iter->_group))) {
+ if (count == 0)
+ match = TOKEN_STUFFING_WORD | iter->_group;
+ else
+ matches.push_back(TOKEN_STUFFING_WORD | iter->_group);
+ count++;
+ }
+
+ if (count) {
ParseRule *retval = new ParseRule(*rule);
++_allocd_rules;
- retval->_data[rule->_firstSpecial] = TOKEN_STUFFING_WORD | input._group;
+ retval->_data[rule->_firstSpecial] = match;
+ if (count > 1)
+ retval->_data.insert_at(rule->_firstSpecial+1, matches);
retval->_numSpecials--;
retval->_firstSpecial = 0;
@@ -458,6 +482,25 @@ static int _vbpt_terminate(ParseTreeNode *nodes, int *pos, int base, int value)
nodes[base].right = 0;
return *pos;
}
+static int _vbpt_append_word(ParseTreeNode *nodes, int *pos, int base, int value) {
+ // writes one value to an existing node and creates a sibling for writing
+ nodes[base].type = kParseTreeWordNode;
+ nodes[base].value = value;
+ nodes[base].right = &nodes[++(*pos)];
+ nodes[*pos].type = kParseTreeBranchNode;
+ nodes[*pos].left = 0;
+ nodes[*pos].right = 0;
+ return *pos;
+}
+
+static int _vbpt_terminate_word(ParseTreeNode *nodes, int *pos, int base, int value) {
+ // Terminates, overwriting a nextwrite forknode
+ nodes[base].type = kParseTreeWordNode;
+ nodes[base].value = value;
+ nodes[base].right = 0;
+ return *pos;
+}
+
static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *rule, uint rulepos, int writepos) {
uint token;
@@ -470,11 +513,16 @@ static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *
nexttoken = (rulepos < rule->_data.size()) ? rule->_data[rulepos] : TOKEN_CPAREN;
if (nexttoken != TOKEN_CPAREN)
writepos = _vbpt_parenc(nodes, pos, writepos);
- } else if (token & TOKEN_STUFFING_WORD) {
+ } else if (token & TOKEN_STUFFING_LEAF) {
if (nexttoken == TOKEN_CPAREN)
writepos = _vbpt_terminate(nodes, pos, writepos, token & 0xffff);
else
writepos = _vbpt_append(nodes, pos, writepos, token & 0xffff);
+ } else if (token & TOKEN_STUFFING_WORD) {
+ if (nexttoken == TOKEN_CPAREN)
+ writepos = _vbpt_terminate_word(nodes, pos, writepos, token & 0xffff);
+ else
+ writepos = _vbpt_append_word(nodes, pos, writepos, token & 0xffff);
} else {
printf("\nError in parser (grammar.cpp, _vbpt_write_subexpression()): Rule data broken in rule ");
vocab_print_rule(rule);
@@ -486,16 +534,16 @@ static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *
return rulepos;
}
-int Vocabulary::parseGNF(const ResultWordList &words, bool verbose) {
+int Vocabulary::parseGNF(const ResultWordListList &words, bool verbose) {
Console *con = g_sci->getSciDebugger();
// Get the start rules:
ParseRuleList *work = _vocab_clone_rule_list_by_id(_parserRules, _parserBranches[0].data[1]);
ParseRuleList *results = NULL;
uint word = 0;
const uint words_nr = words.size();
- ResultWordList::const_iterator word_iter = words.begin();
+ ResultWordListList::const_iterator words_iter;
- for (word_iter = words.begin(); word_iter != words.end(); ++word_iter, ++word) {
+ for (words_iter = words.begin(); words_iter != words.end(); ++words_iter, ++word) {
ParseRuleList *new_work = NULL;
ParseRuleList *reduced_rules = NULL;
ParseRuleList *seeker, *subseeker;
@@ -505,8 +553,9 @@ int Vocabulary::parseGNF(const ResultWordList &words, bool verbose) {
seeker = work;
while (seeker) {
- if (seeker->rule->_numSpecials <= (words_nr - word))
- reduced_rules = _vocab_add_rule(reduced_rules, _vsatisfy_rule(seeker->rule, *word_iter));
+ if (seeker->rule->_numSpecials <= (words_nr - word)) {
+ reduced_rules = _vocab_add_rule(reduced_rules, _vsatisfy_rule(seeker->rule, *words_iter));
+ }
seeker = seeker->next;
}
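
The core of the grammar change is _vsatisfy_rule(): the first matching alternative overwrites the rule's special token in place, and any further matches are inserted directly after it (hence the TODO about mid-array insertion being slow). The sketch below mirrors that splice with std::vector; the token flag value is taken from the diff above, everything else is illustrative:

#include <vector>

// Token flag as defined in grammar.cpp above; the rest is a sketch.
static const int TOKEN_STUFFING_WORD = 0x80000;

// ruleData:      sketch of ParseRule::_data, the rule's token stream
// firstSpecial:  index of the terminal token being satisfied
// matchedGroups: group id of every input alternative that matched the terminal
static void spliceMatches(std::vector<int> &ruleData, int firstSpecial,
                          const std::vector<int> &matchedGroups) {
    if (matchedGroups.empty())
        return;
    // The first match replaces the special token in place...
    ruleData[firstSpecial] = TOKEN_STUFFING_WORD | matchedGroups[0];
    // ...and any remaining matches are inserted right after it, mirroring
    // retval->_data.insert_at(rule->_firstSpecial + 1, matches) in the diff.
    std::vector<int> extra;
    for (size_t k = 1; k < matchedGroups.size(); ++k)
        extra.push_back(TOKEN_STUFFING_WORD | matchedGroups[k]);
    ruleData.insert(ruleData.begin() + firstSpecial + 1, extra.begin(), extra.end());
}

These matches carry the new TOKEN_STUFFING_WORD flag rather than TOKEN_STUFFING_LEAF, so _vbpt_write_subexpression() can later emit them as chained kParseTreeWordNode entries via _vbpt_append_word()/_vbpt_terminate_word().
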
diff --git a/engines/sci/parser/vocabulary.cpp b/engines/sci/parser/vocabulary.cpp
index 139787eefc..7159e941d5 100644
--- a/engines/sci/parser/vocabulary.cpp
+++ b/engines/sci/parser/vocabulary.cpp
@@ -166,8 +166,14 @@ bool Vocabulary::loadParserWords() {
newWord._class = ((resource->data[seeker]) << 4) | ((c & 0xf0) >> 4);
newWord._group = (resource->data[seeker + 2]) | ((c & 0x0f) << 8);
- // Add the word to the list
- _parserWords[currentWord] = newWord;
+ // SCI01 was the first version to support multiple
+ // class/group pairs per word, so we clear the list
+ // in earlier versions.
+ if (getSciVersion() < SCI_VERSION_01)
+ _parserWords[currentWord].clear();
+
+ // Add this to the list of possible class,group pairs for this word
+ _parserWords[currentWord].push_back(newWord);
seeker += 3;
}
@@ -182,8 +188,9 @@ const char *Vocabulary::getAnyWordFromGroup(int group) {
return "{nothing}";
for (WordMap::const_iterator i = _parserWords.begin(); i != _parserWords.end(); ++i) {
- if (i->_value._group == group)
- return i->_key.c_str();
+ for (ResultWordList::const_iterator j = i->_value.begin(); j != i->_value.end(); ++j)
+ if (j->_group == group)
+ return i->_key.c_str();
}
return "{invalid}";
@@ -266,7 +273,9 @@ bool Vocabulary::loadBranches() {
}
// we assume that *word points to an already lowercased word
-ResultWord Vocabulary::lookupWord(const char *word, int word_len) {
+void Vocabulary::lookupWord(ResultWordList& retval, const char *word, int word_len) {
+ retval.clear();
+
Common::String tempword(word, word_len);
// Remove all dashes from tempword
@@ -278,15 +287,22 @@ ResultWord Vocabulary::lookupWord(const char *word, int word_len) {
}
// Look it up:
- WordMap::iterator dict_word = _parserWords.find(tempword);
+ WordMap::iterator dict_words = _parserWords.find(tempword);
// Match found? Return it!
- if (dict_word != _parserWords.end()) {
- return dict_word->_value;
+ if (dict_words != _parserWords.end()) {
+ retval = dict_words->_value;
+
+ // SCI01 was the first version to support
+ // multiple matches, so no need to look further
+ // in earlier versions.
+ if (getSciVersion() < SCI_VERSION_01)
+ return;
+
}
// Now try all suffixes
- for (SuffixList::const_iterator suffix = _parserSuffixes.begin(); suffix != _parserSuffixes.end(); ++suffix)
+ for (SuffixList::const_iterator suffix = _parserSuffixes.begin(); suffix != _parserSuffixes.end(); ++suffix) {
if (suffix->alt_suffix_length <= word_len) {
int suff_index = word_len - suffix->alt_suffix_length;
@@ -299,27 +315,38 @@ ResultWord Vocabulary::lookupWord(const char *word, int word_len) {
// ...and append "correct" suffix
tempword2 += Common::String(suffix->word_suffix, suffix->word_suffix_length);
- dict_word = _parserWords.find(tempword2);
-
- if ((dict_word != _parserWords.end()) && (dict_word->_value._class & suffix->class_mask)) { // Found it?
- // Use suffix class
- ResultWord tmp = dict_word->_value;
- tmp._class = suffix->result_class;
- return tmp;
+ dict_words = _parserWords.find(tempword2);
+
+ if (dict_words != _parserWords.end()) {
+ for (ResultWordList::const_iterator j = dict_words->_value.begin(); j != dict_words->_value.end(); ++j) {
+ if (j->_class & suffix->class_mask) { // Found it?
+ // Use suffix class
+ ResultWord tmp = *j;
+ tmp._class = suffix->result_class;
+ retval.push_back(tmp);
+
+ // SCI01 was the first version to support
+ // multiple matches, so no need to look further
+ // in earlier versions.
+ if (getSciVersion() < SCI_VERSION_01)
+ return;
+ }
+ }
}
}
}
+ }
+
+ if (!retval.empty())
+ return;
// No match so far? Check if it's a number.
- ResultWord retval = { -1, -1 };
char *tester;
if ((strtol(tempword.c_str(), &tester, 10) >= 0) && (*tester == '\0')) { // Do we have a complete number here?
ResultWord tmp = { VOCAB_CLASS_NUMBER, VOCAB_MAGIC_NUMBER_GROUP };
- retval = tmp;
+ retval.push_back(tmp);
}
-
- return retval;
}
void Vocabulary::debugDecipherSaidBlock(const byte *addr) {
@@ -398,7 +425,7 @@ static const byte lowerCaseMap[256] = {
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff // 0xf0
};
-bool Vocabulary::tokenizeString(ResultWordList &retval, const char *sentence, char **error) {
+bool Vocabulary::tokenizeString(ResultWordListList &retval, const char *sentence, char **error) {
char currentWord[VOCAB_MAX_WORDLENGTH] = "";
int pos_in_sentence = 0;
unsigned char c;
@@ -419,10 +446,12 @@ bool Vocabulary::tokenizeString(ResultWordList &retval, const char *sentence, ch
else {
if (wordLen) { // Finished a word?
- ResultWord lookup_result = lookupWord(currentWord, wordLen);
+ ResultWordList lookup_result;
+
// Look it up
+ lookupWord(lookup_result, currentWord, wordLen);
- if (lookup_result._class == -1) { // Not found?
+ if (lookup_result.empty()) { // Not found?
*error = (char *)calloc(wordLen + 1, 1);
strncpy(*error, currentWord, wordLen); // Set the offending word
retval.clear();
@@ -460,12 +489,14 @@ void Vocabulary::printSuffixes() const {
void Vocabulary::printParserWords() const {
Console *con = g_sci->getSciDebugger();
- int j = 0;
+ int n = 0;
for (WordMap::iterator i = _parserWords.begin(); i != _parserWords.end(); ++i) {
- con->DebugPrintf("%4d: %03x [%03x] %20s |", j, i->_value._class, i->_value._group, i->_key.c_str());
- if (j % 3 == 0)
- con->DebugPrintf("\n");
- j++;
+ for (ResultWordList::iterator j = i->_value.begin(); j != i->_value.end(); ++j) {
+ con->DebugPrintf("%4d: %03x [%03x] %20s |", n, j->_class, j->_group, i->_key.c_str());
+ if (n % 3 == 0)
+ con->DebugPrintf("\n");
+ n++;
+ }
}
con->DebugPrintf("\n");
@@ -527,8 +558,13 @@ void _vocab_recursive_ptree_dump(ParseTreeNode *tree, int blanks) {
if (rbranch) {
if (rbranch->type == kParseTreeBranchNode)
_vocab_recursive_ptree_dump(rbranch, blanks);
- else
+ else {
printf("%x", rbranch->value);
+ while (rbranch->right) {
+ rbranch = rbranch->right;
+ printf("/%x", rbranch->value);
+ }
+ }
}/* else printf("nil");*/
}
@@ -546,14 +582,15 @@ void Vocabulary::dumpParseTree() {
printf("))\n");
}
-void Vocabulary::synonymizeTokens(ResultWordList &words) {
+void Vocabulary::synonymizeTokens(ResultWordListList &words) {
if (_synonyms.empty())
return; // No synonyms: Nothing to check
- for (ResultWordList::iterator i = words.begin(); i != words.end(); ++i)
- for (SynonymList::const_iterator sync = _synonyms.begin(); sync != _synonyms.end(); ++sync)
- if (i->_group == sync->replaceant)
- i->_group = sync->replacement;
+ for (ResultWordListList::iterator i = words.begin(); i != words.end(); ++i)
+ for (ResultWordList::iterator j = i->begin(); j != i->end(); ++j)
+ for (SynonymList::const_iterator sync = _synonyms.begin(); sync != _synonyms.end(); ++sync)
+ if (j->_group == sync->replaceant)
+ j->_group = sync->replacement;
}
void Vocabulary::printParserNodes(int num) {
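
lookupWord() no longer returns a sentinel (-1,-1) word; callers pass in a list and treat an empty result as "not found", as tokenizeString() now does. A hypothetical caller, sketched with standard containers (the dictionary is reduced to a std::map, and suffix handling and the SCI version check are omitted):

#include <list>
#include <map>
#include <string>

struct ResultWord { int _class; int _group; };
typedef std::list<ResultWord> ResultWordList;

// Hypothetical, simplified dictionary; the real one is Vocabulary::_parserWords,
// now a HashMap from word string to ResultWordList.
typedef std::map<std::string, ResultWordList> WordMap;

// New contract: fill 'retval' with every class/group pair known for 'word';
// an empty list means "word not found" (no more (-1,-1) sentinel).
static void lookupWord(const WordMap &dict, ResultWordList &retval, const std::string &word) {
    retval.clear();
    WordMap::const_iterator it = dict.find(word);
    if (it != dict.end())
        retval = it->second;   // hand back all matches at once
}

static bool knowsWord(const WordMap &dict, const std::string &word) {
    ResultWordList matches;
    lookupWord(dict, matches, word);
    return !matches.empty();   // mirrors "if (lookup_result.empty())" in tokenizeString()
}
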
diff --git a/engines/sci/parser/vocabulary.h b/engines/sci/parser/vocabulary.h
index d4df8af715..1b37219bac 100644
--- a/engines/sci/parser/vocabulary.h
+++ b/engines/sci/parser/vocabulary.h
@@ -117,8 +117,9 @@ struct ResultWord {
};
typedef Common::List<ResultWord> ResultWordList;
+typedef Common::List<ResultWordList> ResultWordListList;
-typedef Common::HashMap<Common::String, ResultWord, Common::CaseSensitiveString_Hash, Common::CaseSensitiveString_EqualTo> WordMap;
+typedef Common::HashMap<Common::String, ResultWordList, Common::CaseSensitiveString_Hash, Common::CaseSensitiveString_EqualTo> WordMap;
struct ParseRuleList;
@@ -161,7 +162,7 @@ struct ParseTreeNode {
ParseTypes type; /**< leaf or branch */
int value; /**< For leaves */
ParseTreeNode* left; /**< Left child, for branches */
- ParseTreeNode* right; /**< Right child, for branches */
+ ParseTreeNode* right; /**< Right child, for branches (and word leaves) */
};
enum VocabularyVersions {
@@ -186,11 +187,11 @@ public:
/**
* Looks up a single word in the words and suffixes list.
+ * @param retval the list of matches
* @param word pointer to the word to look up
* @param word_len length of the word to look up
- * @return the matching word (or (-1,-1) if there was no match)
*/
- ResultWord lookupWord(const char *word, int word_len);
+ void lookupWord(ResultWordList &retval, const char *word, int word_len);
/**
@@ -204,7 +205,7 @@ public:
* contain any useful words; if not, *error points to a malloc'd copy of
* the offending word. The returned list may contain anywords.
*/
- bool tokenizeString(ResultWordList &retval, const char *sentence, char **error);
+ bool tokenizeString(ResultWordListList &retval, const char *sentence, char **error);
/**
* Builds a parse tree from a list of words, using a set of Greibach Normal
@@ -215,7 +216,7 @@ public:
* nodes or if the sentence structure in 'words' is not part of the
* language described by the grammar passed in 'rules'.
*/
- int parseGNF(const ResultWordList &words, bool verbose = false);
+ int parseGNF(const ResultWordListList &words, bool verbose = false);
/**
* Constructs the Greibach Normal Form of the grammar supplied in 'branches'.
@@ -262,9 +263,9 @@ public:
/**
* Synonymizes a token list
- * Parameters: (ResultWordList &) words: The word list to synonymize
+ * Parameters: (ResultWordListList &) words: The word list to synonymize
*/
- void synonymizeTokens(ResultWordList &words);
+ void synonymizeTokens(ResultWordListList &words);
void printParserNodes(int num);
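
Putting the new signatures together, a caller drives the parser through a ResultWordListList end to end. The sketch below is a hypothetical stand-in for the changed Vocabulary API, loosely following the kParse() flow above; the stub bodies exist only so the sketch compiles:

#include <list>

struct ResultWord { int _class; int _group; };
typedef std::list<ResultWord> ResultWordList;
typedef std::list<ResultWordList> ResultWordListList;

// Hypothetical stub mirroring the changed method signatures in vocabulary.h.
struct VocabularyLike {
    bool tokenizeString(ResultWordListList &retval, const char *, char **) { retval.clear(); return false; }
    void synonymizeTokens(ResultWordListList &) {}
    int parseGNF(const ResultWordListList &, bool /*verbose*/ = false) { return 1; }
};

// Typical call order, loosely based on kParse() above: tokenize, remap
// synonyms, then try to build the parse tree (nonzero return = syntax fail).
int parseSentence(VocabularyLike &voc, const char *sentence) {
    char *error = 0;
    ResultWordListList words;               // one inner list per input token
    if (!voc.tokenizeString(words, sentence, &error) || words.empty())
        return 1;                           // unknown word or nothing to parse
    voc.synonymizeTokens(words);
    return voc.parseGNF(words);
}
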