SCI: Rewrote vocabulary code to use a Common::HashMap<String,ResultWord>

svn-id: r39671
author: Max Horn 2009-03-24 17:42:12 +0000
committer: Max Horn 2009-03-24 17:42:12 +0000
commit: 1e94b9cd3c7cd889d6b3cd0d8a9aa1b28f2cdda6 (patch)
tree: 10023b0d5a55d9e10f6bc9634746e0cbac0567d8 /engines/sci/scicore
parent: 65b96f6a1bf1f4ca8a062f785996306dabbfedf3 (diff)
download: scummvm-rg350-1e94b9cd3c7cd889d6b3cd0d8a9aa1b28f2cdda6.tar.gz
scummvm-rg350-1e94b9cd3c7cd889d6b3cd0d8a9aa1b28f2cdda6.tar.bz2
scummvm-rg350-1e94b9cd3c7cd889d6b3cd0d8a9aa1b28f2cdda6.zip
2 files changed, 81 insertions, 131 deletions
diff --git a/engines/sci/scicore/vocabulary.cpp b/engines/sci/scicore/vocabulary.cpp
index f9e58c1123..2190062510 100644
--- a/engines/sci/scicore/vocabulary.cpp
+++ b/engines/sci/scicore/vocabulary.cpp
@@ -63,11 +63,8 @@ const char *class_names[] = {"",
                              ""
                             };
 */
-int _vocab_cmp_words(const void *word1, const void *word2) {
-	return scumm_stricmp((*((word_t **) word1))->word, (*((word_t **)word2))->word);
-}
 
-word_t **vocab_get_words(ResourceManager *resmgr, int *word_counter) {
+bool vocab_get_words(ResourceManager *resmgr, WordMap &words) {
 
 	char currentword[256] = ""; // They're not going to use words longer than 255 ;-)
 	int currentwordpos = 0;
@@ -86,7 +83,7 @@ word_t **vocab_get_words(ResourceManager *resmgr, int *word_counter) {
 
 	if (!resource) {
 		warning("SCI1: Could not find a main vocabulary");
-		return NULL; // NOT critical: SCI1 games and some demos don't have one!
+		return false; // NOT critical: SCI1 games and some demos don't have one!
 	}
 
 	unsigned int seeker;
@@ -97,19 +94,15 @@ word_t **vocab_get_words(ResourceManager *resmgr, int *word_counter) {
 
 	if (resource->size < seeker) {
 		fprintf(stderr, "Invalid main vocabulary encountered: Too small\n");
-		return NULL;
+		return false;
 		// Now this ought to be critical, but it'll just cause parse() and said() not to work
 	}
 
-	int counter = 0;
-	word_t **words;
-	words = (word_t **)sci_malloc(sizeof(word_t *));
+	words.clear();
 
 	while (seeker < resource->size) {
 		byte c;
 
-		words = (word_t**)sci_realloc(words, (counter + 1) * sizeof(word_t *));
-
 		currentwordpos = resource->data[seeker++]; // Parts of previous words may be re-used
 
 		if (vocab_version == 1) {
@@ -120,8 +113,8 @@ word_t **vocab_get_words(ResourceManager *resmgr, int *word_counter) {
 			}
 			if (seeker == resource->size) {
 				warning("SCI1: Vocabulary not usable, disabling");
-				vocab_free_words(words, counter);
-				return NULL;
+				words.clear();
+				return false;
 			}
 		} else {
 			do {
@@ -132,45 +125,33 @@ word_t **vocab_get_words(ResourceManager *resmgr, int *word_counter) {
 
 		currentword[currentwordpos] = 0;
 
-		words[counter] = (word_t*)sci_malloc(sizeof(word_t) + currentwordpos);
-		// Allocate more memory, so that the word fits into the structure
-
-		strcpy(&(words[counter]->word[0]), &(currentword[0])); // Copy the word
-
 		// Now decode class and group:
 		c = resource->data[seeker + 1];
-		words[counter]->w_class = ((resource->data[seeker]) << 4) | ((c & 0xf0) >> 4);
-		words[counter]->group = (resource->data[seeker + 2]) | ((c & 0x0f) << 8);
+		ResultWord newWord;
+		newWord._class = ((resource->data[seeker]) << 4) | ((c & 0xf0) >> 4);
+		newWord._group = (resource->data[seeker + 2]) | ((c & 0x0f) << 8);
+
+		// Add the word to the list
+		words[currentword] = newWord;
 
 		seeker += 3;
-		++counter;
 	}
 
-	*word_counter = counter;
-
-	qsort(words, counter, sizeof(word_t *), _vocab_cmp_words); // Sort entries
 
-	return words;
-}
-
-void vocab_free_words(word_t **words, int words_nr) {
-	int i;
-
-	for (i = 0; i < words_nr; i++)
-		free(words[i]);
+// NOTE: No sorting is needed any more: 'words' is now a hashmap and
+// lookups use words.find() instead of bsearch on a sorted array.
+//	qsort(words, counter, sizeof(word_t *), _vocab_cmp_words); // Sort entries
 
-	free(words);
+	return true;
 }
 
-const char *vocab_get_any_group_word(int group, word_t **words, int words_nr) {
-	int i;
-
+const char *vocab_get_any_group_word(int group, const WordMap &words) {
 	if (group == VOCAB_MAGIC_NUMBER_GROUP)
 		return "{number}";
 
-	for (i = 0; i < words_nr; i++)
-		if (words[i]->group == group)
-			return words[i]->word;
+	for (WordMap::const_iterator i = words.begin(); i != words.end(); ++i)
+		if (i->_value._group == group)
+			return i->_key.c_str();
 
 	return "{invalid}";
 }
@@ -261,29 +242,23 @@ parse_tree_branch_t *vocab_get_branches(ResourceManager * resmgr, int *branches_
 }
 
 
-ResultWord vocab_lookup_word(char *word, int word_len, word_t **words, int words_nr,
-	const SuffixList &suffixes) {
-	word_t *tempword = (word_t*)sci_malloc(sizeof(word_t) + word_len + 256);
-	// 256: For suffixes. Should suffice.
-	word_t **dict_word;
-	char *tester;
-	int word_len_tmp;
-
-	strncpy(&(tempword->word[0]), word, word_len);
-	tempword->word[word_len] = 0;
-
-	word_len_tmp = word_len;
-	while ((tester = strchr(tempword->word, '-')))
-		memmove(tester, tester + 1, (tempword->word + word_len_tmp--) - tester);
-
-	dict_word = (word_t **)bsearch(&tempword, words, words_nr, sizeof(word_t *), _vocab_cmp_words);
+ResultWord vocab_lookup_word(char *word, int word_len, const WordMap &words, const SuffixList &suffixes) {
+	Common::String tempword(word, word_len);
 
-	if (dict_word) {
-		free(tempword);
+	// Remove all dashes from tempword
+	for (uint i = 0; i < tempword.size(); ) {
+		if (tempword[i] == '-')
+			tempword.deleteChar(i);
+		else
+			++i;
+	}
 
-		ResultWord tmp = { (*dict_word)->w_class, (*dict_word)->group };
+	// Look it up:
+	WordMap::iterator dict_word = words.find(tempword);
 
-		return tmp;
+	// Match found? Return it!
+	if (dict_word != words.end()) {
+		return dict_word->_value;
 	}
 
 	// Now try all suffixes
@@ -294,18 +269,18 @@ ResultWord vocab_lookup_word(char *word, int word_len, word_t **words, int words
 			// Offset of the start of the suffix
 
 			if (scumm_strnicmp(suffix->alt_suffix, word + suff_index, suffix->alt_suffix_length) == 0) { // Suffix matched!
-				strncpy(&(tempword->word[0]), word, word_len);
-				tempword->word[suff_index] = 0; // Terminate word at suffix start position...
-				strncat(&(tempword->word[0]), suffix->word_suffix, suffix->word_suffix_length); // ...and append "correct" suffix
+				// Terminate word at suffix start position...:
+				Common::String tempword2(word, MIN(word_len, suff_index));
 
-				dict_word = (word_t**)bsearch(&tempword, words, words_nr, sizeof(word_t *), _vocab_cmp_words);
+				// ...and append "correct" suffix
+				tempword2 += Common::String(suffix->word_suffix, suffix->word_suffix_length);
 
-				if ((dict_word) && ((*dict_word)->w_class & suffix->class_mask)) { // Found it?
-					free(tempword);
+				dict_word = words.find(tempword2);
 
+				if ((dict_word != words.end()) && (dict_word->_value._class & suffix->class_mask)) { // Found it?
 					// Use suffix class
-					ResultWord tmp = { suffix->result_class, (*dict_word)->group };
-
+					ResultWord tmp = dict_word->_value;
+					tmp._class = suffix->result_class;
 					return tmp;
 				}
 			}
@@ -313,21 +288,13 @@ ResultWord vocab_lookup_word(char *word, int word_len, word_t **words, int words
 
 	// No match so far? Check if it's a number.
 
-	strncpy(&(tempword->word[0]), word, word_len);
-	tempword->word[word_len] = 0;
-
-	word_len_tmp = word_len;
-	while ((tester = strchr(tempword->word, '-')))
-		memmove(tester, tester + 1, (tempword->word + word_len--) - tester);
-
 	ResultWord retval = { -1, -1 };
-	if ((strtol(&(tempword->word[0]), &tester, 10) >= 0) && (*tester == '\0')) { // Do we have a complete number here?
+	char *tester;
+	if ((strtol(tempword.c_str(), &tester, 10) >= 0) && (*tester == '\0')) { // Do we have a complete number here?
 		ResultWord tmp = { VOCAB_CLASS_NUMBER, VOCAB_MAGIC_NUMBER_GROUP };
 		retval = tmp;
 	}
 
-	free(tempword);
-
 	return retval;
 }
 
@@ -339,7 +306,7 @@ void vocab_decypher_said_block(EngineState *s, byte *addr) {
 
 		if (nextitem < 0xf0) {
 			nextitem = nextitem << 8 | *addr++;
-			sciprintf(" %s[%03x]", vocab_get_any_group_word(nextitem, s->parser_words, s->parser_words_nr), nextitem);
+			sciprintf(" %s[%03x]", vocab_get_any_group_word(nextitem, s->_parserWords), nextitem);
 
 			nextitem = 42; // Make sure that group 0xff doesn't abort
 		} else switch (nextitem) {
@@ -396,13 +363,13 @@ static const short _related_words[][2] = { // 0 is backwards, 1 is forward
 	{0x000, 0x180} // number
 };
 
-int vocab_build_simple_parse_tree(parse_tree_node_t *nodes, result_word_t *words, int words_nr) {
+int vocab_build_simple_parse_tree(parse_tree_node_t *nodes, WordMap &words) {
 	int i, length, pos = 0;
 
-	for (i = 0; i < words_nr; ++i) {
-		if (words[i].w_class != VOCAB_CLASS_ANYWORD) {
-			nodes[pos].type = words[i].w_class;
-			nodes[pos].content.value = words[i].group;
+	for (i = 0; i < words.size(); ++i) {
+		if (words[i]._class != VOCAB_CLASS_ANYWORD) {
+			nodes[pos].type = words[i]._class;
+			nodes[pos].content.value = words[i]._group;
 			pos += 2; // Link information is filled in below
 		}
 	}
@@ -477,7 +444,7 @@ int vocab_build_simple_parse_tree(parse_tree_node_t *nodes, result_word_t *words
 }
 #endif
 
-int vocab_tokenize_string(ResultWordList &retval, char *sentence, word_t **words, int words_nr,
+bool vocab_tokenize_string(ResultWordList &retval, char *sentence, const WordMap &words,
 	const SuffixList &suffixes, char **error) {
 	char *lastword = sentence;
 	int pos_in_sentence = 0;
@@ -499,14 +466,14 @@ int vocab_tokenize_string(ResultWordList &retval, char *sentence, word_t **words
 			if (wordlen) { // Finished a word?
 
 				ResultWord lookup_result =
-				    vocab_lookup_word(lastword, wordlen, words, words_nr, suffixes);
+				    vocab_lookup_word(lastword, wordlen, words, suffixes);
 				// Look it up
 
-				if (lookup_result.w_class == -1) { // Not found?
+				if (lookup_result._class == -1) { // Not found?
 					*error = (char *)sci_calloc(wordlen + 1, 1);
 					strncpy(*error, lastword, wordlen); // Set the offending word
 					retval.clear();
-					return 1; // And return with error
+					return false; // And return with error
 				}
 
 				// Copy into list
@@ -519,7 +486,7 @@ int vocab_tokenize_string(ResultWordList &retval, char *sentence, word_t **words
 
 	} while (c); // Until terminator is hit
 
-	return 0;
+	return true;
 }
 
 void _vocab_recursive_ptree_dump_treelike(parse_tree_node_t *nodes, int nr, int prevnr) {
@@ -604,8 +571,8 @@ void vocab_synonymize_tokens(ResultWordList &words, const SynonymList &synonyms)
 
 	for (ResultWordList::iterator i = words.begin(); i != words.end(); ++i)
 		for (SynonymList::const_iterator sync = synonyms.begin(); sync != synonyms.end(); ++sync)
-			if (i->group == sync->replaceant)
-				i->group = sync->replacement;
+			if (i->_group == sync->replaceant)
+				i->_group = sync->replacement;
 }
 
 } // End of namespace Sci
diff --git a/engines/sci/scicore/vocabulary.h b/engines/sci/scicore/vocabulary.h
index 7a9507b74c..e3185d71e5 100644
--- a/engines/sci/scicore/vocabulary.h
+++ b/engines/sci/scicore/vocabulary.h
@@ -27,6 +27,8 @@
 #define SCI_SCICORE_VOCABULARY_H
 
 #include "common/str.h"
+#include "common/hashmap.h"
+#include "common/hash-str.h"
 #include "common/list.h"
 
 #include "sci/scicore/versions.h"
@@ -115,17 +117,13 @@ extern const char *class_names[]; /* Vocabulary class names */
 #define SAID_LONG(x) ((x) << 8)
 
 struct ResultWord {
-	int w_class; /* Word class */
-	int group; /* Word group */
+	int _class; /* Word class */
+	int _group; /* Word group */
 };
 
 typedef Common::List<ResultWord> ResultWordList;
 
-struct word_t {
-	int w_class; /* Word class */
-	int group; /* Word group */
-	char word[1]; /* The actual word */
-};
+typedef Common::HashMap<Common::String, ResultWord, Common::IgnoreCase_Hash, Common::IgnoreCase_EqualTo> WordMap;
 
 
 struct parse_rule_t {
@@ -210,7 +208,6 @@ opcode *vocabulary_get_opcodes(ResourceManager *resmgr);
 void vocabulary_free_opcodes(opcode *opcodes);
 /* Frees a previously allocated list of opcodes
 ** Parameters: (opcode *) opcodes: Opcodes to free
-** Returns   : (void)
 */
 
 /**
@@ -225,21 +222,13 @@ char **vocabulary_get_knames(ResourceManager *resmgr, int* count);
 void vocabulary_free_knames(char** names);
 
 
-
-word_t **vocab_get_words(ResourceManager *resmgr, int *word_counter);
-/* Gets all words from the main vocabulary
-** Parameters: (ResourceManager *) resmr: The resource manager to read from
-**             (int *) word_counter: The int which the number of words is stored in
-** Returns   : (word_t **): A list of all words, dynamically allocated
-*/
-
-
-void vocab_free_words(word_t **words, int words_nr);
-/* Frees memory allocated by vocab_get_words
-** Parameters: (word_t **) words: The words to free
-**             (int) words_nr: Number of words in the structure
-** Returns   : (void)
-*/
+/**
+ * Gets all words from the main vocabulary.
+ * @param resmgr		The resource manager to read from
+ * @param words		A list of all words
+ * @return true on success, false on failure
+ */
+bool vocab_get_words(ResourceManager *resmgr, WordMap &words);
 
 
 bool vocab_get_suffixes(ResourceManager *resmgr, SuffixList &suffixes);
@@ -253,7 +242,6 @@ void vocab_free_suffixes(ResourceManager *resmgr, SuffixList &suffixes);
 /* Frees all suffixes in the given list.
 ** Parameters: (ResourceManager *) resmgr: The resource manager to free from
 **             (SuffixList) suffixes: The suffixes to free
-** Returns   : (void)
 */
 
 parse_tree_branch_t *vocab_get_branches(ResourceManager *resmgr, int *branches_nr);
@@ -272,27 +260,25 @@ void vocab_free_branches(parse_tree_branch_t *parser_branches);
 */
 
 ResultWord vocab_lookup_word(char *word, int word_len,
-	word_t **words, int words_nr, const SuffixList &suffixes);
+	const WordMap &words, const SuffixList &suffixes);
 /* Looks up a single word in the words and suffixes list
 ** Parameters: (char *) word: Pointer to the word to look up
 **             (int) word_len: Length of the word to look up
-**             (word_t **) words: List of words
-**             (int) words_nr: Number of elements in 'words'
+**             (const WordMap &) words: List of words
 **             (SuffixList) suffixes: List of suffixes
 ** Returns   : (const ResultWordList &) A list containing 1 or 0 words
 */
 
 
-int vocab_tokenize_string(ResultWordList &retval, char *sentence,
-	word_t **words, int words_nr, const SuffixList &suffixes, char **error);
+bool vocab_tokenize_string(ResultWordList &retval, char *sentence,
+	const WordMap &words, const SuffixList &suffixes, char **error);
 /* Tokenizes a string and compiles it into word_ts.
 ** Parameters: (char *) sentence: The sentence to examine
-**             (word_t **) words: The words to scan for
-**             (int) words_nr: Number of words to scan for
+**             (const WordMap &) words: The words to scan for
 **             (SuffixList) suffixes: suffixes to scan for
 **             (char **) error: Points to a malloc'd copy of the offending text or to NULL on error
 **             (ResultWordList) retval: A list of word_ts containing the result, or NULL.
-** Returns   : 0 on success, 1 if an error occurred
+** Returns   : true on success, false on failure
 ** On error, NULL is returned. If *error is NULL, the sentence did not contain any useful words;
 ** if not, *error points to a malloc'd copy of the offending word.
 ** The returned list may contain anywords.
@@ -315,7 +301,6 @@ parse_rule_list_t *vocab_build_gnf(parse_tree_branch_t *branches, int branches_n
 void vocab_free_rule_list(parse_rule_list_t *rule_list);
 /* Frees a parser rule list as returned by vocab_build_gnf()
 ** Parameters: (parse_rule_list_t *) rule_list: The rule list to free
-** Returns   : (void)
 */
 
 
@@ -337,7 +322,6 @@ void vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes);
 /* Prints a parse tree
 ** Parameters: (const char *) tree_name: Name of the tree to dump (free-form)
 **             (parse_tree_node_t *) nodes: The nodes containing the parse tree
-** Returns   : (void)
 */
 
 
@@ -351,13 +335,12 @@ int said(EngineState *s, byte *spec, int verbose);
 ** Returns   : (int) 1 on a match, 0 otherwise
 */
 
-const char *vocab_get_any_group_word(int group, word_t **words, int words_nr);
-/* Gets any word from the specified group.
-** Parameters: (int) group: Group number.
-**             (word_t **) words: List of words
-**             (int) words_nr: Count of words in the list.
-** For debugging only.
-*/
+/**
+ * Gets any word from the specified group. For debugging only.
+ * @param group		Group number
+ * @param words		List of words
+ */
+const char *vocab_get_any_group_word(int group, const WordMap &words);
 
 
 void vocab_decypher_said_block(EngineState *s, byte *pos);
author	Max Horn	2009-03-24 17:42:12 +0000
committer	Max Horn	2009-03-24 17:42:12 +0000
commit	1e94b9cd3c7cd889d6b3cd0d8a9aa1b28f2cdda6 (patch)
tree	10023b0d5a55d9e10f6bc9634746e0cbac0567d8 /engines/sci/scicore
parent	65b96f6a1bf1f4ca8a062f785996306dabbfedf3 (diff)
download	scummvm-rg350-1e94b9cd3c7cd889d6b3cd0d8a9aa1b28f2cdda6.tar.gz scummvm-rg350-1e94b9cd3c7cd889d6b3cd0d8a9aa1b28f2cdda6.tar.bz2 scummvm-rg350-1e94b9cd3c7cd889d6b3cd0d8a9aa1b28f2cdda6.zip