Separated the parser code

svn-id: r47480
author: Filippos Karapetis 2010-01-23 17:55:54 +0000
committer: Filippos Karapetis 2010-01-23 17:55:54 +0000
commit: df149e1509d972b2d5bfe903531d9670c2fe83c7 (patch)
tree: d0012ab2d2fde14a0fbcaf74ff43e3a4e19ec929 /engines/sci/parser/vocabulary.h
parent: 4fcc82e7a625a0b27927491ca03c41a9f3dca35b (diff)
download: scummvm-rg350-df149e1509d972b2d5bfe903531d9670c2fe83c7.tar.gz
scummvm-rg350-df149e1509d972b2d5bfe903531d9670c2fe83c7.tar.bz2
scummvm-rg350-df149e1509d972b2d5bfe903531d9670c2fe83c7.zip
1 files changed, 338 insertions, 0 deletions
diff --git a/engines/sci/parser/vocabulary.h b/engines/sci/parser/vocabulary.h
new file mode 100644
index 0000000000..00b8780d1c
--- /dev/null
+++ b/engines/sci/parser/vocabulary.h
@@ -0,0 +1,338 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *
+ */
+
+#ifndef SCI_SCICORE_VOCABULARY_H
+#define SCI_SCICORE_VOCABULARY_H
+
+#include "common/str.h"
+#include "common/hashmap.h"
+#include "common/hash-str.h"
+#include "common/list.h"
+
+#include "sci/sci.h"
+
+namespace Sci {
+
+class ResourceManager;
+
+/*#define VOCABULARY_DEBUG */
+
+/** Number of bytes allocated on the heap to store bad words if parsing fails */
+#define PARSE_HEAP_SIZE 64
+
+enum {
+	VOCAB_RESOURCE_SELECTORS = 997,
+
+	VOCAB_RESOURCE_SCI0_MAIN_VOCAB = 0,
+	VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES = 900,
+	VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB = 901,
+
+	VOCAB_RESOURCE_SCI1_MAIN_VOCAB = 900,
+	VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES = 901,
+	VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB = 902
+};
+
+
+enum {
+	VOCAB_CLASS_PREPOSITION = 0x01,
+	VOCAB_CLASS_ARTICLE = 0x02,
+	VOCAB_CLASS_ADJECTIVE = 0x04,
+	VOCAB_CLASS_PRONOUN = 0x08,
+	VOCAB_CLASS_NOUN = 0x10,
+	VOCAB_CLASS_INDICATIVE_VERB = 0x20,
+	VOCAB_CLASS_ADVERB = 0x40,
+	VOCAB_CLASS_IMPERATIVE_VERB = 0x80,
+	VOCAB_CLASS_NUMBER = 0x001
+};
+
+enum {
+	kParseEndOfInput = 0,
+	kParseOpeningParenthesis = 1,
+	kParseClosingParenthesis = 2,
+	kParseNil = 3,
+	kParseNumber = 4
+};
+
+/* Anywords are ignored by the parser */
+#define VOCAB_CLASS_ANYWORD 0xff
+
+/* This word class is used for numbers */
+#define VOCAB_MAGIC_NUMBER_GROUP 0xffd /* 0xffe ? */
+
+/* Number of nodes for each parse_tree_node structure */
+#define VOCAB_TREE_NODES 500
+
+#define VOCAB_TREE_NODE_LAST_WORD_STORAGE 0x140
+#define VOCAB_TREE_NODE_COMPARE_TYPE 0x146
+#define VOCAB_TREE_NODE_COMPARE_GROUP 0x14d
+#define VOCAB_TREE_NODE_FORCE_STORAGE 0x154
+
+#define SAID_COMMA   0xf0
+#define SAID_AMP     0xf1
+#define SAID_SLASH   0xf2
+#define SAID_PARENO  0xf3
+#define SAID_PARENC  0xf4
+#define SAID_BRACKO  0xf5
+#define SAID_BRACKC  0xf6
+#define SAID_HASH    0xf7
+#define SAID_LT      0xf8
+#define SAID_GT      0xf9
+#define SAID_TERM    0xff
+
+#define SAID_FIRST SAID_COMMA
+
+/* There was no 'last matching word': */
+#define SAID_FULL_MATCH 0xffff
+#define SAID_NO_MATCH 0xfffe
+#define SAID_PARTIAL_MATCH 0xfffd
+
+#define SAID_LONG(x) ((x) << 8)
+
+struct ResultWord {
+	int _class; /**< Word class */
+	int _group; /**< Word group */
+};
+
+typedef Common::List<ResultWord> ResultWordList;
+
+typedef Common::HashMap<Common::String, ResultWord, Common::IgnoreCase_Hash, Common::IgnoreCase_EqualTo> WordMap;
+
+
+struct ParseRuleList;
+
+struct suffix_t {
+
+	int class_mask; /**< the word class this suffix applies to */
+	int result_class; /**< the word class a word is morphed to if it doesn't fail this check */
+
+	int alt_suffix_length; /**< String length of the suffix */
+	int word_suffix_length; /**< String length of the other suffix */
+
+	const char *alt_suffix; /**< The alternative suffix */
+	const char *word_suffix; /**< The suffix as used in the word vocabulary */
+
+};
+
+typedef Common::List<suffix_t> SuffixList;
+
+
+struct synonym_t {
+	int replaceant; /**< The word group to replace */
+	int replacement; /**< The replacement word group for this one */
+};
+
+typedef Common::List<synonym_t> SynonymList;
+
+struct parse_tree_branch_t {
+	int id;
+	int data[10];
+};
+
+enum ParseTypes {
+	kParseTreeLeafNode = 0,
+	kParseTreeBranchNode = 1
+};
+
+struct parse_tree_node_t {
+	ParseTypes type;  /**< leaf or branch */
+	union {
+		int value;  /**< For leaves */
+		short branches[2]; /**< For branches */
+	} content;
+};
+
+enum VocabularyVersions {
+	kVocabularySCI0 = 0,
+	kVocabularySCI1 = 1
+};
+
+class Vocabulary {
+public:
+	Vocabulary(ResourceManager *resMan);
+	~Vocabulary();
+
+	/**
+	 * Gets any word from the specified group. For debugging only.
+	 * @param group		Group number
+	 */
+	const char *getAnyWordFromGroup(int group);
+
+
+	/**
+	 * Looks up a single word in the words and suffixes list.
+	 * @param word		pointer to the word to look up
+	 * @param word_len	length of the word to look up
+	 * @return the matching word (or (-1,-1) if there was no match)
+	 */
+	ResultWord lookupWord(const char *word, int word_len);
+
+
+	/**
+	 * Tokenizes a string and compiles it into word_ts.
+	 * @param[in] retval		A list of words which will be set to the result
+	 * @param[out] sentence		The sentence to examine
+	 * @param[out] error		Points to a malloc'd copy of the offending text or to NULL on error
+	 * @return true on success, false on failure
+	 *
+	 * On error, false is returned. If *error is NULL, the sentence did not
+	 * contain any useful words; if not, *error points to a malloc'd copy of
+	 * the offending word. The returned list may contain anywords.
+	 */
+	bool tokenizeString(ResultWordList &retval, const char *sentence, char **error);
+
+	/**
+	 * Builds a parse tree from a list of words, using a set of Greibach Normal
+	 * Form rules.
+	 * @param words		The words to build the tree from
+	 * @param verbose	Set to true for debugging
+	 * @return	0 on success, 1 if the tree couldn't be built in VOCAB_TREE_NODES
+	 *			nodes or if the sentence structure in 'words' is not part of the
+	 *			language described by the grammar passed in 'rules'.
+	 */
+	int parseGNF(const ResultWordList &words, bool verbose = false);
+
+	/**
+	 * Constructs the Greibach Normal Form of the grammar supplied in 'branches'.
+	 * @param verbose	Set to true for debugging. If true, the list is
+	 *					freed before the function ends
+	 * @return	Pointer to a list of singly linked GNF rules describing the same
+	 *			language that was described by 'branches'
+	 *
+	 * The original SCI rules are in almost-CNF (Chomsky Normal Form). Note that
+	 * branch[0] is used only for a few magical incantations, as it is treated
+	 * specially by the SCI parser.
+	 */
+	ParseRuleList *buildGNF(bool verbose = false);
+
+	/**
+	 * Deciphers a said block and dumps its content via printf.
+	 * For debugging only.
+	 * @param pos	pointer to the data to dump
+	 */
+	void decipherSaidBlock(byte *pos);
+
+	/**
+	 * Prints the parser suffixes to the debug console.
+	 */
+	void printSuffixes() const;
+
+	/**
+	 * Prints the parser words to the debug console.
+	 */
+	void printParserWords() const;
+
+	uint getParserBranchesSize() const { return _parserBranches.size(); }
+	const parse_tree_branch_t &getParseTreeBranch(int number) const { return _parserBranches[number]; }
+
+	/**
+	 * Adds a new synonym to the list
+	 */
+	void addSynonym(synonym_t syn) { _synonyms.push_back(syn); }
+	
+	/**
+	 * Clears the list of synonyms
+	 */
+	void clearSynonyms() { _synonyms.clear(); }
+	
+	/** 
+	 * Synonymizes a token list
+	 * Parameters: (ResultWordList &) words: The word list to synonymize
+	 */
+	void synonymizeTokens(ResultWordList &words);
+
+	void printParserNodes(int num);
+
+	void dumpParseTree();
+
+	int parseNodes(int *i, int *pos, int type, int nr, int argc, const char **argv);
+
+private:
+	/**
+	 * Loads all words from the main vocabulary.
+	 * @return true on success, false on failure
+	 */
+	bool loadParserWords();
+
+	/**
+	 * Loads all suffixes from the suffix vocabulary.
+	 * @return true on success, false on failure
+	 */
+	bool loadSuffixes();
+
+	/**
+	 * Frees all suffixes in the given list.
+	 * @param suffixes: The suffixes to free
+	 */
+	void freeSuffixes();
+
+	/**
+	 * Retrieves all grammar rules from the resource data.
+	 * @param branches		The rules are stored into this Array
+	 * @return true on success, false on error
+	 */
+	bool loadBranches();
+
+	/**
+	 * Frees a parser rule list as returned by vocab_build_gnf().
+	 * @param rule_list		the rule list to free
+	 */
+	void freeRuleList(ParseRuleList *rule_list);
+
+	ResourceManager *_resMan;
+	VocabularyVersions _vocabVersion;
+
+	// Parser-related lists
+	SuffixList _parserSuffixes;
+	ParseRuleList *_parserRules; /**< GNF rules used in the parser algorithm */
+	Common::Array<parse_tree_branch_t> _parserBranches;
+	WordMap _parserWords;
+	SynonymList _synonyms; /**< The list of synonyms */
+
+public:
+	// Accessed by said()
+	parse_tree_node_t _parserNodes[VOCAB_TREE_NODES]; /**< The parse tree */
+};
+
+/**
+ * Prints a parse tree.
+ * @param tree_name		Name of the tree to dump (free-form)
+ * @param nodes			The nodes containing the parse tree
+ */
+void vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes);
+
+
+
+/**
+ * Builds a parse tree from a spec and compares it to a parse tree.
+ * @param s			The affected state
+ * @param spec		Pointer to the spec to build
+ * @param verbose	Whether to display the parse tree after building it
+ * @return 1 on a match, 0 otherwise
+ */
+int said(EngineState *s, byte *spec, bool verbose);
+
+} // End of namespace Sci
+
+#endif // SCI_SCICORE_VOCABULARY_H
author	Filippos Karapetis	2010-01-23 17:55:54 +0000
committer	Filippos Karapetis	2010-01-23 17:55:54 +0000
commit	df149e1509d972b2d5bfe903531d9670c2fe83c7 (patch)
tree	d0012ab2d2fde14a0fbcaf74ff43e3a4e19ec929 /engines/sci/parser/vocabulary.h
parent	4fcc82e7a625a0b27927491ca03c41a9f3dca35b (diff)
download	scummvm-rg350-df149e1509d972b2d5bfe903531d9670c2fe83c7.tar.gz scummvm-rg350-df149e1509d972b2d5bfe903531d9670c2fe83c7.tar.bz2 scummvm-rg350-df149e1509d972b2d5bfe903531d9670c2fe83c7.zip