Import the SCI engine sources from the FreeSCI Glutton branch (it doesn't compile yet)

svn-id: r38192
author: Jordi Vilalta Prat 2009-02-15 06:10:59 +0000
committer: Jordi Vilalta Prat 2009-02-15 06:10:59 +0000
commit: fa6e10e9cec163845aa29e7940c86e9c9ab8a2bc (patch)
tree: ce87338830cc8c149e1de545246bcefe4f45da00 /engines/sci/include/vocabulary.h
parent: 7c148ddf021c990fa866b7600f979aac9a5b26c9 (diff)
download: scummvm-rg350-fa6e10e9cec163845aa29e7940c86e9c9ab8a2bc.tar.gz
scummvm-rg350-fa6e10e9cec163845aa29e7940c86e9c9ab8a2bc.tar.bz2
scummvm-rg350-fa6e10e9cec163845aa29e7940c86e9c9ab8a2bc.zip
1 files changed, 424 insertions, 0 deletions
diff --git a/engines/sci/include/vocabulary.h b/engines/sci/include/vocabulary.h
new file mode 100644
index 0000000000..fb58dd644b
--- /dev/null
+++ b/engines/sci/include/vocabulary.h
@@ -0,0 +1,424 @@
+/***************************************************************************
+ vocabulary.h Copyright (C) 1999,2000,01 Christoph Reichenbach
+
+
+ This program may be modified and copied freely according to the terms of
+ the GNU general public license (GPL), as long as the above copyright
+ notice and the licensing information contained herein are preserved.
+
+ Please refer to www.gnu.org for licensing details.
+
+ This work is provided AS IS, without warranty of any kind, expressed or
+ implied, including but not limited to the warranties of merchantibility,
+ noninfringement, and fitness for a specific purpose. The author will not
+ be held liable for any damage caused by this work or derivatives of it.
+
+ By using this source code, you agree to the licensing terms as stated
+ above.
+
+
+ Please contact the maintainer for bug reports or inquiries.
+
+ Current Maintainer:
+
+    Christoph Reichenbach (CJR) [jameson@linuxgames.com]
+
+***************************************************************************/
+
+#ifndef VOCABULARY_H
+#define VOCABULARY_H
+
+#include <versions.h>
+#include <sciresource.h>
+
+/*#define VOCABULARY_DEBUG */
+/*#define SCI_SIMPLE_SAID_CODE */ /* Whether the simplified Said() matching should be used */
+/*#define SCI_SIMPLE_SAID_DEBUG */ /* uncomment to enable simple said debugging */
+
+
+#define SCRIPT_UNKNOWN_FUNCTION_STRING "[Unknown]"
+/* The string used to identify the "unknown" SCI0 function for each game */
+
+#define PARSE_HEAP_SIZE 64
+/* Number of bytes allocated on the heap to store bad words if parsing fails */
+
+
+typedef struct opcode_
+{
+  int type;
+  int number;
+  char* name;
+} opcode;
+
+#define VOCAB_RESOURCE_OPCODES 998
+#define VOCAB_RESOURCE_KNAMES 999
+
+#define VOCAB_RESOURCE_SCI0_MAIN_VOCAB 0
+#define VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES 900
+#define VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB 901
+
+#define VOCAB_RESOURCE_SCI1_MAIN_VOCAB 900
+#define VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES 901
+#define VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB 902
+#define VOCAB_RESOURCE_SCI1_CHAR_TRANSFORMS 913
+
+#define VOCAB_CLASS_PREPOSITION 0x01
+#define VOCAB_CLASS_ARTICLE 0x02
+#define VOCAB_CLASS_ADJECTIVE 0x04
+#define VOCAB_CLASS_PRONOUN 0x08
+#define VOCAB_CLASS_NOUN 0x10
+#define VOCAB_CLASS_INDICATIVE_VERB 0x20
+#define VOCAB_CLASS_ADVERB 0x40
+#define VOCAB_CLASS_IMPERATIVE_VERB 0x80
+#define VOCAB_CLASS_NUMBER 0x001
+
+extern DLLEXTERN const char *class_names[]; /* Vocabulary class names */
+
+#define VOCAB_CLASS_ANYWORD 0xff
+/* Anywords are ignored by the parser */
+
+#define VOCAB_MAGIC_NUMBER_GROUP 0xffd /* 0xffe ? */
+/* This word class is used for numbers */
+
+#define VOCAB_TREE_NODES 500
+/* Number of nodes for each parse_tree_node structure */
+
+#define VOCAB_TREE_NODE_LAST_WORD_STORAGE 0x140
+#define VOCAB_TREE_NODE_COMPARE_TYPE 0x146
+#define VOCAB_TREE_NODE_COMPARE_GROUP 0x14d
+#define VOCAB_TREE_NODE_FORCE_STORAGE 0x154
+
+#define SAID_COMMA   0xf0
+#define SAID_AMP     0xf1
+#define SAID_SLASH   0xf2
+#define SAID_PARENO  0xf3
+#define SAID_PARENC  0xf4
+#define SAID_BRACKO  0xf5
+#define SAID_BRACKC  0xf6
+#define SAID_HASH    0xf7
+#define SAID_LT      0xf8
+#define SAID_GT      0xf9
+#define SAID_TERM    0xff
+
+#define SAID_FIRST SAID_COMMA
+
+/* There was no 'last matching word': */
+#define SAID_FULL_MATCH 0xffff
+#define SAID_NO_MATCH 0xfffe
+#define SAID_PARTIAL_MATCH 0xfffd
+
+#define SAID_LONG(x) ((x) << 8)
+
+typedef struct {
+
+  int w_class; /* Word class */
+  int group; /* Word group */
+  char word[1]; /* The actual word */
+
+} word_t;
+
+
+typedef struct {
+  int id; /* non-terminal ID */
+  int first_special; /* first terminal or non-terminal */
+  int specials_nr; /* number of terminals and non-terminals */
+  int length;
+  int data[1]; /* actual data (size 1 to avoid compiler warnings) */
+} parse_rule_t;
+
+
+typedef struct _parse_rule_list {
+  int terminal; /* Terminal character this rule matches against or 0 for a non-terminal rule */
+  parse_rule_t *rule;
+  struct _parse_rule_list *next;
+} parse_rule_list_t;
+
+
+typedef struct {
+
+  int class_mask; /* the word class this suffix applies to */
+  int result_class; /* the word class a word is morphed to if it doesn't fail this check */
+
+  int alt_suffix_length; /* String length of the suffix */
+  int word_suffix_length; /* String length of the other suffix */
+
+  char *alt_suffix; /* The alternative suffix */
+  char *word_suffix; /* The suffix as used in the word vocabulary */
+
+} suffix_t;
+
+
+typedef struct {
+
+  int w_class; /* Word class */
+  int group; /* Word group */
+
+} result_word_t;
+
+
+typedef struct
+{
+  int replaceant; /* The word group to replace */
+  int replacement; /* The replacement word group for this one */
+} synonym_t;
+
+
+typedef struct {
+
+  int id;
+
+  int data[10];
+
+} parse_tree_branch_t;
+
+#define PARSE_TREE_NODE_LEAF 0
+#define PARSE_TREE_NODE_BRANCH 1
+
+
+typedef struct {
+
+  short type;  /* leaf or branch */
+
+  union {
+
+    int value;  /* For leaves */
+    short branches[2]; /* For branches */
+
+  } content;
+
+} parse_tree_node_t;
+
+
+
+
+/*FIXME: These need freeing functions...*/
+
+int* vocabulary_get_classes(resource_mgr_t *resmgr, int *count);
+
+int vocabulary_get_class_count(resource_mgr_t *resmgr);
+
+/**
+ * Returns a null terminated array of selector names.
+ */
+char** vocabulary_get_snames(resource_mgr_t *resmgr, int *pcount, sci_version_t version);
+
+/**
+ * Frees the aforementioned array
+ */
+void vocabulary_free_snames(char **snames_list);
+
+/* Look up a selector name in an array, return the index */
+int vocabulary_lookup_sname(char **snames_list, char *sname);
+
+
+/**
+ * Returns a null terminated array of opcodes.
+ */
+opcode* vocabulary_get_opcodes(resource_mgr_t *resmgr);
+
+void
+vocabulary_free_opcodes(opcode *opcodes);
+/* Frees a previously allocated list of opcodes
+** Parameters: (opcode *) opcodes: Opcodes to free
+** Returns   : (void)
+*/
+
+/**
+ * Returns a null terminated array of kernel function names.
+ *
+ * This function reads the kernel function name table from resource_map,
+ * and returns a null terminated array of deep copies of them.
+ * The returned array has the same format regardless of the format of the
+ * name table of the resource (the format changed between version 0 and 1).
+ */
+char** vocabulary_get_knames(resource_mgr_t *resmgr, int* count);
+void vocabulary_free_knames(char** names);
+
+
+
+word_t **
+vocab_get_words(resource_mgr_t *resmgr, int *word_counter);
+/* Gets all words from the main vocabulary
+** Parameters: (resource_mgr_t *) resmr: The resource manager to read from
+**             (int *) word_counter: The int which the number of words is stored in
+** Returns   : (word_t **): A list of all words, dynamically allocated
+*/
+
+
+void
+vocab_free_words(word_t **words, int words_nr);
+/* Frees memory allocated by vocab_get_words
+** Parameters: (word_t **) words: The words to free
+**             (int) words_nr: Number of words in the structure
+** Returns   : (void)
+*/
+
+
+suffix_t **
+vocab_get_suffices(resource_mgr_t *resmgr, int *suffices_nr);
+/* Gets all suffixes from the suffix vocabulary
+** Parameters: (resource_mgr_t*) resmgr: Resource manager the resources are
+**                               read from
+**             (int *) suffices_nr: The variable to store the number of suffices in
+** Returns   : (suffix_t **): A list of suffixes
+*/
+
+void
+vocab_free_suffices(resource_mgr_t *resmgr, suffix_t **suffices, int suffices_nr);
+/* Frees suffices_nr suffices
+** Parameters: (resource_mgr_t *) resmgr: The resource manager to free from
+**             (suffix_t **) suffices: The suffixes to free
+**             (int) suffices_nr: Number of entrie sin suffices
+** Returns   : (void)
+*/
+
+parse_tree_branch_t *
+vocab_get_branches(resource_mgr_t *resmgr, int *branches_nr);
+/* Retrieves all grammar rules from the resource data
+** Parameters: (resource_mgr_t*) resmgr: Resource manager the rules are
+**                               read from
+**             (int *) branches_nr: Pointer to the variable which the number of entries is to be
+**                     stored in
+** Returns   : (parse_tree_branch_t *): The rules, or NULL on error
+*/
+
+void
+vocab_free_branches(parse_tree_branch_t *parser_branches);
+/* Frees all branches
+** Parameters: (parse_tree_branch_t *) parser_branches: The branches to free
+** Returns   : (null)
+*/
+
+result_word_t *
+vocab_lookup_word(char *word, int word_len,
+		  word_t **words, int words_nr,
+		  suffix_t **suffices, int suffices_nr);
+/* Looks up a single word in the words and suffixes list
+** Parameters: (char *) word: Pointer to the word to look up
+**             (int) word_len: Length of the word to look up
+**             (word_t **) words: List of words
+**             (int) words_nr: Number of elements in 'words'
+**             (suffix_t **) suffices: List of suffices
+**             (int) suffices_nr: Number of entries in 'suffices'
+** Returns   : (result_word_t *) A malloc'd result_word_t, or NULL if the word
+** could not be found.
+*/
+
+
+result_word_t *
+vocab_tokenize_string(char *sentence, int *result_nr,
+		      word_t **words, int words_nr,
+		      suffix_t **suffices, int suffices_nr,
+		      char **error);
+/* Tokenizes a string and compiles it into word_ts.
+** Parameters: (char *) sentence: The sentence to examine
+**             (int *) result_nr: The variable to store the resulting number of words in
+**             (word_t **) words: The words to scan for
+**             (int) words_nr: Number of words to scan for
+**             (suffix_t **) suffices: suffixes to scan for
+**             (int) suffices_nr: Number of suffices to scan for
+**             (char **) error: Points to a malloc'd copy of the offending text or to NULL on error
+** Returns   : (word_t *): A list of word_ts containing the result, or NULL.
+** On error, NULL is returned. If *error is NULL, the sentence did not contain any useful words;
+** if not, *error points to a malloc'd copy of the offending word.
+** The returned list may contain anywords.
+*/
+
+
+parse_rule_list_t *
+vocab_build_gnf(parse_tree_branch_t *branches, int branches_nr);
+/* Constructs the Greibach Normal Form of the grammar supplied in 'branches'
+** Parameters: (parse_tree_branch_t *) branches: The parser's branches
+**             (int) branches_nr: Number of parser branches
+** Returns   : (parse_rule_list_t *): Pointer to a list of singly linked
+**                                    GNF rules describing the same language
+**                                    that was described by 'branches'
+** The original SCI rules are in almost-CNF (Chomsky Normal Form). Note that
+** branch[0] is used only for a few magical incantations, as it is treated
+** specially by the SCI parser.
+*/
+
+
+void
+vocab_free_rule_list(parse_rule_list_t *rule_list);
+/* Frees a parser rule list as returned by vocab_build_gnf()
+** Parameters: (parse_rule_list_t *) rule_list: The rule list to free
+** Returns   : (void)
+*/
+
+
+int
+vocab_build_parse_tree(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
+                       parse_tree_branch_t *branch0, parse_rule_list_t *rules);
+/* Builds a parse tree from a list of words
+** Parameters: (parse_tree_node_t *) nodes: A node list to store the tree in (must have
+**                                          at least VOCAB_TREE_NODES entries)
+**             (result_word_t *) words: The words to build the tree from
+**             (int) words_nr: The number of words
+**             (parse_tree_branch_t *) branche0: The zeroeth original branch of the
+**                                     original CNF parser grammar
+**             (parse_rule_list *) rules: The GNF ruleset to parse with
+** Returns   : 0 on success, 1 if the tree couldn't be built in VOCAB_TREE_NODES nodes
+**             or if the sentence structure in 'words' is not part of the language
+**             described by the grammar passed in 'rules'.
+*/
+
+void
+vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes);
+/* Prints a parse tree
+** Parameters: (const char *) tree_name: Name of the tree to dump (free-form)
+**             (parse_tree_node_t *) nodes: The nodes containing the parse tree
+** Returns   : (void)
+*/
+
+
+
+
+struct _state;
+
+int
+said(struct _state *s, byte *spec, int verbose);
+/* Builds a parse tree from a spec and compares it to a parse tree
+** Parameters: (state_t *) s: The affected state
+**             (byte *) spec: Pointer to the spec to build
+**             (int) verbose: Whether to display the parse tree after building it
+** Returns   : (int) 1 on a match, 0 otherwise
+*/
+
+const char *
+vocab_get_any_group_word(int group, word_t **words, int words_nr);
+/* Gets any word from the specified group.
+** Parameters: (int) group: Group number.
+**             (word_t **) words: List of words
+**             (int) words_nr: Count of words in the list.
+** For debugging only.
+*/
+
+
+void
+vocab_decypher_said_block(struct _state *s, byte *pos);
+/* Decyphers a said block and dumps its content via sciprintf.
+** Parameters: (state_t *) s: The state to use
+**             (byte *) pos: Pointer to the data to dump
+** For debugging only.
+*/
+
+
+void
+vocab_synonymize_tokens(result_word_t *words, int words_nr, synonym_t *synonyms, int synonyms_nr);
+/* Synonymizes a token list
+** Parameters: (result_wort_t *) words: The word list to synonymize
+**             (int) words_nr: Number of word_ts in the list
+**             (synonym_t *) synonyms: Synonym list
+**             (int) synonyms_nr: Number of synonyms in the list
+*/
+
+int
+vocab_gnf_parse(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
+		parse_tree_branch_t *branch0, parse_rule_list_t *tlist, int verbose);
+
+void
+vocab_gnf_dump(parse_tree_branch_t *branches, int branches_nr);
+
+
+#endif
author	Jordi Vilalta Prat	2009-02-15 06:10:59 +0000
committer	Jordi Vilalta Prat	2009-02-15 06:10:59 +0000
commit	fa6e10e9cec163845aa29e7940c86e9c9ab8a2bc (patch)
tree	ce87338830cc8c149e1de545246bcefe4f45da00 /engines/sci/include/vocabulary.h
parent	7c148ddf021c990fa866b7600f979aac9a5b26c9 (diff)
download	scummvm-rg350-fa6e10e9cec163845aa29e7940c86e9c9ab8a2bc.tar.gz scummvm-rg350-fa6e10e9cec163845aa29e7940c86e9c9ab8a2bc.tar.bz2 scummvm-rg350-fa6e10e9cec163845aa29e7940c86e9c9ab8a2bc.zip