aboutsummaryrefslogtreecommitdiff
path: root/engines/sci/include/vocabulary.h
diff options
context:
space:
mode:
authorJordi Vilalta Prat2009-02-15 06:10:59 +0000
committerJordi Vilalta Prat2009-02-15 06:10:59 +0000
commitfa6e10e9cec163845aa29e7940c86e9c9ab8a2bc (patch)
treece87338830cc8c149e1de545246bcefe4f45da00 /engines/sci/include/vocabulary.h
parent7c148ddf021c990fa866b7600f979aac9a5b26c9 (diff)
downloadscummvm-rg350-fa6e10e9cec163845aa29e7940c86e9c9ab8a2bc.tar.gz
scummvm-rg350-fa6e10e9cec163845aa29e7940c86e9c9ab8a2bc.tar.bz2
scummvm-rg350-fa6e10e9cec163845aa29e7940c86e9c9ab8a2bc.zip
Import the SCI engine sources from the FreeSCI Glutton branch (it doesn't compile yet)
svn-id: r38192
Diffstat (limited to 'engines/sci/include/vocabulary.h')
-rw-r--r--engines/sci/include/vocabulary.h424
1 files changed, 424 insertions, 0 deletions
diff --git a/engines/sci/include/vocabulary.h b/engines/sci/include/vocabulary.h
new file mode 100644
index 0000000000..fb58dd644b
--- /dev/null
+++ b/engines/sci/include/vocabulary.h
@@ -0,0 +1,424 @@
+/***************************************************************************
+ vocabulary.h Copyright (C) 1999,2000,01 Christoph Reichenbach
+
+
+ This program may be modified and copied freely according to the terms of
+ the GNU general public license (GPL), as long as the above copyright
+ notice and the licensing information contained herein are preserved.
+
+ Please refer to www.gnu.org for licensing details.
+
+ This work is provided AS IS, without warranty of any kind, expressed or
+ implied, including but not limited to the warranties of merchantibility,
+ noninfringement, and fitness for a specific purpose. The author will not
+ be held liable for any damage caused by this work or derivatives of it.
+
+ By using this source code, you agree to the licensing terms as stated
+ above.
+
+
+ Please contact the maintainer for bug reports or inquiries.
+
+ Current Maintainer:
+
+ Christoph Reichenbach (CJR) [jameson@linuxgames.com]
+
+***************************************************************************/
+
+#ifndef VOCABULARY_H
+#define VOCABULARY_H
+
+#include <versions.h>
+#include <sciresource.h>
+
+/*#define VOCABULARY_DEBUG */
+/*#define SCI_SIMPLE_SAID_CODE */ /* Whether the simplified Said() matching should be used */
+/*#define SCI_SIMPLE_SAID_DEBUG */ /* uncomment to enable simple said debugging */
+
+
+#define SCRIPT_UNKNOWN_FUNCTION_STRING "[Unknown]"
+/* The string used to identify the "unknown" SCI0 function for each game */
+
+#define PARSE_HEAP_SIZE 64
+/* Number of bytes allocated on the heap to store bad words if parsing fails */
+
+
+typedef struct opcode_
+{
+ int type;
+ int number;
+ char* name;
+} opcode;
+
+#define VOCAB_RESOURCE_OPCODES 998
+#define VOCAB_RESOURCE_KNAMES 999
+
+#define VOCAB_RESOURCE_SCI0_MAIN_VOCAB 0
+#define VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES 900
+#define VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB 901
+
+#define VOCAB_RESOURCE_SCI1_MAIN_VOCAB 900
+#define VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES 901
+#define VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB 902
+#define VOCAB_RESOURCE_SCI1_CHAR_TRANSFORMS 913
+
+#define VOCAB_CLASS_PREPOSITION 0x01
+#define VOCAB_CLASS_ARTICLE 0x02
+#define VOCAB_CLASS_ADJECTIVE 0x04
+#define VOCAB_CLASS_PRONOUN 0x08
+#define VOCAB_CLASS_NOUN 0x10
+#define VOCAB_CLASS_INDICATIVE_VERB 0x20
+#define VOCAB_CLASS_ADVERB 0x40
+#define VOCAB_CLASS_IMPERATIVE_VERB 0x80
+#define VOCAB_CLASS_NUMBER 0x001
+
+extern DLLEXTERN const char *class_names[]; /* Vocabulary class names */
+
+#define VOCAB_CLASS_ANYWORD 0xff
+/* Anywords are ignored by the parser */
+
+#define VOCAB_MAGIC_NUMBER_GROUP 0xffd /* 0xffe ? */
+/* This word class is used for numbers */
+
+#define VOCAB_TREE_NODES 500
+/* Number of nodes for each parse_tree_node structure */
+
+#define VOCAB_TREE_NODE_LAST_WORD_STORAGE 0x140
+#define VOCAB_TREE_NODE_COMPARE_TYPE 0x146
+#define VOCAB_TREE_NODE_COMPARE_GROUP 0x14d
+#define VOCAB_TREE_NODE_FORCE_STORAGE 0x154
+
+#define SAID_COMMA 0xf0
+#define SAID_AMP 0xf1
+#define SAID_SLASH 0xf2
+#define SAID_PARENO 0xf3
+#define SAID_PARENC 0xf4
+#define SAID_BRACKO 0xf5
+#define SAID_BRACKC 0xf6
+#define SAID_HASH 0xf7
+#define SAID_LT 0xf8
+#define SAID_GT 0xf9
+#define SAID_TERM 0xff
+
+#define SAID_FIRST SAID_COMMA
+
+/* There was no 'last matching word': */
+#define SAID_FULL_MATCH 0xffff
+#define SAID_NO_MATCH 0xfffe
+#define SAID_PARTIAL_MATCH 0xfffd
+
+#define SAID_LONG(x) ((x) << 8)
+
+typedef struct {
+
+ int w_class; /* Word class */
+ int group; /* Word group */
+ char word[1]; /* The actual word */
+
+} word_t;
+
+
+typedef struct {
+ int id; /* non-terminal ID */
+ int first_special; /* first terminal or non-terminal */
+ int specials_nr; /* number of terminals and non-terminals */
+ int length;
+ int data[1]; /* actual data (size 1 to avoid compiler warnings) */
+} parse_rule_t;
+
+
+typedef struct _parse_rule_list {
+ int terminal; /* Terminal character this rule matches against or 0 for a non-terminal rule */
+ parse_rule_t *rule;
+ struct _parse_rule_list *next;
+} parse_rule_list_t;
+
+
+typedef struct {
+
+ int class_mask; /* the word class this suffix applies to */
+ int result_class; /* the word class a word is morphed to if it doesn't fail this check */
+
+ int alt_suffix_length; /* String length of the suffix */
+ int word_suffix_length; /* String length of the other suffix */
+
+ char *alt_suffix; /* The alternative suffix */
+ char *word_suffix; /* The suffix as used in the word vocabulary */
+
+} suffix_t;
+
+
+typedef struct {
+
+ int w_class; /* Word class */
+ int group; /* Word group */
+
+} result_word_t;
+
+
+typedef struct
+{
+ int replaceant; /* The word group to replace */
+ int replacement; /* The replacement word group for this one */
+} synonym_t;
+
+
+typedef struct {
+
+ int id;
+
+ int data[10];
+
+} parse_tree_branch_t;
+
+#define PARSE_TREE_NODE_LEAF 0
+#define PARSE_TREE_NODE_BRANCH 1
+
+
+typedef struct {
+
+ short type; /* leaf or branch */
+
+ union {
+
+ int value; /* For leaves */
+ short branches[2]; /* For branches */
+
+ } content;
+
+} parse_tree_node_t;
+
+
+
+
+/*FIXME: These need freeing functions...*/
+
+int* vocabulary_get_classes(resource_mgr_t *resmgr, int *count);
+
+int vocabulary_get_class_count(resource_mgr_t *resmgr);
+
+/**
+ * Returns a null terminated array of selector names.
+ */
+char** vocabulary_get_snames(resource_mgr_t *resmgr, int *pcount, sci_version_t version);
+
+/**
+ * Frees the aforementioned array
+ */
+void vocabulary_free_snames(char **snames_list);
+
+/* Look up a selector name in an array, return the index */
+int vocabulary_lookup_sname(char **snames_list, char *sname);
+
+
+/**
+ * Returns a null terminated array of opcodes.
+ */
+opcode* vocabulary_get_opcodes(resource_mgr_t *resmgr);
+
+void
+vocabulary_free_opcodes(opcode *opcodes);
+/* Frees a previously allocated list of opcodes
+** Parameters: (opcode *) opcodes: Opcodes to free
+** Returns : (void)
+*/
+
+/**
+ * Returns a null terminated array of kernel function names.
+ *
+ * This function reads the kernel function name table from resource_map,
+ * and returns a null terminated array of deep copies of them.
+ * The returned array has the same format regardless of the format of the
+ * name table of the resource (the format changed between version 0 and 1).
+ */
+char** vocabulary_get_knames(resource_mgr_t *resmgr, int* count);
+void vocabulary_free_knames(char** names);
+
+
+
+word_t **
+vocab_get_words(resource_mgr_t *resmgr, int *word_counter);
+/* Gets all words from the main vocabulary
+** Parameters: (resource_mgr_t *) resmr: The resource manager to read from
+** (int *) word_counter: The int which the number of words is stored in
+** Returns : (word_t **): A list of all words, dynamically allocated
+*/
+
+
+void
+vocab_free_words(word_t **words, int words_nr);
+/* Frees memory allocated by vocab_get_words
+** Parameters: (word_t **) words: The words to free
+** (int) words_nr: Number of words in the structure
+** Returns : (void)
+*/
+
+
+suffix_t **
+vocab_get_suffices(resource_mgr_t *resmgr, int *suffices_nr);
+/* Gets all suffixes from the suffix vocabulary
+** Parameters: (resource_mgr_t*) resmgr: Resource manager the resources are
+** read from
+** (int *) suffices_nr: The variable to store the number of suffices in
+** Returns : (suffix_t **): A list of suffixes
+*/
+
+void
+vocab_free_suffices(resource_mgr_t *resmgr, suffix_t **suffices, int suffices_nr);
+/* Frees suffices_nr suffices
+** Parameters: (resource_mgr_t *) resmgr: The resource manager to free from
+** (suffix_t **) suffices: The suffixes to free
+** (int) suffices_nr: Number of entrie sin suffices
+** Returns : (void)
+*/
+
+parse_tree_branch_t *
+vocab_get_branches(resource_mgr_t *resmgr, int *branches_nr);
+/* Retrieves all grammar rules from the resource data
+** Parameters: (resource_mgr_t*) resmgr: Resource manager the rules are
+** read from
+** (int *) branches_nr: Pointer to the variable which the number of entries is to be
+** stored in
+** Returns : (parse_tree_branch_t *): The rules, or NULL on error
+*/
+
+void
+vocab_free_branches(parse_tree_branch_t *parser_branches);
+/* Frees all branches
+** Parameters: (parse_tree_branch_t *) parser_branches: The branches to free
+** Returns : (null)
+*/
+
+result_word_t *
+vocab_lookup_word(char *word, int word_len,
+ word_t **words, int words_nr,
+ suffix_t **suffices, int suffices_nr);
+/* Looks up a single word in the words and suffixes list
+** Parameters: (char *) word: Pointer to the word to look up
+** (int) word_len: Length of the word to look up
+** (word_t **) words: List of words
+** (int) words_nr: Number of elements in 'words'
+** (suffix_t **) suffices: List of suffices
+** (int) suffices_nr: Number of entries in 'suffices'
+** Returns : (result_word_t *) A malloc'd result_word_t, or NULL if the word
+** could not be found.
+*/
+
+
+result_word_t *
+vocab_tokenize_string(char *sentence, int *result_nr,
+ word_t **words, int words_nr,
+ suffix_t **suffices, int suffices_nr,
+ char **error);
+/* Tokenizes a string and compiles it into word_ts.
+** Parameters: (char *) sentence: The sentence to examine
+** (int *) result_nr: The variable to store the resulting number of words in
+** (word_t **) words: The words to scan for
+** (int) words_nr: Number of words to scan for
+** (suffix_t **) suffices: suffixes to scan for
+** (int) suffices_nr: Number of suffices to scan for
+** (char **) error: Points to a malloc'd copy of the offending text or to NULL on error
+** Returns : (word_t *): A list of word_ts containing the result, or NULL.
+** On error, NULL is returned. If *error is NULL, the sentence did not contain any useful words;
+** if not, *error points to a malloc'd copy of the offending word.
+** The returned list may contain anywords.
+*/
+
+
+parse_rule_list_t *
+vocab_build_gnf(parse_tree_branch_t *branches, int branches_nr);
+/* Constructs the Greibach Normal Form of the grammar supplied in 'branches'
+** Parameters: (parse_tree_branch_t *) branches: The parser's branches
+** (int) branches_nr: Number of parser branches
+** Returns : (parse_rule_list_t *): Pointer to a list of singly linked
+** GNF rules describing the same language
+** that was described by 'branches'
+** The original SCI rules are in almost-CNF (Chomsky Normal Form). Note that
+** branch[0] is used only for a few magical incantations, as it is treated
+** specially by the SCI parser.
+*/
+
+
+void
+vocab_free_rule_list(parse_rule_list_t *rule_list);
+/* Frees a parser rule list as returned by vocab_build_gnf()
+** Parameters: (parse_rule_list_t *) rule_list: The rule list to free
+** Returns : (void)
+*/
+
+
+int
+vocab_build_parse_tree(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
+ parse_tree_branch_t *branch0, parse_rule_list_t *rules);
+/* Builds a parse tree from a list of words
+** Parameters: (parse_tree_node_t *) nodes: A node list to store the tree in (must have
+** at least VOCAB_TREE_NODES entries)
+** (result_word_t *) words: The words to build the tree from
+** (int) words_nr: The number of words
+** (parse_tree_branch_t *) branche0: The zeroeth original branch of the
+** original CNF parser grammar
+** (parse_rule_list *) rules: The GNF ruleset to parse with
+** Returns : 0 on success, 1 if the tree couldn't be built in VOCAB_TREE_NODES nodes
+** or if the sentence structure in 'words' is not part of the language
+** described by the grammar passed in 'rules'.
+*/
+
+void
+vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes);
+/* Prints a parse tree
+** Parameters: (const char *) tree_name: Name of the tree to dump (free-form)
+** (parse_tree_node_t *) nodes: The nodes containing the parse tree
+** Returns : (void)
+*/
+
+
+
+
+struct _state;
+
+int
+said(struct _state *s, byte *spec, int verbose);
+/* Builds a parse tree from a spec and compares it to a parse tree
+** Parameters: (state_t *) s: The affected state
+** (byte *) spec: Pointer to the spec to build
+** (int) verbose: Whether to display the parse tree after building it
+** Returns : (int) 1 on a match, 0 otherwise
+*/
+
+const char *
+vocab_get_any_group_word(int group, word_t **words, int words_nr);
+/* Gets any word from the specified group.
+** Parameters: (int) group: Group number.
+** (word_t **) words: List of words
+** (int) words_nr: Count of words in the list.
+** For debugging only.
+*/
+
+
+void
+vocab_decypher_said_block(struct _state *s, byte *pos);
+/* Decyphers a said block and dumps its content via sciprintf.
+** Parameters: (state_t *) s: The state to use
+** (byte *) pos: Pointer to the data to dump
+** For debugging only.
+*/
+
+
+void
+vocab_synonymize_tokens(result_word_t *words, int words_nr, synonym_t *synonyms, int synonyms_nr);
+/* Synonymizes a token list
+** Parameters: (result_wort_t *) words: The word list to synonymize
+** (int) words_nr: Number of word_ts in the list
+** (synonym_t *) synonyms: Synonym list
+** (int) synonyms_nr: Number of synonyms in the list
+*/
+
+int
+vocab_gnf_parse(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
+ parse_tree_branch_t *branch0, parse_rule_list_t *tlist, int verbose);
+
+void
+vocab_gnf_dump(parse_tree_branch_t *branches, int branches_nr);
+
+
+#endif