diff options
Diffstat (limited to 'engines/sci/parser')
-rw-r--r-- | engines/sci/parser/grammar.cpp | 44 | ||||
-rw-r--r-- | engines/sci/parser/said.cpp | 2831 | ||||
-rw-r--r-- | engines/sci/parser/said.y | 839 | ||||
-rw-r--r-- | engines/sci/parser/vocabulary.cpp | 329 | ||||
-rw-r--r-- | engines/sci/parser/vocabulary.h | 39 |
5 files changed, 973 insertions, 3109 deletions
diff --git a/engines/sci/parser/grammar.cpp b/engines/sci/parser/grammar.cpp index 070e6767cf..6f37b49919 100644 --- a/engines/sci/parser/grammar.cpp +++ b/engines/sci/parser/grammar.cpp @@ -422,44 +422,44 @@ ParseRuleList *Vocabulary::buildGNF(bool verbose) { return tlist; } -static int _vbpt_pareno(parse_tree_node_t *nodes, int *pos, int base) { +static int _vbpt_pareno(ParseTreeNode *nodes, int *pos, int base) { // Opens parentheses - nodes[base].content.branches[0] = (*pos) + 1; + nodes[base].left = &nodes[(*pos) + 1]; nodes[++(*pos)].type = kParseTreeBranchNode; - nodes[*pos].content.branches[0] = 0; - nodes[*pos].content.branches[1] = 0; + nodes[*pos].left = 0; + nodes[*pos].right = 0; return *pos; } -static int _vbpt_parenc(parse_tree_node_t *nodes, int *pos, int paren) { +static int _vbpt_parenc(ParseTreeNode *nodes, int *pos, int paren) { // Closes parentheses for appending - nodes[paren].content.branches[1] = ++(*pos); + nodes[paren].right = &nodes[++(*pos)]; nodes[*pos].type = kParseTreeBranchNode; - nodes[*pos].content.branches[0] = 0; - nodes[*pos].content.branches[1] = 0; + nodes[*pos].left = 0; + nodes[*pos].right = 0; return *pos; } -static int _vbpt_append(parse_tree_node_t *nodes, int *pos, int base, int value) { +static int _vbpt_append(ParseTreeNode *nodes, int *pos, int base, int value) { // writes one value to an existing base node and creates a successor node for writing - nodes[base].content.branches[0] = ++(*pos); + nodes[base].left = &nodes[++(*pos)]; nodes[*pos].type = kParseTreeLeafNode; - nodes[*pos].content.value = value; - nodes[base].content.branches[1] = ++(*pos); + nodes[*pos].value = value; + nodes[base].right = &nodes[++(*pos)]; nodes[*pos].type = kParseTreeBranchNode; - nodes[*pos].content.branches[0] = 0; - nodes[*pos].content.branches[1] = 0; + nodes[*pos].left = 0; + nodes[*pos].right = 0; return *pos; } -static int _vbpt_terminate(parse_tree_node_t *nodes, int *pos, int base, int value) { +static int _vbpt_terminate(ParseTreeNode *nodes, int *pos, int base, int value) { // Terminates, overwriting a nextwrite forknode nodes[base].type = kParseTreeLeafNode; - nodes[base].content.value = value; + nodes[base].value = value; return *pos; } -static int _vbpt_write_subexpression(parse_tree_node_t *nodes, int *pos, ParseRule *rule, uint rulepos, int writepos) { +static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *rule, uint rulepos, int writepos) { uint token; while ((token = ((rulepos < rule->_data.size()) ? rule->_data[rulepos++] : TOKEN_CPAREN)) != TOKEN_CPAREN) { @@ -565,15 +565,15 @@ int Vocabulary::parseGNF(const ResultWordList &words, bool verbose) { int temp, pos; _parserNodes[0].type = kParseTreeBranchNode; - _parserNodes[0].content.branches[0] = 1; - _parserNodes[0].content.branches[1] = 2; + _parserNodes[0].left = &_parserNodes[1]; + _parserNodes[0].right = &_parserNodes[2]; _parserNodes[1].type = kParseTreeLeafNode; - _parserNodes[1].content.value = 0x141; + _parserNodes[1].value = 0x141; _parserNodes[2].type = kParseTreeBranchNode; - _parserNodes[2].content.branches[0] = 0; - _parserNodes[2].content.branches[1] = 0; + _parserNodes[2].left = 0; + _parserNodes[2].right = 0; pos = 2; diff --git a/engines/sci/parser/said.cpp b/engines/sci/parser/said.cpp index f49704372a..9c07be2dff 100644 --- a/engines/sci/parser/said.cpp +++ b/engines/sci/parser/said.cpp @@ -1,111 +1,3 @@ -/* A Bison parser, made by GNU Bison 2.3. */ - -/* Skeleton implementation for Bison's Yacc-like parsers in C - - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 - Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. */ - -/* As a special exception, you may create a larger work that contains - part or all of the Bison parser skeleton and distribute that work - under terms of your choice, so long as that work isn't itself a - parser generator using the skeleton or a modified version thereof - as a parser skeleton. Alternatively, if you modify or redistribute - the parser skeleton itself, you may (at your option) remove this - special exception, which will cause the skeleton and the resulting - Bison output files to be licensed under the GNU General Public - License without this special exception. - - This special exception was added by the Free Software Foundation in - version 2.2 of Bison. */ - -/* C LALR(1) parser skeleton written by Richard Stallman, by - simplifying the original so-called "semantic" parser. */ - -/* All symbols defined below should begin with yy or YY, to avoid - infringing on user name space. This should be done even for local - variables, as they might otherwise be expanded by user macros. - There are some unavoidable exceptions within include files to - define necessary library symbols; they are noted "INFRINGES ON - USER NAME SPACE" below. */ - -/* Identify Bison output. */ -#define YYBISON 1 - -/* Bison version. */ -#define YYBISON_VERSION "2.3" - -/* Skeleton name. */ -#define YYSKELETON_NAME "yacc.c" - -/* Pure parsers. */ -#define YYPURE 0 - -/* Using locations. */ -#define YYLSP_NEEDED 0 - - - -/* Tokens. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. */ - enum yytokentype { - WGROUP = 258, - YY_COMMA = 259, - YY_AMP = 260, - YY_SLASH = 261, - YY_PARENO = 262, - YY_PARENC = 263, - YY_BRACKETSO = 264, - YY_BRACKETSC = 265, - YY_HASH = 266, - YY_LT = 267, - YY_GT = 268, - YY_BRACKETSO_LT = 269, - YY_BRACKETSO_SLASH = 270, - YY_LT_BRACKETSO = 271, - YY_LT_PARENO = 272 - }; -#endif -/* Tokens. */ -#define WGROUP 258 -#define YY_COMMA 259 -#define YY_AMP 260 -#define YY_SLASH 261 -#define YY_PARENO 262 -#define YY_PARENC 263 -#define YY_BRACKETSO 264 -#define YY_BRACKETSC 265 -#define YY_HASH 266 -#define YY_LT 267 -#define YY_GT 268 -#define YY_BRACKETSO_LT 269 -#define YY_BRACKETSO_SLASH 270 -#define YY_LT_BRACKETSO 271 -#define YY_LT_PARENO 272 - - - - -/* Copy the first part of user declarations. */ - - /* ScummVM - Graphic Adventure Engine * * ScummVM is the legal property of its developers, whose names @@ -133,14 +25,6 @@ #include "sci/engine/state.h" - -// Bison generates an empty switch statement that gives a warning in MSVC. -// This disables that warning. -#ifdef _MSC_VER -#pragma warning(disable:4065) -#endif - - namespace Sci { #define SAID_BRANCH_NULL 0 @@ -150,25 +34,8 @@ namespace Sci { // Maximum number of words to be expected in a parsed sentence #define AUGMENT_MAX_WORDS 64 - -#define ANYWORD 0xfff - -#define WORD_TYPE_BASE 0x141 -#define WORD_TYPE_REF 0x144 -#define WORD_TYPE_SYNTACTIC_SUGAR 0x145 - -#define AUGMENT_SENTENCE_PART_BRACKETS 0x152 - -// Minor numbers -#define AUGMENT_SENTENCE_MINOR_MATCH_PHRASE 0x14c -#define AUGMENT_SENTENCE_MINOR_MATCH_WORD 0x153 -#define AUGMENT_SENTENCE_MINOR_RECURSE 0x144 -#define AUGMENT_SENTENCE_MINOR_PARENTHESES 0x14f - - -#undef YYDEBUG /*1*/ -//#define SAID_DEBUG*/ -//#define SCI_DEBUG_PARSE_TREE_AUGMENTATION // uncomment to debug parse tree augmentation +// uncomment to debug parse tree augmentation +//#define SCI_DEBUG_PARSE_TREE_AUGMENTATION #ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION @@ -179,1840 +46,622 @@ void print_nothing(...) { } #endif -static char *said_parse_error; - static int said_token; static int said_tokens_nr; static int said_tokens[MAX_SAID_TOKENS]; -static int said_blessed; // increminated by said_top_branch - -static int said_tree_pos; // Set to 0 if we're out of space -#define SAID_TREE_START 4; // Reserve space for the 4 top nodes - -#define VALUE_IGNORE -424242 - -static parse_tree_node_t said_tree[VOCAB_TREE_NODES]; - -typedef int wgroup_t; -typedef int tree_t; -typedef int said_spec_t; - -static tree_t said_aug_branch(int, int, tree_t, tree_t); -static tree_t said_attach_branch(tree_t, tree_t); -/* -static tree_t said_wgroup_branch(wgroup_t); -*/ -static said_spec_t said_top_branch(tree_t); -static tree_t said_paren(tree_t, tree_t); -static tree_t said_value(int, tree_t); -static tree_t said_terminal(int); - -static int yylex(); - -static int yyerror(const char *s) { - said_parse_error = strdup(s); - return 1; /* Abort */ -} - - - -/* Enabling traces. */ -#ifndef YYDEBUG -# define YYDEBUG 0 -#endif - -/* Enabling verbose error messages. */ -#ifdef YYERROR_VERBOSE -# undef YYERROR_VERBOSE -# define YYERROR_VERBOSE 1 -#else -# define YYERROR_VERBOSE 0 -#endif - -/* Enabling the token table. */ -#ifndef YYTOKEN_TABLE -# define YYTOKEN_TABLE 0 -#endif - -#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED -typedef int YYSTYPE; -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -# define YYSTYPE_IS_TRIVIAL 1 -#endif - - - -/* Copy the second part of user declarations. */ - - -/* Line 216 of yacc.c. */ - - -#ifdef short -# undef short -#endif -#ifdef YYTYPE_UINT8 -typedef YYTYPE_UINT8 yytype_uint8; -#else -typedef unsigned char yytype_uint8; -#endif - -#ifdef YYTYPE_INT8 -typedef YYTYPE_INT8 yytype_int8; -#elif (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -typedef signed char yytype_int8; -#else -typedef short int yytype_int8; -#endif - -#ifdef YYTYPE_UINT16 -typedef YYTYPE_UINT16 yytype_uint16; -#else -typedef unsigned short int yytype_uint16; -#endif - -#ifdef YYTYPE_INT16 -typedef YYTYPE_INT16 yytype_int16; -#else -typedef short int yytype_int16; -#endif - -#ifndef YYSIZE_T -# ifdef __SIZE_TYPE__ -# define YYSIZE_T __SIZE_TYPE__ -# elif defined size_t -# define YYSIZE_T size_t -# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ -# define YYSIZE_T size_t -# else -# define YYSIZE_T unsigned int -# endif -#endif - -#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) - -#ifndef YY_ -# if YYENABLE_NLS -# if ENABLE_NLS -# include <libintl.h> /* INFRINGES ON USER NAME SPACE */ -# define YY_(msgid) dgettext ("bison-runtime", msgid) -# endif -# endif -# ifndef YY_ -# define YY_(msgid) msgid -# endif -#endif - -/* Suppress unused-variable warnings by "using" E. */ -#if ! defined lint || defined __GNUC__ -# define YYUSE(e) ((void) (e)) -#else -# define YYUSE(e) /* empty */ -#endif - -/* Identity function, used to suppress warnings about constant conditions. */ -#ifndef lint -# define YYID(n) (n) -#else -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static int -YYID (int i) -#else -static int -YYID (i) - int i; -#endif -{ - return i; -} -#endif - -#if ! defined yyoverflow || YYERROR_VERBOSE - -/* The parser invokes alloca or malloc; define the necessary symbols. */ - -# ifdef YYSTACK_USE_ALLOCA -# if YYSTACK_USE_ALLOCA -# ifdef __GNUC__ -# define YYSTACK_ALLOC __builtin_alloca -# elif defined __BUILTIN_VA_ARG_INCR -# include <alloca.h> /* INFRINGES ON USER NAME SPACE */ -# elif defined _AIX -# define YYSTACK_ALLOC __alloca -# elif defined _MSC_VER -# include <malloc.h> /* INFRINGES ON USER NAME SPACE */ -# define alloca _alloca -# else -# define YYSTACK_ALLOC alloca -# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ -# ifndef _STDLIB_H -# define _STDLIB_H 1 -# endif -# endif -# endif -# endif -# endif - -# ifdef YYSTACK_ALLOC - /* Pacify GCC's `empty if-body' warning. */ -# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0)) -# ifndef YYSTACK_ALLOC_MAXIMUM - /* The OS might guarantee only one guard page at the bottom of the stack, - and a page size can be as small as 4096 bytes. So we cannot safely - invoke alloca (N) if N exceeds 4096. Use a slightly smaller number - to allow for a few compiler-allocated temporary stack slots. */ -# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ -# endif -# else -# define YYSTACK_ALLOC YYMALLOC -# define YYSTACK_FREE YYFREE -# ifndef YYSTACK_ALLOC_MAXIMUM -# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM -# endif -# if (defined __cplusplus && ! defined _STDLIB_H \ - && ! ((defined YYMALLOC || defined malloc) \ - && (defined YYFREE || defined free))) -# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ -# ifndef _STDLIB_H -# define _STDLIB_H 1 -# endif -# endif -# ifndef YYMALLOC -# define YYMALLOC malloc -# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# ifndef YYFREE -# define YYFREE free -# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -void free (void *); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# endif -#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ - - -#if (! defined yyoverflow \ - && (! defined __cplusplus \ - || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) - -/* A type that is properly aligned for any stack member. */ -union yyalloc -{ - yytype_int16 yyss; - YYSTYPE yyvs; - }; - -/* The size of the maximum gap between one aligned stack and the next. */ -# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) - -/* The size of an array large to enough to hold all stacks, each with - N elements. */ -# define YYSTACK_BYTES(N) \ - ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ - + YYSTACK_GAP_MAXIMUM) - -/* Copy COUNT objects from FROM to TO. The source and destination do - not overlap. */ -# ifndef YYCOPY -# if defined __GNUC__ && 1 < __GNUC__ -# define YYCOPY(To, From, Count) \ - __builtin_memcpy (To, From, (Count) * sizeof (*(From))) -# else -# define YYCOPY(To, From, Count) \ - do \ - { \ - YYSIZE_T yyi; \ - for (yyi = 0; yyi < (Count); yyi++) \ - (To)[yyi] = (From)[yyi]; \ - } \ - while (YYID (0)) -# endif -# endif - -/* Relocate STACK from its old location to the new one. The - local variables YYSIZE and YYSTACKSIZE give the old and new number of - elements in the stack, and YYPTR gives the new location of the - stack. Advance YYPTR to a properly aligned location for the next - stack. */ -# define YYSTACK_RELOCATE(Stack) \ - do \ - { \ - YYSIZE_T yynewbytes; \ - YYCOPY (&yyptr->Stack, Stack, yysize); \ - Stack = &yyptr->Stack; \ - yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ - yyptr += yynewbytes / sizeof (*yyptr); \ - } \ - while (YYID (0)) - -#endif - -/* YYFINAL -- State number of the termination state. */ -#define YYFINAL 23 -/* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 80 - -/* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 18 -/* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 13 -/* YYNRULES -- Number of rules. */ -#define YYNRULES 35 -/* YYNRULES -- Number of states. */ -#define YYNSTATES 69 - -/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ -#define YYUNDEFTOK 2 -#define YYMAXUTOK 272 - -#define YYTRANSLATE(YYX) \ - ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) - -/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */ -static const yytype_uint8 yytranslate[] = -{ - 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17 -}; - -#if YYDEBUG -/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in - YYRHS. */ -static const yytype_uint8 yyprhs[] = -{ - 0, 0, 3, 6, 10, 15, 16, 18, 19, 21, - 24, 29, 31, 34, 39, 41, 43, 45, 49, 51, - 55, 59, 64, 70, 73, 75, 77, 79, 83, 88, - 92, 97, 100, 105, 109, 112 -}; - -/* YYRHS -- A `-1'-separated list of the rules' RHS. */ -static const yytype_int8 yyrhs[] = -{ - 19, 0, -1, 21, 20, -1, 21, 22, 20, -1, - 21, 22, 23, 20, -1, -1, 13, -1, -1, 27, - -1, 6, 27, -1, 15, 6, 27, 10, -1, 6, - -1, 6, 27, -1, 15, 6, 27, 10, -1, 6, - -1, 3, -1, 26, -1, 9, 26, 10, -1, 24, - -1, 7, 27, 8, -1, 26, 4, 26, -1, 26, - 14, 29, 10, -1, 26, 4, 9, 26, 10, -1, - 25, 28, -1, 25, -1, 28, -1, 29, -1, 14, - 29, 10, -1, 29, 14, 29, 10, -1, 12, 24, - 30, -1, 17, 7, 27, 8, -1, 12, 26, -1, - 16, 9, 26, 10, -1, 12, 26, 30, -1, 12, - 26, -1, 17, 7, 27, 8, -1 +static int said_tree_pos; +#define SAID_TREE_START 4 // Reserve space for the 4 top nodes + +enum SaidToken { + TOKEN_COMMA = 0xF000, + TOKEN_AMP = 0xF100, + TOKEN_SLASH = 0xF200, + TOKEN_PARENO = 0xF300, + TOKEN_PARENC = 0xF400, + TOKEN_BRACKETO = 0xF500, + TOKEN_BRACKETC = 0xF600, + TOKEN_HASH = 0xF700, + TOKEN_LT = 0xF800, + TOKEN_GT = 0xF900, + TOKEN_TERM = 0xFF00 }; -/* YYRLINE[YYN] -- source line where rule number YYN was defined. */ -static const yytype_uint8 yyrline[] = -{ - 0, 130, 130, 132, 134, 140, 141, 148, 149, 155, - 157, 159, 165, 167, 169, 174, 179, 181, 186, 188, - 190, 192, 194, 199, 201, 203, 208, 210, 212, 217, - 219, 221, 223, 228, 230, 232 +enum SaidWord { + WORD_NONE = 0x0ffe, + WORD_ANY = 0x0fff }; -#endif -#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE -/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - First, the terminals, then, starting at YYNTOKENS, nonterminals. */ -static const char *const yytname[] = -{ - "$end", "error", "$undefined", "WGROUP", "YY_COMMA", "YY_AMP", - "YY_SLASH", "YY_PARENO", "YY_PARENC", "YY_BRACKETSO", "YY_BRACKETSC", - "YY_HASH", "YY_LT", "YY_GT", "YY_BRACKETSO_LT", "YY_BRACKETSO_SLASH", - "YY_LT_BRACKETSO", "YY_LT_PARENO", "$accept", "saidspec", "optcont", - "leftspec", "midspec", "rightspec", "word", "cwordset", "wordset", - "expr", "cwordrefset", "wordrefset", "recref", 0 -}; -#endif -# ifdef YYPRINT -/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to - token YYLEX-NUM. */ -static const yytype_uint16 yytoknum[] = -{ - 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, - 265, 266, 267, 268, 269, 270, 271, 272 -}; -# endif -/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ -static const yytype_uint8 yyr1[] = -{ - 0, 18, 19, 19, 19, 20, 20, 21, 21, 22, - 22, 22, 23, 23, 23, 24, 25, 25, 26, 26, - 26, 26, 26, 27, 27, 27, 28, 28, 28, 29, - 29, 29, 29, 30, 30, 30 -}; +// TODO: maybe turn this into a proper n-ary tree instead of an +// n-ary tree implemented in terms of a binary tree. +// (Together with _parserNodes in Vocabulary) -/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ -static const yytype_uint8 yyr2[] = -{ - 0, 2, 2, 3, 4, 0, 1, 0, 1, 2, - 4, 1, 2, 4, 1, 1, 1, 3, 1, 3, - 3, 4, 5, 2, 1, 1, 1, 3, 4, 3, - 4, 2, 4, 3, 2, 4 -}; +static ParseTreeNode said_tree[VOCAB_TREE_NODES]; -/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state - STATE-NUM when YYTABLE doesn't specify something else to do. Zero - means the default is an error. */ -static const yytype_uint8 yydefact[] = -{ - 7, 15, 0, 0, 0, 0, 0, 0, 0, 5, - 18, 24, 16, 8, 25, 26, 0, 0, 18, 31, - 0, 0, 0, 1, 11, 6, 0, 2, 5, 23, - 0, 0, 0, 19, 17, 0, 0, 29, 27, 0, - 0, 9, 0, 14, 0, 3, 5, 0, 20, 0, - 0, 34, 0, 32, 30, 0, 12, 0, 4, 0, - 21, 28, 33, 0, 10, 0, 22, 35, 13 -}; - -/* YYDEFGOTO[NTERM-NUM]. */ -static const yytype_int8 yydefgoto[] = -{ - -1, 8, 27, 9, 28, 46, 10, 11, 12, 13, - 14, 15, 37 -}; - -/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing - STATE-NUM. */ -#define YYPACT_NINF -24 -static const yytype_int8 yypact[] = -{ - -1, -24, -1, 62, 62, 54, 1, 5, 18, 38, - -24, 47, 3, -24, -24, 12, 23, 15, -3, 3, - 28, 62, -1, -24, -1, -24, 42, -24, 39, -24, - 53, 54, 54, -24, -24, 62, 50, -24, -24, 29, - 41, -24, -1, -1, 52, -24, 55, 62, 3, 57, - 63, 20, -1, -24, -24, 64, -24, -1, -24, 32, - -24, -24, -24, 67, -24, 66, -24, -24, -24 -}; - -/* YYPGOTO[NTERM-NUM]. */ -static const yytype_int8 yypgoto[] = -{ - -24, -24, -23, -24, -24, -24, 68, -24, 0, -2, - 69, -4, 26 -}; - -/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If - positive, shift that token. If negative, reduce the rule which - number is the opposite. If zero, do what YYDEFACT says. - If YYTABLE_NINF, syntax error. */ -#define YYTABLE_NINF -1 -static const yytype_uint8 yytable[] = -{ - 16, 20, 1, 17, 19, 45, 2, 30, 3, 35, - 21, 4, 22, 5, 36, 6, 7, 31, 23, 30, - 40, 39, 41, 58, 30, 34, 32, 49, 50, 31, - 48, 33, 35, 30, 31, 51, 30, 36, 38, 53, - 55, 56, 66, 31, 24, 43, 31, 59, 42, 54, - 63, 25, 25, 26, 44, 65, 1, 52, 57, 4, - 2, 5, 47, 6, 7, 1, 4, 60, 25, 2, - 6, 7, 18, 61, 64, 67, 68, 62, 0, 0, - 29 -}; - -static const yytype_int8 yycheck[] = -{ - 2, 5, 3, 3, 4, 28, 7, 4, 9, 12, - 9, 12, 7, 14, 17, 16, 17, 14, 0, 4, - 22, 21, 24, 46, 4, 10, 14, 31, 32, 14, - 30, 8, 12, 4, 14, 35, 4, 17, 10, 10, - 42, 43, 10, 14, 6, 6, 14, 47, 6, 8, - 52, 13, 13, 15, 15, 57, 3, 7, 6, 12, - 7, 14, 9, 16, 17, 3, 12, 10, 13, 7, - 16, 17, 4, 10, 10, 8, 10, 51, -1, -1, - 11 -}; - -/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing - symbol of state STATE-NUM. */ -static const yytype_uint8 yystos[] = -{ - 0, 3, 7, 9, 12, 14, 16, 17, 19, 21, - 24, 25, 26, 27, 28, 29, 27, 26, 24, 26, - 29, 9, 7, 0, 6, 13, 15, 20, 22, 28, - 4, 14, 14, 8, 10, 12, 17, 30, 10, 26, - 27, 27, 6, 6, 15, 20, 23, 9, 26, 29, - 29, 26, 7, 10, 8, 27, 27, 6, 20, 26, - 10, 10, 30, 27, 10, 27, 10, 8, 10 -}; - -#define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY (-2) -#define YYEOF 0 - -#define YYACCEPT goto yyacceptlab -#define YYABORT goto yyabortlab -#define YYERROR goto yyerrorlab - - -/* Like YYERROR except do call yyerror. This remains here temporarily - to ease the transition to the new meaning of YYERROR, for GCC. - Once GCC version 2 has supplanted version 1, this can go. */ - -#define YYFAIL goto yyerrlab - -#define YYRECOVERING() (!!yyerrstatus) - -#define YYBACKUP(Token, Value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { \ - yychar = (Token); \ - yylval = (Value); \ - yytoken = YYTRANSLATE (yychar); \ - YYPOPSTACK (1); \ - goto yybackup; \ - } \ - else \ - { \ - yyerror (YY_("syntax error: cannot back up")); \ - YYERROR; \ - } \ -while (YYID (0)) - - -#define YYTERROR 1 -#define YYERRCODE 256 - - -/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. - If N is 0, then set CURRENT to the empty location which ends - the previous symbol: RHS[0] (always defined). */ - -#define YYRHSLOC(Rhs, K) ((Rhs)[K]) -#ifndef YYLLOC_DEFAULT -# define YYLLOC_DEFAULT(Current, Rhs, N) \ - do \ - if (YYID (N)) \ - { \ - (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ - (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ - (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ - (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ - } \ - else \ - { \ - (Current).first_line = (Current).last_line = \ - YYRHSLOC (Rhs, 0).last_line; \ - (Current).first_column = (Current).last_column = \ - YYRHSLOC (Rhs, 0).last_column; \ - } \ - while (YYID (0)) -#endif - - -/* YY_LOCATION_PRINT -- Print the location on the stream. - This macro was not mandated originally: define only if we know - we won't break user code: when these are the locations we know. */ - -#ifndef YY_LOCATION_PRINT -# if YYLTYPE_IS_TRIVIAL -# define YY_LOCATION_PRINT(File, Loc) \ - fprintf (File, "%d.%d-%d.%d", \ - (Loc).first_line, (Loc).first_column, \ - (Loc).last_line, (Loc).last_column) -# else -# define YY_LOCATION_PRINT(File, Loc) ((void) 0) -# endif -#endif +typedef int wgroup_t; +typedef int said_spec_t; -/* YYLEX -- calling `yylex' with the right arguments. */ -#ifdef YYLEX_PARAM -# define YYLEX yylex (YYLEX_PARAM) -#else -# define YYLEX yylex () -#endif +static ParseTreeNode* said_next_node() { + assert(said_tree_pos > 0 && said_tree_pos < VOCAB_TREE_NODES); -/* Enable debugging if requested. */ -#if YYDEBUG - -# ifndef YYFPRINTF -# include <stdio.h> /* INFRINGES ON USER NAME SPACE */ -# define YYFPRINTF fprintf -# endif - -# define YYDPRINTF(Args) \ -do { \ - if (yydebug) \ - YYFPRINTF Args; \ -} while (YYID (0)) - -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ -do { \ - if (yydebug) \ - { \ - YYFPRINTF (stderr, "%s ", Title); \ - yy_symbol_print (stderr, \ - Type, Value); \ - YYFPRINTF (stderr, "\n"); \ - } \ -} while (YYID (0)) - - -/*--------------------------------. -| Print this symbol on YYOUTPUT. | -`--------------------------------*/ - -/*ARGSUSED*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) -#else -static void -yy_symbol_value_print (yyoutput, yytype, yyvaluep) - FILE *yyoutput; - int yytype; - YYSTYPE const * const yyvaluep; -#endif -{ - if (!yyvaluep) - return; -# ifdef YYPRINT - if (yytype < YYNTOKENS) - YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); -# else - YYUSE (yyoutput); -# endif - switch (yytype) - { - default: - break; - } + return &said_tree[said_tree_pos++]; } +static ParseTreeNode* said_leaf_node(ParseTreeNode* pos, int value) { + pos->type = kParseTreeLeafNode; + pos->value = value; -/*--------------------------------. -| Print this symbol on YYOUTPUT. | -`--------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) -#else -static void -yy_symbol_print (yyoutput, yytype, yyvaluep) - FILE *yyoutput; - int yytype; - YYSTYPE const * const yyvaluep; -#endif -{ - if (yytype < YYNTOKENS) - YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); - else - YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); - - yy_symbol_value_print (yyoutput, yytype, yyvaluep); - YYFPRINTF (yyoutput, ")"); + return pos; } -/*------------------------------------------------------------------. -| yy_stack_print -- Print the state stack from its BOTTOM up to its | -| TOP (included). | -`------------------------------------------------------------------*/ +static ParseTreeNode* said_word_node(ParseTreeNode* pos, int value) { + pos->type = kParseTreeWordNode; + pos->value = value; -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_stack_print (yytype_int16 *bottom, yytype_int16 *top) -#else -static void -yy_stack_print (bottom, top) - yytype_int16 *bottom; - yytype_int16 *top; -#endif -{ - YYFPRINTF (stderr, "Stack now"); - for (; bottom <= top; ++bottom) - YYFPRINTF (stderr, " %d", *bottom); - YYFPRINTF (stderr, "\n"); + return pos; } -# define YY_STACK_PRINT(Bottom, Top) \ -do { \ - if (yydebug) \ - yy_stack_print ((Bottom), (Top)); \ -} while (YYID (0)) - +static ParseTreeNode* said_branch_node(ParseTreeNode* pos, + ParseTreeNode* left, + ParseTreeNode* right) { + pos->type = kParseTreeBranchNode; + pos->left = left; + pos->right = right; -/*------------------------------------------------. -| Report that the YYRULE is going to be reduced. | -`------------------------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_reduce_print (YYSTYPE *yyvsp, int yyrule) -#else -static void -yy_reduce_print (yyvsp, yyrule) - YYSTYPE *yyvsp; - int yyrule; -#endif -{ - int yynrhs = yyr2[yyrule]; - int yyi; - unsigned long int yylno = yyrline[yyrule]; - YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", - yyrule - 1, yylno); - /* The symbols being reduced. */ - for (yyi = 0; yyi < yynrhs; yyi++) - { - fprintf (stderr, " $%d = ", yyi + 1); - yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi], - &(yyvsp[(yyi + 1) - (yynrhs)]) - ); - fprintf (stderr, "\n"); - } + return pos; } -# define YY_REDUCE_PRINT(Rule) \ -do { \ - if (yydebug) \ - yy_reduce_print (yyvsp, Rule); \ -} while (YYID (0)) - -/* Nonzero means print parse trace. It is left uninitialized so that - multiple parsers can coexist. */ -int yydebug; -#else /* !YYDEBUG */ -# define YYDPRINTF(Args) -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) -# define YY_STACK_PRINT(Bottom, Top) -# define YY_REDUCE_PRINT(Rule) -#endif /* !YYDEBUG */ - - -/* YYINITDEPTH -- initial size of the parser's stacks. */ -#ifndef YYINITDEPTH -# define YYINITDEPTH 200 -#endif +static ParseTreeNode* said_branch_attach_left(ParseTreeNode* pos, + ParseTreeNode* left) { + pos->type = kParseTreeBranchNode; + pos->left = left; -/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only - if the built-in stack extension method is used). - - Do not make this value too large; the results are undefined if - YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) - evaluated with infinite-precision integer arithmetic. */ - -#ifndef YYMAXDEPTH -# define YYMAXDEPTH 10000 -#endif + return pos; - +} -#if YYERROR_VERBOSE +static ParseTreeNode* said_branch_attach_right(ParseTreeNode* pos, + ParseTreeNode* right) { + pos->type = kParseTreeBranchNode; + pos->right = right; -# ifndef yystrlen -# if defined __GLIBC__ && defined _STRING_H -# define yystrlen strlen -# else -/* Return the length of YYSTR. */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static YYSIZE_T -yystrlen (const char *yystr) -#else -static YYSIZE_T -yystrlen (yystr) - const char *yystr; -#endif -{ - YYSIZE_T yylen; - for (yylen = 0; yystr[yylen]; yylen++) - continue; - return yylen; + return pos; } -# endif -# endif - -# ifndef yystpcpy -# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE -# define yystpcpy stpcpy -# else -/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in - YYDEST. */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static char * -yystpcpy (char *yydest, const char *yysrc) -#else -static char * -yystpcpy (yydest, yysrc) - char *yydest; - const char *yysrc; -#endif -{ - char *yyd = yydest; - const char *yys = yysrc; - while ((*yyd++ = *yys++) != '\0') - continue; - return yyd - 1; -} -# endif -# endif - -# ifndef yytnamerr -/* Copy to YYRES the contents of YYSTR after stripping away unnecessary - quotes and backslashes, so that it's suitable for yyerror. The - heuristic is that double-quoting is unnecessary unless the string - contains an apostrophe, a comma, or backslash (other than - backslash-backslash). YYSTR is taken from yytname. If YYRES is - null, do not copy; instead, return the length of what the result - would have been. */ -static YYSIZE_T -yytnamerr (char *yyres, const char *yystr) -{ - if (*yystr == '"') - { - YYSIZE_T yyn = 0; - char const *yyp = yystr; - - for (;;) - switch (*++yyp) - { - case '\'': - case ',': - goto do_not_strip_quotes; - - case '\\': - if (*++yyp != '\\') - goto do_not_strip_quotes; - /* Fall through. */ - default: - if (yyres) - yyres[yyn] = *yyp; - yyn++; - break; - - case '"': - if (yyres) - yyres[yyn] = '\0'; - return yyn; - } - do_not_strip_quotes: ; - } - - if (! yyres) - return yystrlen (yystr); - - return yystpcpy (yyres, yystr) - yyres; -} -# endif - -/* Copy into YYRESULT an error message about the unexpected token - YYCHAR while in state YYSTATE. Return the number of bytes copied, - including the terminating null byte. If YYRESULT is null, do not - copy anything; just return the number of bytes that would be - copied. As a special case, return 0 if an ordinary "syntax error" - message will do. Return YYSIZE_MAXIMUM if overflow occurs during - size calculation. */ -static YYSIZE_T -yysyntax_error (char *yyresult, int yystate, int yychar) -{ - int yyn = yypact[yystate]; - - if (! (YYPACT_NINF < yyn && yyn <= YYLAST)) - return 0; - else - { - int yytype = YYTRANSLATE (yychar); - YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]); - YYSIZE_T yysize = yysize0; - YYSIZE_T yysize1; - int yysize_overflow = 0; - enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; - char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; - int yyx; - -# if 0 - /* This is so xgettext sees the translatable formats that are - constructed on the fly. */ - YY_("syntax error, unexpected %s"); - YY_("syntax error, unexpected %s, expecting %s"); - YY_("syntax error, unexpected %s, expecting %s or %s"); - YY_("syntax error, unexpected %s, expecting %s or %s or %s"); - YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"); -# endif - char *yyfmt; - char const *yyf; - static char const yyunexpected[] = "syntax error, unexpected %s"; - static char const yyexpecting[] = ", expecting %s"; - static char const yyor[] = " or %s"; - char yyformat[sizeof yyunexpected - + sizeof yyexpecting - 1 - + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2) - * (sizeof yyor - 1))]; - char const *yyprefix = yyexpecting; - - /* Start YYX at -YYN if negative to avoid negative indexes in - YYCHECK. */ - int yyxbegin = yyn < 0 ? -yyn : 0; - - /* Stay within bounds of both yycheck and yytname. */ - int yychecklim = YYLAST - yyn + 1; - int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; - int yycount = 1; - - yyarg[0] = yytname[yytype]; - yyfmt = yystpcpy (yyformat, yyunexpected); - - for (yyx = yyxbegin; yyx < yyxend; ++yyx) - if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) - { - if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) - { - yycount = 1; - yysize = yysize0; - yyformat[sizeof yyunexpected - 1] = '\0'; - break; - } - yyarg[yycount++] = yytname[yyx]; - yysize1 = yysize + yytnamerr (0, yytname[yyx]); - yysize_overflow |= (yysize1 < yysize); - yysize = yysize1; - yyfmt = yystpcpy (yyfmt, yyprefix); - yyprefix = yyor; - } - - yyf = YY_(yyformat); - yysize1 = yysize + yystrlen (yyf); - yysize_overflow |= (yysize1 < yysize); - yysize = yysize1; - - if (yysize_overflow) - return YYSIZE_MAXIMUM; - - if (yyresult) - { - /* Avoid sprintf, as that infringes on the user's name space. - Don't have undefined behavior even if the translation - produced a string with the wrong number of "%s"s. */ - char *yyp = yyresult; - int yyi = 0; - while ((*yyp = *yyf) != '\0') - { - if (*yyp == '%' && yyf[1] == 's' && yyi < yycount) - { - yyp += yytnamerr (yyp, yyarg[yyi++]); - yyf += 2; - } - else - { - yyp++; - yyf++; - } - } - } - return yysize; - } -} -#endif /* YYERROR_VERBOSE */ - - -/*-----------------------------------------------. -| Release the memory associated to this symbol. | -`-----------------------------------------------*/ - -/*ARGSUSED*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep) -#else -static void -yydestruct (yymsg, yytype, yyvaluep) - const char *yymsg; - int yytype; - YYSTYPE *yyvaluep; -#endif -{ - YYUSE (yyvaluep); +/* + pos + / \ + . \ + * + / \ + / 0 + * + / \ + / \ + / subtree + major / \ + / . + minor + + . = unchanged child node + * = new branch node + 0 = NULL child node. (Location for future siblings of the subtree) - if (!yymsg) - yymsg = "Deleting"; - YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); +*/ - switch (yytype) - { +static bool said_attach_subtree(ParseTreeNode* pos, int major, int minor, + ParseTreeNode* subtree) { + bool retval = true; - default: - break; - } -} - + said_branch_attach_right(pos, + said_branch_node(said_next_node(), + said_branch_node(said_next_node(), + said_leaf_node(said_next_node(), major), + said_branch_attach_left(subtree, + said_leaf_node(said_next_node(), minor))), + 0)); -/* Prevent warnings from -Wmissing-prototypes. */ + return retval; +} -#ifdef YYPARSE_PARAM -#if defined __STDC__ || defined __cplusplus -int yyparse (void *YYPARSE_PARAM); -#else -int yyparse (); -#endif -#else /* ! YYPARSE_PARAM */ -#if defined __STDC__ || defined __cplusplus -int yyparse (void); -#else -int yyparse (); -#endif -#endif /* ! YYPARSE_PARAM */ -/* The look-ahead symbol. */ -int yychar; +/*****************/ +/**** Parsing ****/ +/*****************/ -/* The semantic value of the look-ahead symbol. */ -YYSTYPE yylval; +static bool parseSpec(ParseTreeNode* parentNode); +static bool parsePart2(ParseTreeNode* parentNode, bool& nonempty); +static bool parsePart3(ParseTreeNode* parentNode, bool& nonempty); +static bool parseSlash(ParseTreeNode* parentNode); +static bool parseExpr(ParseTreeNode* parentNode); +static bool parseRef(ParseTreeNode* parentNode); +static bool parseComma(ParseTreeNode* parentNode); +static bool parseList(ParseTreeNode* parentNode); +static bool parseListEntry(ParseTreeNode* parentNode); +static bool parseWord(ParseTreeNode* parentNode); -/* Number of syntax errors so far. */ -int yynerrs; +static bool parseWord(ParseTreeNode* parentNode) +{ + int token = said_tokens[said_token]; + if (token & 0x8000) + return false; + said_token++; + ParseTreeNode* newNode = said_word_node(said_next_node(), token); -/*----------. -| yyparse. | -`----------*/ + parentNode->right = newNode; -#ifdef YYPARSE_PARAM -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -int -yyparse (void *YYPARSE_PARAM) -#else -int -yyparse (YYPARSE_PARAM) - void *YYPARSE_PARAM; -#endif -#else /* ! YYPARSE_PARAM */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -int -yyparse (void) -#else -int -yyparse () + return true; +} -#endif -#endif +static bool parsePart2(ParseTreeNode* parentNode, bool& nonempty) { - - int yystate; - int yyn; - int yyresult; - /* Number of tokens to shift before error messages enabled. */ - int yyerrstatus; - /* Look-ahead token as an internal (translated) token number. */ - int yytoken = 0; -#if YYERROR_VERBOSE - /* Buffer for error messages, and its allocated size. */ - char yymsgbuf[128]; - char *yymsg = yymsgbuf; - YYSIZE_T yymsg_alloc = sizeof yymsgbuf; -#endif + // Store current state for rolling back if we fail + int curToken = said_token; + int curTreePos = said_tree_pos; + ParseTreeNode* curRightChild = parentNode->right; - /* Three stacks and their tools: - `yyss': related to states, - `yyvs': related to semantic values, - `yyls': related to locations. - - Refer to the stacks thru separate pointers, to allow yyoverflow - to reallocate them elsewhere. */ - - /* The state stack. */ - yytype_int16 yyssa[YYINITDEPTH]; - yytype_int16 *yyss = yyssa; - yytype_int16 *yyssp; - - /* The semantic value stack. */ - YYSTYPE yyvsa[YYINITDEPTH]; - YYSTYPE *yyvs = yyvsa; - YYSTYPE *yyvsp; - - - -#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + ParseTreeNode* newNode = said_branch_node(said_next_node(), 0, 0); - YYSIZE_T yystacksize = YYINITDEPTH; - - /* The variables used to return semantic value and location from the - action routines. */ - YYSTYPE yyval; + nonempty = true; + bool found; - /* The number of symbols on the RHS of the reduced rule. - Keep to zero when no symbol should be popped. */ - int yylen = 0; + found = parseSlash(newNode); - YYDPRINTF ((stderr, "Starting parse\n")); + if (found) { - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ + said_attach_subtree(parentNode, 0x142, 0x14a, newNode); - /* Initialize stack pointers. - Waste one element of value and location stack - so that they stay on the same level as the state stack. - The wasted elements are never initialized. */ + return true; - yyssp = yyss; - yyvsp = yyvs; + } else if (said_tokens[said_token] == TOKEN_BRACKETO) { + said_token++; + + found = parsePart2(newNode, nonempty); - goto yysetstate; + if (found) { -/*------------------------------------------------------------. -| yynewstate -- Push a new state, which is found in yystate. | -`------------------------------------------------------------*/ - yynewstate: - /* In all cases, when you get here, the value and location stacks - have just been pushed. So pushing a state here evens the stacks. */ - yyssp++; - - yysetstate: - *yyssp = yystate; - - if (yyss + yystacksize - 1 <= yyssp) - { - /* Get the current used size of the three stacks, in elements. */ - YYSIZE_T yysize = yyssp - yyss + 1; - -#ifdef yyoverflow - { - /* Give user a chance to reallocate the stack. Use copies of - these so that the &'s don't force the real ones into - memory. */ - YYSTYPE *yyvs1 = yyvs; - yytype_int16 *yyss1 = yyss; - - - /* Each stack pointer address is followed by the size of the - data in use in that stack, in bytes. This used to be a - conditional around just the two extra args, but that might - be undefined if yyoverflow is a macro. */ - yyoverflow (YY_("memory exhausted"), - &yyss1, yysize * sizeof (*yyssp), - &yyvs1, yysize * sizeof (*yyvsp), - - &yystacksize); - - yyss = yyss1; - yyvs = yyvs1; - } -#else /* no yyoverflow */ -# ifndef YYSTACK_RELOCATE - goto yyexhaustedlab; -# else - /* Extend the stack our own way. */ - if (YYMAXDEPTH <= yystacksize) - goto yyexhaustedlab; - yystacksize *= 2; - if (YYMAXDEPTH < yystacksize) - yystacksize = YYMAXDEPTH; - - { - yytype_int16 *yyss1 = yyss; - union yyalloc *yyptr = - (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); - if (! yyptr) - goto yyexhaustedlab; - YYSTACK_RELOCATE (yyss); - YYSTACK_RELOCATE (yyvs); - -# undef YYSTACK_RELOCATE - if (yyss1 != yyssa) - YYSTACK_FREE (yyss1); - } -# endif -#endif /* no yyoverflow */ - - yyssp = yyss + yysize - 1; - yyvsp = yyvs + yysize - 1; - - - YYDPRINTF ((stderr, "Stack size increased to %lu\n", - (unsigned long int) yystacksize)); - - if (yyss + yystacksize - 1 <= yyssp) - YYABORT; - } - - YYDPRINTF ((stderr, "Entering state %d\n", yystate)); - - goto yybackup; - -/*-----------. -| yybackup. | -`-----------*/ -yybackup: - - /* Do appropriate processing given the current state. Read a - look-ahead token if we need one and don't already have one. */ - - /* First try to decide what to do without reference to look-ahead token. */ - yyn = yypact[yystate]; - if (yyn == YYPACT_NINF) - goto yydefault; - - /* Not known => get a look-ahead token if don't already have one. */ - - /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */ - if (yychar == YYEMPTY) - { - YYDPRINTF ((stderr, "Reading a token: ")); - yychar = YYLEX; - } - - if (yychar <= YYEOF) - { - yychar = yytoken = YYEOF; - YYDPRINTF ((stderr, "Now at end of input.\n")); - } - else - { - yytoken = YYTRANSLATE (yychar); - YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); - } - - /* If the proper action on seeing token YYTOKEN is to reduce or to - detect an error, take that action. */ - yyn += yytoken; - if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) - goto yydefault; - yyn = yytable[yyn]; - if (yyn <= 0) - { - if (yyn == 0 || yyn == YYTABLE_NINF) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } + if (said_tokens[said_token] == TOKEN_BRACKETC) { + said_token++; - if (yyn == YYFINAL) - YYACCEPT; + said_attach_subtree(parentNode, 0x152, 0x142, newNode); - /* Count tokens shifted since error; after three, turn off error - status. */ - if (yyerrstatus) - yyerrstatus--; + return true; + } + } - /* Shift the look-ahead token. */ - YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); + } - /* Discard the shifted token unless it is eof. */ - if (yychar != YYEOF) - yychar = YYEMPTY; + // CHECKME: this doesn't look right if the [] section matched partially + // Should the below 'if' be an 'else if' ? - yystate = yyn; - *++yyvsp = yylval; + if (said_tokens[said_token] == TOKEN_SLASH) { + said_token++; - goto yynewstate; + nonempty = false; + return true; -/*-----------------------------------------------------------. -| yydefault -- do the default action for the current state. | -`-----------------------------------------------------------*/ -yydefault: - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - goto yyreduce; + } + // Rollback + said_token = curToken; + said_tree_pos = curTreePos; + parentNode->right = curRightChild; + return false; +} -/*-----------------------------. -| yyreduce -- Do a reduction. | -`-----------------------------*/ -yyreduce: - /* yyn is the number of a rule to reduce with. */ - yylen = yyr2[yyn]; +static bool parsePart3(ParseTreeNode* parentNode, bool& nonempty) +{ + // Store current state for rolling back if we fail + int curToken = said_token; + int curTreePos = said_tree_pos; + ParseTreeNode* curRightChild = parentNode->right; - /* If YYLEN is nonzero, implement the default value of the action: - `$$ = $1'. + ParseTreeNode* newNode = said_branch_node(said_next_node(), 0, 0); - Otherwise, the following line sets YYVAL to garbage. - This behavior is undocumented and Bison - users should not rely upon it. Assigning to YYVAL - unconditionally makes the parser a bit smaller, and it avoids a - GCC warning that YYVAL may be used uninitialized. */ - yyval = yyvsp[1-yylen]; + bool found; + nonempty = true; - YY_REDUCE_PRINT (yyn); - switch (yyn) - { - case 2: + found = parseSlash(newNode); - { (yyval) = said_top_branch(said_attach_branch((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)]))); ;} - break; + if (found) { - case 3: + said_attach_subtree(parentNode, 0x143, 0x14a, newNode); - { (yyval) = said_top_branch(said_attach_branch((yyvsp[(1) - (3)]), said_attach_branch((yyvsp[(2) - (3)]), (yyvsp[(3) - (3)])))); ;} - break; + return true; - case 4: + } else if (said_tokens[said_token] == TOKEN_BRACKETO) { + said_token++; + + found = parsePart3(newNode, nonempty); - { (yyval) = said_top_branch(said_attach_branch((yyvsp[(1) - (4)]), said_attach_branch((yyvsp[(2) - (4)]), said_attach_branch((yyvsp[(3) - (4)]), (yyvsp[(4) - (4)]))))); ;} - break; + if (found) { - case 5: + if (said_tokens[said_token] == TOKEN_BRACKETC) { + said_token++; - { (yyval) = SAID_BRANCH_NULL; ;} - break; + said_attach_subtree(parentNode, 0x152, 0x143, newNode); - case 6: + return true; + } + } - { (yyval) = said_paren(said_value(0x14b, said_value(0xf900, said_terminal(0xf900))), SAID_BRANCH_NULL); ;} - break; + } - case 7: + // CHECKME: this doesn't look right if the [] section matched partially + // Should the below 'if' be an 'else if' ? - { (yyval) = SAID_BRANCH_NULL; ;} - break; + if (said_tokens[said_token] == TOKEN_SLASH) { + said_token++; - case 8: + nonempty = false; - { (yyval) = said_paren(said_value(0x141, said_value(0x149, (yyvsp[(1) - (1)]))), SAID_BRANCH_NULL); ;} - break; + return true; - case 9: + } - { (yyval) = said_aug_branch(0x142, 0x14a, (yyvsp[(2) - (2)]), SAID_BRANCH_NULL); ;} - break; + // Rollback + said_token = curToken; + said_tree_pos = curTreePos; + parentNode->right = curRightChild; + return false; +} - case 10: - { (yyval) = said_aug_branch(0x152, 0x142, said_aug_branch(0x142, 0x14a, (yyvsp[(3) - (4)]), SAID_BRANCH_NULL), SAID_BRANCH_NULL); ;} - break; +static bool parseSlash(ParseTreeNode* parentNode) +{ + // Store current state for rolling back if we fail + int curToken = said_token; + int curTreePos = said_tree_pos; + ParseTreeNode* curRightChild = parentNode->right; - case 11: + if (said_tokens[said_token] == TOKEN_SLASH) { + said_token++; - { (yyval) = SAID_BRANCH_NULL; ;} - break; + bool found = parseExpr(parentNode); - case 12: + if (found) + return true; - { (yyval) = said_aug_branch(0x143, 0x14a, (yyvsp[(2) - (2)]), SAID_BRANCH_NULL); ;} - break; + } - case 13: + // Rollback + said_token = curToken; + said_tree_pos = curTreePos; + parentNode->right = curRightChild; + return false; +} - { (yyval) = said_aug_branch(0x152, 0x143, said_aug_branch(0x143, 0x14a, (yyvsp[(3) - (4)]), SAID_BRANCH_NULL), SAID_BRANCH_NULL); ;} - break; - case 14: +static bool parseRef(ParseTreeNode* parentNode) +{ + // Store current state for rolling back if we fail + int curToken = said_token; + int curTreePos = said_tree_pos; + ParseTreeNode* curRightChild = parentNode->right; - { (yyval) = SAID_BRANCH_NULL; ;} - break; + ParseTreeNode* newNode = said_branch_node(said_next_node(), 0, 0); - case 15: + ParseTreeNode* newParent = parentNode; - { (yyval) = said_paren(said_value(0x141, said_value(0x153, said_terminal((yyvsp[(1) - (1)])))), SAID_BRANCH_NULL); ;} - break; + bool found; - case 16: + if (said_tokens[said_token] == TOKEN_LT) { + said_token++; - { (yyval) = said_aug_branch(0x141, 0x14f, (yyvsp[(1) - (1)]), SAID_BRANCH_NULL); ;} - break; + found = parseList(newNode); - case 17: + if (found) { - { (yyval) = said_aug_branch(0x141, 0x14f, said_aug_branch(0x152, 0x14c, said_aug_branch(0x141, 0x14f, (yyvsp[(2) - (3)]), SAID_BRANCH_NULL), SAID_BRANCH_NULL), SAID_BRANCH_NULL); ;} - break; + said_attach_subtree(newParent, 0x144, 0x14f, newNode); - case 18: + newParent = newParent->right; + + newNode = said_branch_node(said_next_node(), 0, 0); - { (yyval) = (yyvsp[(1) - (1)]); ;} - break; + found = parseRef(newNode); - case 19: + if (found) { - { (yyval) = said_aug_branch(0x141, 0x14c, (yyvsp[(2) - (3)]), SAID_BRANCH_NULL); ;} - break; + said_attach_subtree(newParent, 0x141, 0x144, newNode); - case 20: + } - { (yyval) = said_attach_branch((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; + return true; - case 21: + } - { (yyval) = said_attach_branch((yyvsp[(1) - (4)]), (yyvsp[(3) - (4)])); ;} - break; + } - case 22: + // NB: This is not an "else if'. + // If there is a "< [ ... ]", that is parsed as "< ..." - { (yyval) = said_attach_branch((yyvsp[(1) - (5)]), (yyvsp[(3) - (5)])); ;} - break; + if (said_tokens[said_token] == TOKEN_BRACKETO) { + said_token++; + + found = parseRef(newNode); - case 23: + if (found) { - { (yyval) = said_attach_branch((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)])); ;} - break; + if (said_tokens[said_token] == TOKEN_BRACKETC) { + said_token++; - case 24: + said_attach_subtree(parentNode, 0x152, 0x144, newNode); - { (yyval) = (yyvsp[(1) - (1)]); ;} - break; + return true; + } + } - case 25: + } - { (yyval) = (yyvsp[(1) - (1)]); ;} - break; + // Rollback + said_token = curToken; + said_tree_pos = curTreePos; + parentNode->right = curRightChild; + return false; +} - case 26: +static bool parseComma(ParseTreeNode* parentNode) +{ + // Store current state for rolling back if we fail + int curToken = said_token; + int curTreePos = said_tree_pos; + ParseTreeNode* curRightChild = parentNode->right; - { (yyval) = (yyvsp[(1) - (1)]); ;} - break; + if (said_tokens[said_token] == TOKEN_COMMA) { + said_token++; - case 27: + bool found = parseList(parentNode); - { (yyval) = said_aug_branch(0x152, 0x144, (yyvsp[(2) - (3)]), SAID_BRANCH_NULL); ;} - break; + if (found) + return true; - case 28: + } - { (yyval) = said_attach_branch((yyvsp[(1) - (4)]), said_aug_branch(0x152, 0x144, (yyvsp[(3) - (4)]), SAID_BRANCH_NULL)); ;} - break; + // Rollback + said_token = curToken; + said_tree_pos = curTreePos; + parentNode->right = curRightChild; + return false; +} - case 29: +static bool parseListEntry(ParseTreeNode* parentNode) +{ + // Store current state for rolling back if we fail + int curToken = said_token; + int curTreePos = said_tree_pos; + ParseTreeNode* curRightChild = parentNode->right; - { (yyval) = said_aug_branch(0x144, 0x14f, (yyvsp[(2) - (3)]), (yyvsp[(3) - (3)])); ;} - break; + ParseTreeNode* newNode = said_branch_node(said_next_node(), 0, 0); - case 30: + bool found; - { (yyval) = said_aug_branch(0x144, 0x14f, said_aug_branch(0x141, 0x144, (yyvsp[(2) - (4)]), SAID_BRANCH_NULL), SAID_BRANCH_NULL); ;} - break; + if (said_tokens[said_token] == TOKEN_BRACKETO) { + said_token++; - case 31: + found = parseExpr(newNode); - { (yyval) = said_aug_branch(0x144, 0x14f, (yyvsp[(2) - (2)]), SAID_BRANCH_NULL); ;} - break; + if (found) { - case 32: + if (said_tokens[said_token] == TOKEN_BRACKETC) { + said_token++; - { (yyval) = said_aug_branch(0x152, 0x144, said_aug_branch(0x144, 0x14f, (yyvsp[(3) - (4)]), SAID_BRANCH_NULL), SAID_BRANCH_NULL); ;} - break; + said_attach_subtree(parentNode, 0x152, 0x14c, newNode); - case 33: + return true; + } + } - { (yyval) = said_aug_branch(0x141, 0x144, said_aug_branch(0x144, 0x14f, (yyvsp[(2) - (3)]), SAID_BRANCH_NULL), (yyvsp[(3) - (3)])); ;} - break; + } else if (said_tokens[said_token] == TOKEN_PARENO) { + said_token++; - case 34: + found = parseExpr(newNode); - { (yyval) = said_aug_branch(0x141, 0x144, said_aug_branch(0x144, 0x14f, (yyvsp[(2) - (2)]), SAID_BRANCH_NULL), SAID_BRANCH_NULL); ;} - break; + if (found) { - case 35: + if (said_tokens[said_token] == TOKEN_PARENC) { + said_token++; - { (yyval) = said_aug_branch(0x141, 0x14c, (yyvsp[(2) - (4)]), SAID_BRANCH_NULL); ;} - break; + said_attach_subtree(parentNode, 0x141, 0x14c, newNode); + return true; + } + } -/* Line 1267 of yacc.c. */ + } else if (parseWord(newNode)) { - default: break; - } - YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); + said_attach_subtree(parentNode, 0x141, 0x153, newNode); - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); + return true; - *++yyvsp = yyval; + } - /* Now `shift' the result of the reduction. Determine what state - that goes to, based on the state we popped back to and the rule - number reduced by. */ + // Rollback + said_token = curToken; + said_tree_pos = curTreePos; + parentNode->right = curRightChild; + return false; +} - yyn = yyr1[yyn]; +static bool parseList(ParseTreeNode* parentNode) +{ + // Store current state for rolling back if we fail + int curToken = said_token; + int curTreePos = said_tree_pos; + ParseTreeNode* curRightChild = parentNode->right; - yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; - if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) - yystate = yytable[yystate]; - else - yystate = yydefgoto[yyn - YYNTOKENS]; + bool found; - goto yynewstate; + ParseTreeNode* newParent = parentNode; + found = parseListEntry(newParent); -/*------------------------------------. -| yyerrlab -- here on detecting error | -`------------------------------------*/ -yyerrlab: - /* If not already recovering from an error, report this error. */ - if (!yyerrstatus) - { - ++yynerrs; -#if ! YYERROR_VERBOSE - yyerror (YY_("syntax error")); -#else - { - YYSIZE_T yysize = yysyntax_error (0, yystate, yychar); - if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) - { - YYSIZE_T yyalloc = 2 * yysize; - if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) - yyalloc = YYSTACK_ALLOC_MAXIMUM; - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); - yymsg = (char *) YYSTACK_ALLOC (yyalloc); - if (yymsg) - yymsg_alloc = yyalloc; - else - { - yymsg = yymsgbuf; - yymsg_alloc = sizeof yymsgbuf; - } - } - - if (0 < yysize && yysize <= yymsg_alloc) - { - (void) yysyntax_error (yymsg, yystate, yychar); - yyerror (yymsg); - } - else - { - yyerror (YY_("syntax error")); - if (yysize != 0) - goto yyexhaustedlab; - } - } -#endif - } + if (found) { + newParent = newParent->right; + found = parseComma(newParent); - if (yyerrstatus == 3) - { - /* If just tried and failed to reuse look-ahead token after an - error, discard it. */ + return true; - if (yychar <= YYEOF) - { - /* Return failure if at end of input. */ - if (yychar == YYEOF) - YYABORT; - } - else - { - yydestruct ("Error: discarding", - yytoken, &yylval); - yychar = YYEMPTY; - } - } - - /* Else will try to reuse look-ahead token after shifting the error - token. */ - goto yyerrlab1; - - -/*---------------------------------------------------. -| yyerrorlab -- error raised explicitly by YYERROR. | -`---------------------------------------------------*/ -yyerrorlab: - - /* Pacify compilers like GCC when the user code never invokes - YYERROR and the label yyerrorlab therefore never appears in user - code. */ - if (/*CONSTCOND*/ 0) - goto yyerrorlab; - - /* Do not reclaim the symbols of the rule which action triggered - this YYERROR. */ - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - yystate = *yyssp; - goto yyerrlab1; - - -/*-------------------------------------------------------------. -| yyerrlab1 -- common code for both syntax error and YYERROR. | -`-------------------------------------------------------------*/ -yyerrlab1: - yyerrstatus = 3; /* Each real token shifted decrements this. */ - - for (;;) - { - yyn = yypact[yystate]; - if (yyn != YYPACT_NINF) - { - yyn += YYTERROR; - if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) - { - yyn = yytable[yyn]; - if (0 < yyn) - break; - } } - /* Pop the current state because it cannot handle the error token. */ - if (yyssp == yyss) - YYABORT; + // Rollback + said_token = curToken; + said_tree_pos = curTreePos; + parentNode->right = curRightChild; + return false; +} +static bool parseExpr(ParseTreeNode* parentNode) +{ + // Store current state for rolling back if we fail + int curToken = said_token; + int curTreePos = said_tree_pos; + ParseTreeNode* curRightChild = parentNode->right; - yydestruct ("Error: popping", - yystos[yystate], yyvsp); - YYPOPSTACK (1); - yystate = *yyssp; - YY_STACK_PRINT (yyss, yyssp); - } + ParseTreeNode* newNode = said_branch_node(said_next_node(), 0, 0); - if (yyn == YYFINAL) - YYACCEPT; + bool ret = false; + bool found; - *++yyvsp = yylval; + ParseTreeNode* newParent = parentNode; + found = parseList(newNode); - /* Shift the error token. */ - YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); + if (found) { + ret = true; - yystate = yyn; - goto yynewstate; + said_attach_subtree(newParent, 0x141, 0x14F, newNode); + newParent = newParent->right; -/*-------------------------------------. -| yyacceptlab -- YYACCEPT comes here. | -`-------------------------------------*/ -yyacceptlab: - yyresult = 0; - goto yyreturn; + } -/*-----------------------------------. -| yyabortlab -- YYABORT comes here. | -`-----------------------------------*/ -yyabortlab: - yyresult = 1; - goto yyreturn; + found = parseRef(newParent); -#ifndef yyoverflow -/*-------------------------------------------------. -| yyexhaustedlab -- memory exhaustion comes here. | -`-------------------------------------------------*/ -yyexhaustedlab: - yyerror (YY_("memory exhausted")); - yyresult = 2; - /* Fall through. */ -#endif + if (found || ret) + return true; -yyreturn: - if (yychar != YYEOF && yychar != YYEMPTY) - yydestruct ("Cleanup: discarding lookahead", - yytoken, &yylval); - /* Do not reclaim the symbols of the rule which action triggered - this YYABORT or YYACCEPT. */ - YYPOPSTACK (yylen); - YY_STACK_PRINT (yyss, yyssp); - while (yyssp != yyss) - { - yydestruct ("Cleanup: popping", - yystos[*yyssp], yyvsp); - YYPOPSTACK (1); - } -#ifndef yyoverflow - if (yyss != yyssa) - YYSTACK_FREE (yyss); -#endif -#if YYERROR_VERBOSE - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); -#endif - /* Make sure YYID is used. */ - return YYID (yyresult); + // Rollback + said_token = curToken; + said_tree_pos = curTreePos; + parentNode->right = curRightChild; + return false; } +static bool parseSpec(ParseTreeNode* parentNode) +{ + // Store current state for rolling back if we fail + int curToken = said_token; + int curTreePos = said_tree_pos; + ParseTreeNode* curRightChild = parentNode->right; + ParseTreeNode* newNode = said_branch_node(said_next_node(), 0, 0); + bool ret = false; + bool found; -int parse_yy_token_lookup[] = {YY_COMMA, YY_AMP, YY_SLASH, YY_PARENO, YY_PARENC, YY_BRACKETSO, YY_BRACKETSC, YY_HASH, YY_LT, YY_GT}; + ParseTreeNode* newParent = parentNode; -static int yylex() { - int retval = said_tokens[said_token++]; + found = parseExpr(newNode); - if (retval < SAID_LONG(SAID_FIRST)) { - yylval = retval; - retval = WGROUP; - } else { - retval >>= 8; - - if (retval == SAID_TERM) - retval = 0; - else { - assert(retval >= SAID_FIRST); - retval = parse_yy_token_lookup[retval - SAID_FIRST]; - if (retval == YY_BRACKETSO) { - if ((said_tokens[said_token] >> 8) == SAID_LT) - retval = YY_BRACKETSO_LT; - else - if ((said_tokens[said_token] >> 8) == SAID_SLASH) - retval = YY_BRACKETSO_SLASH; - } else if (retval == YY_LT && (said_tokens[said_token] >> 8) == SAID_BRACKO) { - retval = YY_LT_BRACKETSO; - } else if (retval == YY_LT && (said_tokens[said_token] >> 8) == SAID_PARENO) { - retval = YY_LT_PARENO; - } - } - } + if (found) { + // Sentence part 1 found + said_attach_subtree(newParent, 0x141, 0x149, newNode); - return retval; -} + newParent = newParent->right; -static int said_next_node() { - return ((said_tree_pos == 0) || (said_tree_pos >= VOCAB_TREE_NODES)) ? said_tree_pos = 0 : said_tree_pos++; -} + ret = true; + } -#define SAID_NEXT_NODE said_next_node() + bool nonempty; -static int said_leaf_node(tree_t pos, int value) { - said_tree[pos].type = kParseTreeLeafNode; + found = parsePart2(newParent, nonempty); - if (value != VALUE_IGNORE) - said_tree[pos].content.value = value; + if (found) { - return pos; -} + ret = true; -static int said_branch_node(tree_t pos, int left, int right) { - said_tree[pos].type = kParseTreeBranchNode; + if (nonempty) // non-empty part found + newParent = newParent->right; - if (left != VALUE_IGNORE) - said_tree[pos].content.branches[0] = left; - if (right != VALUE_IGNORE) - said_tree[pos].content.branches[1] = right; + found = parsePart3(newParent, nonempty); - return pos; -} + if (found) { -static tree_t said_paren(tree_t t1, tree_t t2) { - if (t1) - return said_branch_node(SAID_NEXT_NODE, t1, t2); - else - return t2; -} + if (nonempty) + newParent = newParent->right; + } + } -static tree_t said_value(int val, tree_t t) { - return said_branch_node(SAID_NEXT_NODE, said_leaf_node(SAID_NEXT_NODE, val), t); + if (said_tokens[said_token] == TOKEN_GT) { + said_token++; -} + newNode = said_branch_node(said_next_node(), 0, + said_leaf_node(said_next_node(), TOKEN_GT)); -static tree_t said_terminal(int val) { - return said_leaf_node(SAID_NEXT_NODE, val); -} + said_attach_subtree(newParent, 0x14B, TOKEN_GT, newNode); -static tree_t said_aug_branch(int n1, int n2, tree_t t1, tree_t t2) { - int retval; + } - retval = said_branch_node(SAID_NEXT_NODE, - said_branch_node(SAID_NEXT_NODE, - said_leaf_node(SAID_NEXT_NODE, n1), - said_branch_node(SAID_NEXT_NODE, - said_leaf_node(SAID_NEXT_NODE, n2), - t1) - ), - t2); - -#ifdef SAID_DEBUG - fprintf(stderr, "AUG(0x%x, 0x%x, [%04x], [%04x]) = [%04x]\n", n1, n2, t1, t2, retval); -#endif - return retval; + if (ret) + return true; + + // Rollback + said_token = curToken; + said_tree_pos = curTreePos; + parentNode->right = curRightChild; + return false; } -static tree_t said_attach_branch(tree_t base, tree_t attacheant) { -#ifdef SAID_DEBUG - fprintf(stderr, "ATT2([%04x], [%04x]) = [%04x]\n", base, attacheant, base); -#endif - if (!attacheant) - return base; - if (!base) - return attacheant; +static bool buildSaidTree() { + said_branch_node(said_tree, &said_tree[1], &said_tree[2]); + said_leaf_node(&said_tree[1], 0x141); // Magic number #1 + said_branch_node(&said_tree[2], &said_tree[3], 0); + said_leaf_node(&said_tree[3], 0x13f); // Magic number #2 - if (!base) - return 0; // Happens if we're out of space + said_tree_pos = SAID_TREE_START; - said_branch_node(base, VALUE_IGNORE, attacheant); + bool ret = parseSpec(&said_tree[2]); - return base; -} + if (!ret) + return false; -static said_spec_t said_top_branch(tree_t first) { -#ifdef SAID_DEBUG - fprintf(stderr, "TOP([%04x])\n", first); -#endif - said_branch_node(0, 1, 2); - said_leaf_node(1, 0x141); // Magic number #1 - said_branch_node(2, 3, first); - said_leaf_node(3, 0x13f); // Magic number #2 + if (said_tokens[said_token] != TOKEN_TERM) { + // No terminator, so parse error. - ++said_blessed; + // Rollback + said_tree[2].right = 0; + said_token = 0; + said_tree_pos = SAID_TREE_START; + return false; + } - return 0; + return true; } -static int said_parse_spec(byte *spec) { +static int said_parse_spec(const byte *spec) { int nextitem; - said_parse_error = NULL; said_token = 0; said_tokens_nr = 0; - said_blessed = 0; said_tree_pos = SAID_TREE_START; @@ -2025,26 +674,13 @@ static int said_parse_spec(byte *spec) { } while ((nextitem != SAID_TERM) && (said_tokens_nr < MAX_SAID_TOKENS)); - if (nextitem == SAID_TERM) - yyparse(); - else { + if (nextitem != SAID_TERM) { warning("SAID spec is too long"); return 1; } - if (said_parse_error) { - warning("Error while parsing SAID spec: %s", said_parse_error); - free(said_parse_error); - return 1; - } - - if (said_tree_pos == 0) { - warning("Out of tree space while parsing SAID spec"); - return 1; - } - - if (said_blessed != 1) { - warning("Found multiple top branches"); + if (!buildSaidTree()) { + warning("Error while parsing SAID spec"); return 1; } @@ -2055,385 +691,304 @@ static int said_parse_spec(byte *spec) { /**** Augmentation ****/ /**********************/ -// primitive functions +static bool dontclaim; +static int outputDepth; -#define AUG_READ_BRANCH(a, br, p) \ - if (tree[p].type != kParseTreeBranchNode) \ - return 0; \ - a = tree[p].content.branches[br]; +enum ScanSaidType { + SCAN_SAID_AND = 0, + SCAN_SAID_OR = 1 +}; -#define AUG_READ_VALUE(a, p) \ - if (tree[p].type != kParseTreeLeafNode) \ - return 0; \ - a = tree[p].content.value; +static int matchTrees(ParseTreeNode* parseT, ParseTreeNode* saidT); +static int scanSaidChildren(ParseTreeNode* parseT, ParseTreeNode* saidT, + ScanSaidType type); +static int scanParseChildren(ParseTreeNode* parseT, ParseTreeNode* saidT); -#define AUG_ASSERT(i) \ - if (!i) return 0; -static int aug_get_next_sibling(parse_tree_node_t *tree, int pos, int *first, int *second) { - // Returns the next sibling relative to the specified position in 'tree', - // sets *first and *second to its augment node values, returns the new position - // or 0 if there was no next sibling - int seek, valpos; +static int node_major(ParseTreeNode* node) { + assert(node->type == kParseTreeBranchNode); + assert(node->left->type == kParseTreeLeafNode); + return node->left->value; +} +static int node_minor(ParseTreeNode* node) { + assert(node->type == kParseTreeBranchNode); + assert(node->right->type == kParseTreeBranchNode); + assert(node->right->left->type == kParseTreeLeafNode); + return node->right->left->value; +} +static bool node_is_terminal(ParseTreeNode* node) { + return (node->right->right && + node->right->right->type != kParseTreeBranchNode); +} +static int node_terminal_value(ParseTreeNode* node) { + assert(node_is_terminal(node)); + return node->right->right->value; +} +#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION +static void node_print_desc(ParseTreeNode* node) { + assert(node); + assert(node->left); + if (node->left->type == kParseTreeBranchNode) { + scidprintf("< "); + node_print_desc(node->left); + scidprintf(", ...>"); + } else { + if (node_is_terminal(node)) { + scidprintf("(%03x %03x %03x)", node_major(node), + node_minor(node), + node_terminal_value(node)); + } else { + scidprintf("(%03x %03x <...>)", node_major(node), + node_minor(node)); + } + } +} +#else +static void node_print_desc(ParseTreeNode*) { } +#endif - AUG_READ_BRANCH(pos, 1, pos); - AUG_ASSERT(pos); - AUG_READ_BRANCH(seek, 0, pos); - AUG_ASSERT(seek); - // Now retrieve first value - AUG_READ_BRANCH(valpos, 0, seek); - AUG_ASSERT(valpos); - AUG_READ_VALUE(*first, valpos); - // Get second value - AUG_READ_BRANCH(seek, 1, seek); - AUG_ASSERT(seek); - AUG_READ_BRANCH(valpos, 0, seek); - AUG_ASSERT(valpos); - AUG_READ_VALUE(*second, valpos); - return pos; -} +static int matchTrees(ParseTreeNode* parseT, ParseTreeNode* saidT) +{ + outputDepth++; + scidprintf("%*smatchTrees on ", outputDepth, ""); + node_print_desc(parseT); + scidprintf(" and "); + node_print_desc(saidT); + scidprintf("\n"); -static int aug_get_wgroup(parse_tree_node_t *tree, int pos) { - // Returns 0 if pos in tree is not the root of a 3-element list, otherwise - // it returns the last element (which, in practice, is the word group - int val; + bool inParen = node_minor(saidT) == 0x14F || node_minor(saidT) == 0x150; + bool inBracket = node_major(saidT) == 0x152; - AUG_READ_BRANCH(pos, 0, pos); - AUG_ASSERT(pos); - AUG_READ_BRANCH(pos, 1, pos); - AUG_ASSERT(pos); - AUG_READ_BRANCH(pos, 1, pos); - AUG_ASSERT(pos); - AUG_READ_VALUE(val, pos); + int ret; - return val; -} + if (node_major(parseT) != 0x141 && + node_major(saidT) != 0x141 && node_major(saidT) != 0x152 && + node_major(saidT) != node_major(parseT)) + { + ret = -1; + } -static int aug_get_base_node(parse_tree_node_t *tree) { - int startpos = 0; - AUG_READ_BRANCH(startpos, 1, startpos); + // parse major is 0x141 and/or + // said major is 0x141/0x152 and/or + // said major is parse major - return startpos; -} + else if (node_is_terminal(saidT) && node_is_terminal(parseT) ) { -// semi-primitive functions + // both saidT and parseT are terminals -static int aug_get_first_child(parse_tree_node_t *tree, int pos, int *first, int *second) { - // like aug_get_next_sibling, except that it recurses into the tree and - // finds the first child (usually *not* Ayanami Rei) of the current branch - // rather than its next sibling. - AUG_READ_BRANCH(pos, 0, pos); - AUG_ASSERT(pos); - AUG_READ_BRANCH(pos, 1, pos); - AUG_ASSERT(pos); + int said_val = node_terminal_value(saidT); + int parse_val = node_terminal_value(parseT); - return aug_get_next_sibling(tree, pos, first, second); -} + if (said_val != WORD_NONE && + (said_val == parse_val || said_val == WORD_ANY || + parse_val == WORD_ANY)) + ret = 1; + else + ret = -1; -static void aug_find_words_recursively(parse_tree_node_t *tree, int startpos, int *base_words, int *base_words_nr, - int *ref_words, int *ref_words_nr, int maxwords, int refbranch) { - // Finds and lists all base (141) and reference (144) words */ - int major, minor; - int word; - int pos = aug_get_first_child(tree, startpos, &major, &minor); + scidprintf("%*smatchTrees matching terminals: %03x vs %03x (%d)\n", + outputDepth, "", parse_val, said_val, ret); - //if (major == WORD_TYPE_REF) - // refbranch = 1; + } else if (node_is_terminal(saidT) && !node_is_terminal(parseT)) { - while (pos) { - if ((word = aug_get_wgroup(tree, pos))) { // found a word - if (!refbranch && major == WORD_TYPE_BASE) { - if ((*base_words_nr) == maxwords) { - warning("Out of regular words"); - return; // return gracefully - } + // saidT is a terminal, but parseT isn't - base_words[*base_words_nr] = word; // register word - ++(*base_words_nr); + if (node_major(parseT) == 0x141 || + node_major(parseT) == node_major(saidT)) + ret = scanParseChildren(parseT->right->right, saidT); + else + ret = 0; - } - if (major == WORD_TYPE_REF || refbranch) { - if ((*ref_words_nr) == maxwords) { - warning("Out of reference words"); - return; // return gracefully - } + } else if (node_is_terminal(parseT)) { - ref_words[*ref_words_nr] = word; // register word - ++(*ref_words_nr); + // parseT is a terminal, but saidT isn't - } - if (major != WORD_TYPE_SYNTACTIC_SUGAR && major != WORD_TYPE_BASE && major != WORD_TYPE_REF) - warning("aug_find_words_recursively(): Unknown word type %03x", major); + if (node_major(saidT) == 0x141 || node_major(saidT) == 0x152 || + node_major(saidT) == node_major(parseT)) + ret = scanSaidChildren(parseT, saidT->right->right, + inParen ? SCAN_SAID_OR : SCAN_SAID_AND ); + else + ret = 0; - } else // Did NOT find a word group: Attempt to recurse - aug_find_words_recursively(tree, pos, base_words, base_words_nr, - ref_words, ref_words_nr, maxwords, refbranch || major == WORD_TYPE_REF); + } else if (node_major(saidT) != 0x141 && node_major(saidT) != 0x152 && + node_major(saidT) != node_major(parseT)) { - pos = aug_get_next_sibling(tree, pos, &major, &minor); - } -} + // parseT and saidT both aren't terminals + // said major is not 0x141 or 0x152 or parse major + ret = scanParseChildren(parseT->right->right, saidT); -static void aug_find_words(parse_tree_node_t *tree, int startpos, int *base_words, int *base_words_nr, - int *ref_words, int *ref_words_nr, int maxwords) { - // initializing wrapper for aug_find_words_recursively() - *base_words_nr = 0; - *ref_words_nr = 0; + } else { - aug_find_words_recursively(tree, startpos, base_words, base_words_nr, ref_words, ref_words_nr, maxwords, 0); -} + // parseT and saidT are both not terminals, + // said major 0x141 or 0x152 or equal to parse major + ret = scanSaidChildren(parseT->right->right, saidT->right->right, + inParen ? SCAN_SAID_OR : SCAN_SAID_AND); -static int aug_contains_word(int *list, int length, int word) { - int i; + } - if (word == ANYWORD) - return (length); + if (inBracket && ret == 0) { + scidprintf("%*smatchTrees changing ret to 1 due to brackets\n", + outputDepth, ""); + ret = 1; + } - for (i = 0; i < length; i++) - if (list[i] == word) - return 1; + scidprintf("%*smatchTrees returning %d\n", outputDepth, "", ret); + outputDepth--; - return 0; + return ret; } -static int augment_sentence_expression(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset, - int parse_branch, int major, int minor, int *base_words, int base_words_nr, - int *ref_words, int ref_words_nr); +static int scanSaidChildren(ParseTreeNode* parseT, ParseTreeNode* saidT, + ScanSaidType type) { + outputDepth++; + scidprintf("%*sscanSaid(%s) on ", outputDepth, "", + type == SCAN_SAID_OR ? "OR" : "AND"); + node_print_desc(parseT); + scidprintf(" and "); + node_print_desc(saidT); + scidprintf("\n"); -static int augment_match_expression_p(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset, - int parse_basepos, int major, int minor, - int *base_words, int base_words_nr, int *ref_words, int ref_words_nr) { - int cmajor, cminor, cpos; - cpos = aug_get_first_child(saidt, augment_pos, &cmajor, &cminor); - if (!cpos) { - warning("augment_match_expression_p(): Empty condition"); - return 1; - } + int ret = 1; - scidprintf("Attempting to match (%03x %03x (%03x %03x\n", major, minor, cmajor, cminor); - - if ((major == WORD_TYPE_BASE) && (minor == AUGMENT_SENTENCE_MINOR_RECURSE)) - return augment_match_expression_p(saidt, cpos, parset, parse_basepos, cmajor, cminor, - base_words, base_words_nr, ref_words, ref_words_nr); - - switch (major) { - - case WORD_TYPE_BASE: - while (cpos) { - if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_WORD) { - int word = aug_get_wgroup(saidt, cpos); - scidprintf("Looking for word %03x\n", word); - - if (aug_contains_word(base_words, base_words_nr, word)) - return 1; - } else if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_PHRASE) { - if (augment_sentence_expression(saidt, cpos, parset, parse_basepos, cmajor, cminor, - base_words, base_words_nr, ref_words, ref_words_nr)) - return 1; - } else if (cminor == AUGMENT_SENTENCE_MINOR_PARENTHESES) { - int gc_major, gc_minor; - int gchild = aug_get_first_child(saidt, cpos, &gc_major, &gc_minor); - - while (gchild) { - if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, major, - minor, base_words, base_words_nr, - ref_words, ref_words_nr)) - return 1; - gchild = aug_get_next_sibling(saidt, gchild, &gc_major, &gc_minor); - } - } else - warning("augment_match_expression_p(): Unknown type 141 minor number %3x", cminor); - - cpos = aug_get_next_sibling(saidt, cpos, &cmajor, &cminor); + assert(!(type == SCAN_SAID_OR && !saidT)); - } - break; - - case WORD_TYPE_REF: - while (cpos) { - if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_WORD) { - int word = aug_get_wgroup(saidt, cpos); - scidprintf("Looking for refword %03x\n", word); - - if (aug_contains_word(ref_words, ref_words_nr, word)) - return 1; - } else if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_PHRASE) { - if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, cmajor, cminor, - base_words, base_words_nr, ref_words, ref_words_nr)) - return 1; - } else if (cminor == AUGMENT_SENTENCE_MINOR_PARENTHESES) { - int gc_major, gc_minor; - int gchild = aug_get_first_child(saidt, cpos, &gc_major, &gc_minor); - - while (gchild) { - if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, major, - minor, base_words, base_words_nr, - ref_words, ref_words_nr)) - return 1; - gchild = aug_get_next_sibling(saidt, gchild, &gc_major, &gc_minor); - } - } else - warning("augment_match_expression_p(): Unknown type 144 minor number %3x", cminor); - - cpos = aug_get_next_sibling(saidt, cpos, &cmajor, &cminor); + while (saidT) { + assert(saidT->type == kParseTreeBranchNode); - } - break; + ParseTreeNode* saidChild = saidT->left; + assert(saidChild); - case AUGMENT_SENTENCE_PART_BRACKETS: - if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, cmajor, cminor, - base_words, base_words_nr, ref_words, ref_words_nr)) - return 1; + if (node_major(saidChild) != 0x145) { - scidprintf("Didn't match subexpression; checking sub-bracked predicate %03x\n", cmajor); + ret = scanParseChildren(parseT, saidChild); - switch (cmajor) { - case WORD_TYPE_BASE: - if (!base_words_nr) - return 1; - break; + if (type == SCAN_SAID_AND && ret != 1) + break; - case WORD_TYPE_REF: - if (!ref_words_nr) - return 1; - break; + if (type == SCAN_SAID_OR && ret == 1) + break; - default: - warning("augment_match_expression_p(): (subp1) Unkonwn sub-bracket predicate %03x", cmajor); } - break; + saidT = saidT->right; - default: - warning("augment_match_expression_p(): Unknown predicate %03x", major); + } + scidprintf("%*sscanSaid returning %d\n", outputDepth, "", ret); + outputDepth--; + return ret; +} + + +static int scanParseChildren(ParseTreeNode* parseT, ParseTreeNode* saidT) { + + outputDepth++; + scidprintf("%*sscanParse on ", outputDepth, ""); + node_print_desc(parseT); + scidprintf(" and "); + node_print_desc(saidT); + scidprintf("\n"); + + if (node_major(saidT) == 0x14B) { + dontclaim = true; + scidprintf("%*sscanParse returning 1 (0x14B)\n", outputDepth, ""); + outputDepth--; + return 1; } - scidprintf("augment_match_expression_p(): Generic failure\n"); + bool inParen = node_minor(saidT) == 0x14F || node_minor(saidT) == 0x150; + bool inBracket = node_major(saidT) == 0x152; - return 0; -} + int ret; -static int augment_sentence_expression(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset, - int parse_branch, int major, int minor, int *base_words, int base_words_nr, - int *ref_words, int ref_words_nr) { - int check_major, check_minor; - int check_pos = aug_get_first_child(saidt, augment_pos, &check_major, &check_minor); - do { - if (!(augment_match_expression_p(saidt, check_pos, parset, parse_branch, check_major, check_minor, - base_words, base_words_nr, ref_words, ref_words_nr))) - return 0; - } while ((check_pos = aug_get_next_sibling(saidt, check_pos, &check_major, &check_minor))); + // descend further down saidT before actually scanning parseT + if ((node_major(saidT) == 0x141 || node_major(saidT) == 0x152) && + !node_is_terminal(saidT)) { - return 1; -} + ret = scanSaidChildren(parseT, saidT->right->right, + inParen ? SCAN_SAID_OR : SCAN_SAID_AND ); -static int augment_sentence_part(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset, int parse_basepos, int major, int minor) { - int pmajor, pminor; - int parse_branch = parse_basepos; - int optional = 0; - int foundwords = 0; + } else if (parseT && parseT->left->type == kParseTreeBranchNode) { - scidprintf("Augmenting (%03x %03x\n", major, minor); + ret = 0; + int subresult = 0; - if (major == AUGMENT_SENTENCE_PART_BRACKETS) { // '[/ foo]' is true if '/foo' or if there - // exists no x for which '/x' is true - if ((augment_pos = aug_get_first_child(saidt, augment_pos, &major, &minor))) { - scidprintf("Optional part: Now augmenting (%03x %03x\n", major, minor); - optional = 1; - } else { - scidprintf("Matched empty optional expression\n"); - return 1; - } - } + while (parseT) { + assert(parseT->type == kParseTreeBranchNode); - if ((major < 0x141) || (major > 0x143)) { - scidprintf("augment_sentence_part(): Unexpected sentence part major number %03x\n", major); - return 0; - } + ParseTreeNode* parseChild = parseT->left; + assert(parseChild); - while ((parse_branch = aug_get_next_sibling(parset, parse_branch, &pmajor, &pminor))) { - if (pmajor == major) { // found matching sentence part - int success; - int base_words_nr; - int ref_words_nr; - int base_words[AUGMENT_MAX_WORDS]; - int ref_words[AUGMENT_MAX_WORDS]; -#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION - int i; -#endif + scidprintf("%*sscanning next: ", outputDepth, ""); + node_print_desc(parseChild); + scidprintf("\n"); - scidprintf("Found match with pminor = %03x\n", pminor); - aug_find_words(parset, parse_branch, base_words, &base_words_nr, ref_words, &ref_words_nr, AUGMENT_MAX_WORDS); - foundwords |= (ref_words_nr | base_words_nr); -#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION - printf("%d base words:", base_words_nr); - for (i = 0; i < base_words_nr; i++) - printf(" %03x", base_words[i]); - printf("\n%d reference words:", ref_words_nr); - for (i = 0; i < ref_words_nr; i++) - printf(" %03x", ref_words[i]); - printf("\n"); -#endif + if (node_major(parseChild) == node_major(saidT) || + node_major(parseChild) == 0x141) + subresult = matchTrees(parseChild, saidT); - success = augment_sentence_expression(saidt, augment_pos, parset, parse_basepos, major, minor, - base_words, base_words_nr, ref_words, ref_words_nr); + if (subresult != 0) + ret = subresult; + + if (ret == 1) + break; + + parseT = parseT->right; - if (success) { - scidprintf("SUCCESS on augmenting (%03x %03x\n", major, minor); - return 1; - } } + + // ret is now: + // 1 if ANY matchTrees(parseSibling, saidTree) returned 1 + // ELSE: -1 if ANY returned -1 + // ELSE: 0 + + } else { + + ret = matchTrees(parseT, saidT); + } - if (optional && (foundwords == 0)) { - scidprintf("Found no words and optional branch => SUCCESS on augmenting (%03x %03x\n", major, minor); - return 1; + if (inBracket && ret == 0) { + scidprintf("%*sscanParse changing ret to 1 due to brackets\n", + outputDepth, ""); + ret = 1; } - scidprintf("FAILURE on augmenting (%03x %03x\n", major, minor); - return 0; + scidprintf("%*sscanParse returning %d\n", outputDepth, "", ret); + outputDepth--; + + return ret; } -static int augment_parse_nodes(parse_tree_node_t *parset, parse_tree_node_t *saidt) { - int augment_basepos = 0; - int parse_basepos; - int major, minor; - int dontclaim = 0; - parse_basepos = aug_get_base_node(parset); - if (!parse_basepos) { - warning("augment_parse_nodes(): Parse tree is corrupt"); - return 0; - } - augment_basepos = aug_get_base_node(saidt); - if (!augment_basepos) { - warning("augment_parse_nodes(): Said tree is corrupt"); - return 0; - } +static int augment_parse_nodes(ParseTreeNode *parseT, ParseTreeNode *saidT) { + outputDepth = 0; + scidprintf("augment_parse_nodes on "); + node_print_desc(parseT); + scidprintf(" and "); + node_print_desc(saidT); + scidprintf("\n"); - while ((augment_basepos = aug_get_next_sibling(saidt, augment_basepos, &major, &minor))) { - if ((major == 0x14b) && (minor == SAID_LONG(SAID_GT))) - dontclaim = 1; // special case - else // normal sentence part - if (!(augment_sentence_part(saidt, augment_basepos, parset, parse_basepos, major, minor))) { - scidprintf("Returning failure\n"); - return 0; // fail - } - } + dontclaim = false; + + int ret = matchTrees(parseT, saidT); + + scidprintf("matchTrees returned %d\n", ret); - scidprintf("Returning success with dontclaim=%d\n", dontclaim); + if (ret != 1) + return 0; if (dontclaim) return SAID_PARTIAL_MATCH; - else - return 1; // full match + + return 1; } @@ -2441,22 +996,19 @@ static int augment_parse_nodes(parse_tree_node_t *parset, parse_tree_node_t *sai /**** Main code ****/ /*******************/ -int said(EngineState *s, byte *spec, bool verbose) { +int said(EngineState *s, const byte *spec, bool verbose) { int retval; Vocabulary *voc = g_sci->getVocabulary(); - parse_tree_node_t *parse_tree_ptr = voc->_parserNodes; + ParseTreeNode *parse_tree_ptr = voc->_parserNodes; if (voc->parserIsValid) { - if (said_parse_spec(spec)) { - printf("Offending spec was: "); - voc->decipherSaidBlock(spec); + if (said_parse_spec(spec)) return SAID_NO_MATCH; - } if (verbose) - vocab_dump_parse_tree("Said-tree", said_tree); // Nothing better to do yet - retval = augment_parse_nodes(parse_tree_ptr, &(said_tree[0])); + vocab_dump_parse_tree("Said-tree", said_tree); + retval = augment_parse_nodes(parse_tree_ptr, said_tree); if (!retval) return SAID_NO_MATCH; @@ -2470,15 +1022,108 @@ int said(EngineState *s, byte *spec, bool verbose) { } -#ifdef SAID_DEBUG_PROGRAM -int main (int argc, char *argv) { - byte block[] = {0x01, 0x00, 0xf8, 0xf5, 0x02, 0x01, 0xf6, 0xf2, 0x02, 0x01, 0xf2, 0x01, 0x03, 0xff}; - EngineState s; +/* + +Some test expressions for in the ScummVM debugging console, using +Codename: ICEMAN's vocabulary: + + + +said green board & [!*] / 8af < 1f6 +True + +said get green board & [!*] / 8af < 1f6 +False + +said green board & [!*] / 8af [< 1f6 ] +True + +said climb up & 19b , 426 [< 142 ] [/ 81e ] +True + +said climb up ladder & 19b , 426 [< 142 ] [/ 81e ] +True + +said climb down & 19b , 426 [< 142 ] [/ 81e ] +False + +said climb up tree & 19b , 426 [< 142 ] [/ 81e ] +False + +said climb up & 19b , 446 , 426 [< 143 ] [/ 81e ] +False + +said climb down & 19b , 446 , 426 [< 143 ] [/ 81e ] +True + +said use green device & 1a5 / 8c1 [< 21d ] +False + +said use electronic device & 1a5 / 8c1 [< 21d ] +True + +said use device & 1a5 / 8c1 [< 21d ] +True + +said eat & 429 [/ !* ] +True + +said eat ladder & 429 [/ !* ] +False + +said look at the ladder & 3f8 / 81e [< !* ] +True + +said look at the green ladder & 3f8 / 81e [< !* ] +False + +said look green book & / 7f6 [< 8d2 ] +False + +said look green book & 3f8 [< ca ] +True + +said get a blue board for the green ladder & 3f9 / 8af [ < 1f6 ] / 81e < 1f6 +False + +said get a board for the green ladder & 3f9 / 8af [ < 1f6 ] / 81e < 1f6 +True + +said get a blue board & 3f9 / 8af [ < 1f6 ] +False + +said get up & ( 3f8 , 3f9 ) [ < ( 142 , 143 ) ] +True + +said get left & ( 3f8 , 3f9 ) [ < ( 142 , 143 ) ] +False + +said look down & ( 3f8 , 3f9 ) [ < ( 142 , 143 ) ] +True + +said get & ( 3f8 , 3f9 ) [ < ( 142 , 143 ) ] +True + +said put washer on shaft & 455 , ( 3fa < cb ) / 8c6 +True + +said depth correct & [!*] < 8b1 / 22 +True + +said depth acknowledged & / 46d , 460 , 44d < 8b1 +True + +said depth confirmed & / 46d , 460 , 44d < 8b1 +True + +said depth attained & / 46d , 460 , 44d < 8b1 +True + + +*/ + + - s.parser_valid = 1; - said(&s, block); -} -#endif } // End of namespace Sci diff --git a/engines/sci/parser/said.y b/engines/sci/parser/said.y deleted file mode 100644 index cbb2ff3e62..0000000000 --- a/engines/sci/parser/said.y +++ /dev/null @@ -1,839 +0,0 @@ -%{ -/* ScummVM - Graphic Adventure Engine - * - * ScummVM is the legal property of its developers, whose names - * are too numerous to list here. Please refer to the COPYRIGHT - * file distributed with this source distribution. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * $URL$ - * $Id$ - * - */ - -#include "sci/engine/state.h" - - -// Bison generates an empty switch statement that gives a warning in MSVC. -// This disables that warning. -#ifdef _MSC_VER -#pragma warning(disable:4065) -#endif - - -namespace Sci { - -#define SAID_BRANCH_NULL 0 - -#define MAX_SAID_TOKENS 128 - -// Maximum number of words to be expected in a parsed sentence -#define AUGMENT_MAX_WORDS 64 - - -#define ANYWORD 0xfff - -#define WORD_TYPE_BASE 0x141 -#define WORD_TYPE_REF 0x144 -#define WORD_TYPE_SYNTACTIC_SUGAR 0x145 - -#define AUGMENT_SENTENCE_PART_BRACKETS 0x152 - -// Minor numbers -#define AUGMENT_SENTENCE_MINOR_MATCH_PHRASE 0x14c -#define AUGMENT_SENTENCE_MINOR_MATCH_WORD 0x153 -#define AUGMENT_SENTENCE_MINOR_RECURSE 0x144 -#define AUGMENT_SENTENCE_MINOR_PARENTHESES 0x14f - - -#undef YYDEBUG /*1*/ -//#define SAID_DEBUG*/ -//#define SCI_DEBUG_PARSE_TREE_AUGMENTATION // uncomment to debug parse tree augmentation - - -#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION -#define scidprintf printf -#else -void print_nothing(...) { } -#define scidprintf print_nothing -#endif - - -static char *said_parse_error; - -static int said_token; -static int said_tokens_nr; -static int said_tokens[MAX_SAID_TOKENS]; -static int said_blessed; // increminated by said_top_branch - -static int said_tree_pos; // Set to 0 if we're out of space -#define SAID_TREE_START 4; // Reserve space for the 4 top nodes - -#define VALUE_IGNORE -424242 - -static parse_tree_node_t said_tree[VOCAB_TREE_NODES]; - -typedef int wgroup_t; -typedef int tree_t; -typedef int said_spec_t; - -static tree_t said_aug_branch(int, int, tree_t, tree_t); -static tree_t said_attach_branch(tree_t, tree_t); -/* -static tree_t said_wgroup_branch(wgroup_t); -*/ -static said_spec_t said_top_branch(tree_t); -static tree_t said_paren(tree_t, tree_t); -static tree_t said_value(int, tree_t); -static tree_t said_terminal(int); - -static int yylex(); - -static int yyerror(const char *s) { - said_parse_error = strdup(s); - return 1; /* Abort */ -} - -%} - -%token WGROUP /* Word group */ -%token YY_COMMA /* 0xf0 */ -%token YY_AMP /* 0xf1 */ -%token YY_SLASH /* 0xf2 */ -%token YY_PARENO /* 0xf3 */ -%token YY_PARENC /* 0xf4 */ -%token YY_BRACKETSO /* 0xf5 */ -%token YY_BRACKETSC /* 0xf6 */ -%token YY_HASH /* 0xf7 */ -%token YY_LT /* 0xf8 */ -%token YY_GT /* 0xf9 */ -%token YY_BRACKETSO_LT /* special token used to imitate LR(2) behaviour */ -%token YY_BRACKETSO_SLASH /* special token used to imitate LR(2) behaviour */ -%token YY_LT_BRACKETSO /* special token used to imitate LR(2) behaviour */ -%token YY_LT_PARENO /* special token used to imitate LR(2) behaviour */ - -%% - -saidspec : leftspec optcont - { $$ = said_top_branch(said_attach_branch($1, $2)); } - | leftspec midspec optcont - { $$ = said_top_branch(said_attach_branch($1, said_attach_branch($2, $3))); } - | leftspec midspec rightspec optcont - { $$ = said_top_branch(said_attach_branch($1, said_attach_branch($2, said_attach_branch($3, $4)))); } - ; - - -optcont : /* empty */ - { $$ = SAID_BRANCH_NULL; } - | YY_GT - { $$ = said_paren(said_value(0x14b, said_value(0xf900, said_terminal(0xf900))), SAID_BRANCH_NULL); } - ; - - - -leftspec : /* empty */ - { $$ = SAID_BRANCH_NULL; } - | expr - { $$ = said_paren(said_value(0x141, said_value(0x149, $1)), SAID_BRANCH_NULL); } - ; - - - -midspec : YY_SLASH expr - { $$ = said_aug_branch(0x142, 0x14a, $2, SAID_BRANCH_NULL); } - | YY_BRACKETSO_SLASH YY_SLASH expr YY_BRACKETSC - { $$ = said_aug_branch(0x152, 0x142, said_aug_branch(0x142, 0x14a, $3, SAID_BRANCH_NULL), SAID_BRANCH_NULL); } - | YY_SLASH - { $$ = SAID_BRANCH_NULL; } - ; - - - -rightspec : YY_SLASH expr - { $$ = said_aug_branch(0x143, 0x14a, $2, SAID_BRANCH_NULL); } - | YY_BRACKETSO_SLASH YY_SLASH expr YY_BRACKETSC - { $$ = said_aug_branch(0x152, 0x143, said_aug_branch(0x143, 0x14a, $3, SAID_BRANCH_NULL), SAID_BRANCH_NULL); } - | YY_SLASH - { $$ = SAID_BRANCH_NULL; } - ; - - -word : WGROUP - { $$ = said_paren(said_value(0x141, said_value(0x153, said_terminal($1))), SAID_BRANCH_NULL); } - ; - - -cwordset : wordset - { $$ = said_aug_branch(0x141, 0x14f, $1, SAID_BRANCH_NULL); } - | YY_BRACKETSO wordset YY_BRACKETSC - { $$ = said_aug_branch(0x141, 0x14f, said_aug_branch(0x152, 0x14c, said_aug_branch(0x141, 0x14f, $2, SAID_BRANCH_NULL), SAID_BRANCH_NULL), SAID_BRANCH_NULL); } - ; - - -wordset : word - { $$ = $1; } - | YY_PARENO expr YY_PARENC - { $$ = said_aug_branch(0x141, 0x14c, $2, SAID_BRANCH_NULL); } - | wordset YY_COMMA wordset - { $$ = said_attach_branch($1, $3); } - | wordset YY_BRACKETSO_LT wordrefset YY_BRACKETSC - { $$ = said_attach_branch($1, $3); } - | wordset YY_COMMA YY_BRACKETSO wordset YY_BRACKETSC - { $$ = said_attach_branch($1, $3); } - ; - - -expr : cwordset cwordrefset - { $$ = said_attach_branch($1, $2); } - | cwordset - { $$ = $1; } - | cwordrefset - { $$ = $1; } - ; - - -cwordrefset : wordrefset - { $$ = $1; } - | YY_BRACKETSO_LT wordrefset YY_BRACKETSC - { $$ = said_aug_branch(0x152, 0x144, $2, SAID_BRANCH_NULL); } - | wordrefset YY_BRACKETSO_LT wordrefset YY_BRACKETSC - { $$ = said_attach_branch($1, said_aug_branch(0x152, 0x144, $3, SAID_BRANCH_NULL)); } - ; - - -wordrefset : YY_LT word recref - { $$ = said_aug_branch(0x144, 0x14f, $2, $3); } - | YY_LT_PARENO YY_PARENO expr YY_PARENC - { $$ = said_aug_branch(0x144, 0x14f, said_aug_branch(0x141, 0x144, $2, SAID_BRANCH_NULL), SAID_BRANCH_NULL); } - | YY_LT wordset - { $$ = said_aug_branch(0x144, 0x14f, $2, SAID_BRANCH_NULL); } - | YY_LT_BRACKETSO YY_BRACKETSO wordset YY_BRACKETSC - { $$ = said_aug_branch(0x152, 0x144, said_aug_branch(0x144, 0x14f, $3, SAID_BRANCH_NULL), SAID_BRANCH_NULL); } - ; - - -recref : YY_LT wordset recref - { $$ = said_aug_branch(0x141, 0x144, said_aug_branch(0x144, 0x14f, $2, SAID_BRANCH_NULL), $3); } - | YY_LT wordset - { $$ = said_aug_branch(0x141, 0x144, said_aug_branch(0x144, 0x14f, $2, SAID_BRANCH_NULL), SAID_BRANCH_NULL); } - | YY_LT_PARENO YY_PARENO expr YY_PARENC - { $$ = said_aug_branch(0x141, 0x14c, $2, SAID_BRANCH_NULL); } - ; - -%% - -int parse_yy_token_lookup[] = {YY_COMMA, YY_AMP, YY_SLASH, YY_PARENO, YY_PARENC, YY_BRACKETSO, YY_BRACKETSC, YY_HASH, YY_LT, YY_GT}; - -static int yylex() { - int retval = said_tokens[said_token++]; - - if (retval < SAID_LONG(SAID_FIRST)) { - yylval = retval; - retval = WGROUP; - } else { - retval >>= 8; - - if (retval == SAID_TERM) - retval = 0; - else { - assert(retval >= SAID_FIRST); - retval = parse_yy_token_lookup[retval - SAID_FIRST]; - if (retval == YY_BRACKETSO) { - if ((said_tokens[said_token] >> 8) == SAID_LT) - retval = YY_BRACKETSO_LT; - else - if ((said_tokens[said_token] >> 8) == SAID_SLASH) - retval = YY_BRACKETSO_SLASH; - } else if (retval == YY_LT && (said_tokens[said_token] >> 8) == SAID_BRACKO) { - retval = YY_LT_BRACKETSO; - } else if (retval == YY_LT && (said_tokens[said_token] >> 8) == SAID_PARENO) { - retval = YY_LT_PARENO; - } - } - } - - return retval; -} - -static int said_next_node() { - return ((said_tree_pos == 0) || (said_tree_pos >= VOCAB_TREE_NODES)) ? said_tree_pos = 0 : said_tree_pos++; -} - -#define SAID_NEXT_NODE said_next_node() - -static int said_leaf_node(tree_t pos, int value) { - said_tree[pos].type = kParseTreeLeafNode; - - if (value != VALUE_IGNORE) - said_tree[pos].content.value = value; - - return pos; -} - -static int said_branch_node(tree_t pos, int left, int right) { - said_tree[pos].type = kParseTreeBranchNode; - - if (left != VALUE_IGNORE) - said_tree[pos].content.branches[0] = left; - - if (right != VALUE_IGNORE) - said_tree[pos].content.branches[1] = right; - - return pos; -} - -static tree_t said_paren(tree_t t1, tree_t t2) { - if (t1) - return said_branch_node(SAID_NEXT_NODE, t1, t2); - else - return t2; -} - -static tree_t said_value(int val, tree_t t) { - return said_branch_node(SAID_NEXT_NODE, said_leaf_node(SAID_NEXT_NODE, val), t); - -} - -static tree_t said_terminal(int val) { - return said_leaf_node(SAID_NEXT_NODE, val); -} - -static tree_t said_aug_branch(int n1, int n2, tree_t t1, tree_t t2) { - int retval; - - retval = said_branch_node(SAID_NEXT_NODE, - said_branch_node(SAID_NEXT_NODE, - said_leaf_node(SAID_NEXT_NODE, n1), - said_branch_node(SAID_NEXT_NODE, - said_leaf_node(SAID_NEXT_NODE, n2), - t1) - ), - t2); - -#ifdef SAID_DEBUG - fprintf(stderr, "AUG(0x%x, 0x%x, [%04x], [%04x]) = [%04x]\n", n1, n2, t1, t2, retval); -#endif - - return retval; -} - -static tree_t said_attach_branch(tree_t base, tree_t attacheant) { -#ifdef SAID_DEBUG - fprintf(stderr, "ATT2([%04x], [%04x]) = [%04x]\n", base, attacheant, base); -#endif - - if (!attacheant) - return base; - if (!base) - return attacheant; - - if (!base) - return 0; // Happens if we're out of space - - said_branch_node(base, VALUE_IGNORE, attacheant); - - return base; -} - -static said_spec_t said_top_branch(tree_t first) { -#ifdef SAID_DEBUG - fprintf(stderr, "TOP([%04x])\n", first); -#endif - said_branch_node(0, 1, 2); - said_leaf_node(1, 0x141); // Magic number #1 - said_branch_node(2, 3, first); - said_leaf_node(3, 0x13f); // Magic number #2 - - ++said_blessed; - - return 0; -} - -static int said_parse_spec(byte *spec) { - int nextitem; - - said_parse_error = NULL; - said_token = 0; - said_tokens_nr = 0; - said_blessed = 0; - - said_tree_pos = SAID_TREE_START; - - do { - nextitem = *spec++; - if (nextitem < SAID_FIRST) - said_tokens[said_tokens_nr++] = nextitem << 8 | *spec++; - else - said_tokens[said_tokens_nr++] = SAID_LONG(nextitem); - - } while ((nextitem != SAID_TERM) && (said_tokens_nr < MAX_SAID_TOKENS)); - - if (nextitem == SAID_TERM) - yyparse(); - else { - warning("SAID spec is too long"); - return 1; - } - - if (said_parse_error) { - warning("Error while parsing SAID spec: %s", said_parse_error); - free(said_parse_error); - return 1; - } - - if (said_tree_pos == 0) { - warning("Out of tree space while parsing SAID spec"); - return 1; - } - - if (said_blessed != 1) { - warning("Found multiple top branches"); - return 1; - } - - return 0; -} - -/**********************/ -/**** Augmentation ****/ -/**********************/ - -// primitive functions - -#define AUG_READ_BRANCH(a, br, p) \ - if (tree[p].type != kParseTreeBranchNode) \ - return 0; \ - a = tree[p].content.branches[br]; - -#define AUG_READ_VALUE(a, p) \ - if (tree[p].type != kParseTreeLeafNode) \ - return 0; \ - a = tree[p].content.value; - -#define AUG_ASSERT(i) \ - if (!i) return 0; - -static int aug_get_next_sibling(parse_tree_node_t *tree, int pos, int *first, int *second) { - // Returns the next sibling relative to the specified position in 'tree', - // sets *first and *second to its augment node values, returns the new position - // or 0 if there was no next sibling - int seek, valpos; - - AUG_READ_BRANCH(pos, 1, pos); - AUG_ASSERT(pos); - AUG_READ_BRANCH(seek, 0, pos); - AUG_ASSERT(seek); - - // Now retrieve first value - AUG_READ_BRANCH(valpos, 0, seek); - AUG_ASSERT(valpos); - AUG_READ_VALUE(*first, valpos); - - // Get second value - AUG_READ_BRANCH(seek, 1, seek); - AUG_ASSERT(seek); - AUG_READ_BRANCH(valpos, 0, seek); - AUG_ASSERT(valpos); - AUG_READ_VALUE(*second, valpos); - - return pos; -} - -static int aug_get_wgroup(parse_tree_node_t *tree, int pos) { - // Returns 0 if pos in tree is not the root of a 3-element list, otherwise - // it returns the last element (which, in practice, is the word group - int val; - - AUG_READ_BRANCH(pos, 0, pos); - AUG_ASSERT(pos); - AUG_READ_BRANCH(pos, 1, pos); - AUG_ASSERT(pos); - AUG_READ_BRANCH(pos, 1, pos); - AUG_ASSERT(pos); - AUG_READ_VALUE(val, pos); - - return val; -} - -static int aug_get_base_node(parse_tree_node_t *tree) { - int startpos = 0; - AUG_READ_BRANCH(startpos, 1, startpos); - - return startpos; -} - -// semi-primitive functions - -static int aug_get_first_child(parse_tree_node_t *tree, int pos, int *first, int *second) { - // like aug_get_next_sibling, except that it recurses into the tree and - // finds the first child (usually *not* Ayanami Rei) of the current branch - // rather than its next sibling. - AUG_READ_BRANCH(pos, 0, pos); - AUG_ASSERT(pos); - AUG_READ_BRANCH(pos, 1, pos); - AUG_ASSERT(pos); - - return aug_get_next_sibling(tree, pos, first, second); -} - -static void aug_find_words_recursively(parse_tree_node_t *tree, int startpos, int *base_words, int *base_words_nr, - int *ref_words, int *ref_words_nr, int maxwords, int refbranch) { - // Finds and lists all base (141) and reference (144) words */ - int major, minor; - int word; - int pos = aug_get_first_child(tree, startpos, &major, &minor); - - //if (major == WORD_TYPE_REF) - // refbranch = 1; - - while (pos) { - if ((word = aug_get_wgroup(tree, pos))) { // found a word - if (!refbranch && major == WORD_TYPE_BASE) { - if ((*base_words_nr) == maxwords) { - warning("Out of regular words"); - return; // return gracefully - } - - base_words[*base_words_nr] = word; // register word - ++(*base_words_nr); - - } - if (major == WORD_TYPE_REF || refbranch) { - if ((*ref_words_nr) == maxwords) { - warning("Out of reference words"); - return; // return gracefully - } - - ref_words[*ref_words_nr] = word; // register word - ++(*ref_words_nr); - - } - if (major != WORD_TYPE_SYNTACTIC_SUGAR && major != WORD_TYPE_BASE && major != WORD_TYPE_REF) - warning("aug_find_words_recursively(): Unknown word type %03x", major); - - } else // Did NOT find a word group: Attempt to recurse - aug_find_words_recursively(tree, pos, base_words, base_words_nr, - ref_words, ref_words_nr, maxwords, refbranch || major == WORD_TYPE_REF); - - pos = aug_get_next_sibling(tree, pos, &major, &minor); - } -} - - -static void aug_find_words(parse_tree_node_t *tree, int startpos, int *base_words, int *base_words_nr, - int *ref_words, int *ref_words_nr, int maxwords) { - // initializing wrapper for aug_find_words_recursively() - *base_words_nr = 0; - *ref_words_nr = 0; - - aug_find_words_recursively(tree, startpos, base_words, base_words_nr, ref_words, ref_words_nr, maxwords, 0); -} - - -static int aug_contains_word(int *list, int length, int word) { - int i; - - if (word == ANYWORD) - return (length); - - for (i = 0; i < length; i++) - if (list[i] == word) - return 1; - - return 0; -} - - -static int augment_sentence_expression(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset, - int parse_branch, int major, int minor, int *base_words, int base_words_nr, - int *ref_words, int ref_words_nr); - -static int augment_match_expression_p(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset, - int parse_basepos, int major, int minor, - int *base_words, int base_words_nr, int *ref_words, int ref_words_nr) { - int cmajor, cminor, cpos; - cpos = aug_get_first_child(saidt, augment_pos, &cmajor, &cminor); - if (!cpos) { - warning("augment_match_expression_p(): Empty condition"); - return 1; - } - - scidprintf("Attempting to match (%03x %03x (%03x %03x\n", major, minor, cmajor, cminor); - - if ((major == WORD_TYPE_BASE) && (minor == AUGMENT_SENTENCE_MINOR_RECURSE)) - return augment_match_expression_p(saidt, cpos, parset, parse_basepos, cmajor, cminor, - base_words, base_words_nr, ref_words, ref_words_nr); - - switch (major) { - - case WORD_TYPE_BASE: - while (cpos) { - if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_WORD) { - int word = aug_get_wgroup(saidt, cpos); - scidprintf("Looking for word %03x\n", word); - - if (aug_contains_word(base_words, base_words_nr, word)) - return 1; - } else if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_PHRASE) { - if (augment_sentence_expression(saidt, cpos, parset, parse_basepos, cmajor, cminor, - base_words, base_words_nr, ref_words, ref_words_nr)) - return 1; - } else if (cminor == AUGMENT_SENTENCE_MINOR_PARENTHESES) { - int gc_major, gc_minor; - int gchild = aug_get_first_child(saidt, cpos, &gc_major, &gc_minor); - - while (gchild) { - if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, major, - minor, base_words, base_words_nr, - ref_words, ref_words_nr)) - return 1; - gchild = aug_get_next_sibling(saidt, gchild, &gc_major, &gc_minor); - } - } else - warning("augment_match_expression_p(): Unknown type 141 minor number %3x", cminor); - - cpos = aug_get_next_sibling(saidt, cpos, &cmajor, &cminor); - - } - break; - - case WORD_TYPE_REF: - while (cpos) { - if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_WORD) { - int word = aug_get_wgroup(saidt, cpos); - scidprintf("Looking for refword %03x\n", word); - - if (aug_contains_word(ref_words, ref_words_nr, word)) - return 1; - } else if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_PHRASE) { - if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, cmajor, cminor, - base_words, base_words_nr, ref_words, ref_words_nr)) - return 1; - } else if (cminor == AUGMENT_SENTENCE_MINOR_PARENTHESES) { - int gc_major, gc_minor; - int gchild = aug_get_first_child(saidt, cpos, &gc_major, &gc_minor); - - while (gchild) { - if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, major, - minor, base_words, base_words_nr, - ref_words, ref_words_nr)) - return 1; - gchild = aug_get_next_sibling(saidt, gchild, &gc_major, &gc_minor); - } - } else - warning("augment_match_expression_p(): Unknown type 144 minor number %3x", cminor); - - cpos = aug_get_next_sibling(saidt, cpos, &cmajor, &cminor); - - } - break; - - case AUGMENT_SENTENCE_PART_BRACKETS: - if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, cmajor, cminor, - base_words, base_words_nr, ref_words, ref_words_nr)) - return 1; - - scidprintf("Didn't match subexpression; checking sub-bracked predicate %03x\n", cmajor); - - switch (cmajor) { - case WORD_TYPE_BASE: - if (!base_words_nr) - return 1; - break; - - case WORD_TYPE_REF: - if (!ref_words_nr) - return 1; - break; - - default: - warning("augment_match_expression_p(): (subp1) Unkonwn sub-bracket predicate %03x", cmajor); - } - - break; - - default: - warning("augment_match_expression_p(): Unknown predicate %03x", major); - - } - - scidprintf("augment_match_expression_p(): Generic failure\n"); - - return 0; -} - -static int augment_sentence_expression(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset, - int parse_branch, int major, int minor, int *base_words, int base_words_nr, - int *ref_words, int ref_words_nr) { - int check_major, check_minor; - int check_pos = aug_get_first_child(saidt, augment_pos, &check_major, &check_minor); - do { - if (!(augment_match_expression_p(saidt, check_pos, parset, parse_branch, check_major, check_minor, - base_words, base_words_nr, ref_words, ref_words_nr))) - return 0; - } while ((check_pos = aug_get_next_sibling(saidt, check_pos, &check_major, &check_minor))); - - return 1; -} - -static int augment_sentence_part(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset, int parse_basepos, int major, int minor) { - int pmajor, pminor; - int parse_branch = parse_basepos; - int optional = 0; - int foundwords = 0; - - scidprintf("Augmenting (%03x %03x\n", major, minor); - - if (major == AUGMENT_SENTENCE_PART_BRACKETS) { // '[/ foo]' is true if '/foo' or if there - // exists no x for which '/x' is true - if ((augment_pos = aug_get_first_child(saidt, augment_pos, &major, &minor))) { - scidprintf("Optional part: Now augmenting (%03x %03x\n", major, minor); - optional = 1; - } else { - scidprintf("Matched empty optional expression\n"); - return 1; - } - } - - if ((major < 0x141) || (major > 0x143)) { - scidprintf("augment_sentence_part(): Unexpected sentence part major number %03x\n", major); - return 0; - } - - while ((parse_branch = aug_get_next_sibling(parset, parse_branch, &pmajor, &pminor))) { - if (pmajor == major) { // found matching sentence part - int success; - int base_words_nr; - int ref_words_nr; - int base_words[AUGMENT_MAX_WORDS]; - int ref_words[AUGMENT_MAX_WORDS]; -#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION - int i; -#endif - - scidprintf("Found match with pminor = %03x\n", pminor); - aug_find_words(parset, parse_branch, base_words, &base_words_nr, ref_words, &ref_words_nr, AUGMENT_MAX_WORDS); - foundwords |= (ref_words_nr | base_words_nr); -#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION - printf("%d base words:", base_words_nr); - for (i = 0; i < base_words_nr; i++) - printf(" %03x", base_words[i]); - printf("\n%d reference words:", ref_words_nr); - for (i = 0; i < ref_words_nr; i++) - printf(" %03x", ref_words[i]); - printf("\n"); -#endif - - success = augment_sentence_expression(saidt, augment_pos, parset, parse_basepos, major, minor, - base_words, base_words_nr, ref_words, ref_words_nr); - - if (success) { - scidprintf("SUCCESS on augmenting (%03x %03x\n", major, minor); - return 1; - } - } - } - - if (optional && (foundwords == 0)) { - scidprintf("Found no words and optional branch => SUCCESS on augmenting (%03x %03x\n", major, minor); - return 1; - } - scidprintf("FAILURE on augmenting (%03x %03x\n", major, minor); - - return 0; -} - -static int augment_parse_nodes(parse_tree_node_t *parset, parse_tree_node_t *saidt) { - int augment_basepos = 0; - int parse_basepos; - int major, minor; - int dontclaim = 0; - - parse_basepos = aug_get_base_node(parset); - if (!parse_basepos) { - warning("augment_parse_nodes(): Parse tree is corrupt"); - return 0; - } - - augment_basepos = aug_get_base_node(saidt); - if (!augment_basepos) { - warning("augment_parse_nodes(): Said tree is corrupt"); - return 0; - } - - while ((augment_basepos = aug_get_next_sibling(saidt, augment_basepos, &major, &minor))) { - if ((major == 0x14b) && (minor == SAID_LONG(SAID_GT))) - dontclaim = 1; // special case - else // normal sentence part - if (!(augment_sentence_part(saidt, augment_basepos, parset, parse_basepos, major, minor))) { - scidprintf("Returning failure\n"); - return 0; // fail - } - } - - scidprintf("Returning success with dontclaim=%d\n", dontclaim); - - if (dontclaim) - return SAID_PARTIAL_MATCH; - else - return 1; // full match -} - - -/*******************/ -/**** Main code ****/ -/*******************/ - -int said(EngineState *s, byte *spec, bool verbose) { - int retval; - Vocabulary *voc = g_sci->getVocabulary(); - - parse_tree_node_t *parse_tree_ptr = voc->_parserNodes; - - if (voc->parserIsValid) { - if (said_parse_spec(spec)) { - printf("Offending spec was: "); - voc->decipherSaidBlock(spec); - return SAID_NO_MATCH; - } - - if (verbose) - vocab_dump_parse_tree("Said-tree", said_tree); // Nothing better to do yet - retval = augment_parse_nodes(parse_tree_ptr, &(said_tree[0])); - - if (!retval) - return SAID_NO_MATCH; - else if (retval != SAID_PARTIAL_MATCH) - return SAID_FULL_MATCH; - else - return SAID_PARTIAL_MATCH; - } - - return SAID_NO_MATCH; -} - - -#ifdef SAID_DEBUG_PROGRAM -int main (int argc, char *argv) { - byte block[] = {0x01, 0x00, 0xf8, 0xf5, 0x02, 0x01, 0xf6, 0xf2, 0x02, 0x01, 0xf2, 0x01, 0x03, 0xff}; - EngineState s; - - s.parser_valid = 1; - said(&s, block); -} -#endif - -} // End of namespace Sci diff --git a/engines/sci/parser/vocabulary.cpp b/engines/sci/parser/vocabulary.cpp index e48a9cdfda..20436d5b30 100644 --- a/engines/sci/parser/vocabulary.cpp +++ b/engines/sci/parser/vocabulary.cpp @@ -33,18 +33,34 @@ namespace Sci { -Vocabulary::Vocabulary(ResourceManager *resMan) : _resMan(resMan) { +Vocabulary::Vocabulary(ResourceManager *resMan, bool foreign) : _resMan(resMan), _foreign(foreign) { _parserRules = NULL; - _vocabVersion = kVocabularySCI0; memset(_parserNodes, 0, sizeof(_parserNodes)); // Mark parse tree as unused _parserNodes[0].type = kParseTreeLeafNode; - _parserNodes[0].content.value = 0; + _parserNodes[0].value = 0; _synonyms.clear(); // No synonyms debug(2, "Initializing vocabulary"); + if (_resMan->testResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI0_MAIN_VOCAB))) { + _vocabVersion = kVocabularySCI0; + _resourceIdWords = VOCAB_RESOURCE_SCI0_MAIN_VOCAB; + _resourceIdSuffixes = VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB; + _resourceIdBranches = VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES; + } else { + _vocabVersion = kVocabularySCI1; + _resourceIdWords = VOCAB_RESOURCE_SCI1_MAIN_VOCAB; + _resourceIdSuffixes = VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB; + _resourceIdBranches = VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES; + } + + if (_foreign) { + _resourceIdWords += 10; + _resourceIdSuffixes += 10; + _resourceIdBranches += 10; + } if (getSciVersion() <= SCI_VERSION_1_EGA && loadParserWords()) { loadSuffixes(); @@ -66,27 +82,46 @@ Vocabulary::~Vocabulary() { freeSuffixes(); } -bool Vocabulary::loadParserWords() { +void Vocabulary::reset() { + parserIsValid = false; // Invalidate parser + parser_event = NULL_REG; // Invalidate parser event + parser_base = make_reg(g_sci->getEngineState()->_segMan->getSysStringsSegment(), SYS_STRING_PARSER_BASE); +} - char currentword[256] = ""; // They're not going to use words longer than 255 ;-) - int currentwordpos = 0; +bool Vocabulary::loadParserWords() { + char currentWord[VOCAB_MAX_WORDLENGTH] = ""; + int currentWordPos = 0; // First try to load the SCI0 vocab resource. - Resource *resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI0_MAIN_VOCAB), 0); + Resource *resource = _resMan->findResource(ResourceId(kResourceTypeVocab, _resourceIdWords), 0); if (!resource) { - warning("SCI0: Could not find a main vocabulary, trying SCI01"); - resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI1_MAIN_VOCAB), 0); - _vocabVersion = kVocabularySCI1; + warning("Could not find a main vocabulary"); + return false; // NOT critical: SCI1 games and some demos don't have one! } - if (!resource) { - warning("SCI1: Could not find a main vocabulary"); - return false; // NOT critical: SCI1 games and some demos don't have one! + VocabularyVersions resourceType = _vocabVersion; + + if (resourceType == kVocabularySCI0) { + if (resource->size < 26 * 2) { + warning("Invalid main vocabulary encountered: Much too small"); + return false; + } + // Check the alphabet-offset table for any content + int alphabetNr; + for (alphabetNr = 0; alphabetNr < 26; alphabetNr++) { + if (READ_LE_UINT16(resource->data + alphabetNr * 2)) + break; + } + // If all of them were empty, we are definitely seeing SCI01 vocab in disguise (e.g. pq2 japanese) + if (alphabetNr == 26) { + warning("SCI0: Found SCI01 vocabulary in disguise"); + resourceType = kVocabularySCI1; + } } unsigned int seeker; - if (_vocabVersion == kVocabularySCI1) + if (resourceType == kVocabularySCI1) seeker = 255 * 2; // vocab.900 starts with 255 16-bit pointers which we don't use else seeker = 26 * 2; // vocab.000 starts with 26 16-bit pointers which we don't use @@ -102,13 +137,13 @@ bool Vocabulary::loadParserWords() { while (seeker < resource->size) { byte c; - currentwordpos = resource->data[seeker++]; // Parts of previous words may be re-used + currentWordPos = resource->data[seeker++]; // Parts of previous words may be re-used - if (_vocabVersion == kVocabularySCI1) { + if (resourceType == kVocabularySCI1) { c = 1; - while (seeker < resource->size && currentwordpos < 255 && c) { + while (seeker < resource->size && currentWordPos < 255 && c) { c = resource->data[seeker++]; - currentword[currentwordpos++] = c; + currentWord[currentWordPos++] = c; } if (seeker == resource->size) { warning("SCI1: Vocabulary not usable, disabling"); @@ -118,11 +153,11 @@ bool Vocabulary::loadParserWords() { } else { do { c = resource->data[seeker++]; - currentword[currentwordpos++] = c & 0x7f; // 0x80 is used to terminate the string + currentWord[currentWordPos++] = c & 0x7f; // 0x80 is used to terminate the string } while (c < 0x80); } - currentword[currentwordpos] = 0; + currentWord[currentWordPos] = 0; // Now decode class and group: c = resource->data[seeker + 1]; @@ -131,7 +166,7 @@ bool Vocabulary::loadParserWords() { newWord._group = (resource->data[seeker + 2]) | ((c & 0x0f) << 8); // Add the word to the list - _parserWords[currentword] = newWord; + _parserWords[currentWord] = newWord; seeker += 3; } @@ -142,23 +177,20 @@ bool Vocabulary::loadParserWords() { const char *Vocabulary::getAnyWordFromGroup(int group) { if (group == VOCAB_MAGIC_NUMBER_GROUP) return "{number}"; + if (group == VOCAB_MAGIC_NOTHING_GROUP) + return "{nothing}"; - for (WordMap::const_iterator i = _parserWords.begin(); i != _parserWords.end(); ++i) + for (WordMap::const_iterator i = _parserWords.begin(); i != _parserWords.end(); ++i) { if (i->_value._group == group) return i->_key.c_str(); + } return "{invalid}"; } bool Vocabulary::loadSuffixes() { // Determine if we can find a SCI1 suffix vocabulary first - Resource* resource = NULL; - - if (_vocabVersion == kVocabularySCI0) - resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB), 1); - else - resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB), 1); - + Resource* resource = _resMan->findResource(ResourceId(kResourceTypeVocab, _resourceIdSuffixes), 1); if (!resource) return false; // No vocabulary found @@ -171,7 +203,7 @@ bool Vocabulary::loadSuffixes() { suffix.alt_suffix_length = strlen(suffix.alt_suffix); seeker += suffix.alt_suffix_length + 1; // Hit end of string - suffix.class_mask = (int16)READ_BE_UINT16(resource->data + seeker); + suffix.result_class = (int16)READ_BE_UINT16(resource->data + seeker); seeker += 2; // Beginning of next string - skip leading '*' @@ -181,7 +213,7 @@ bool Vocabulary::loadSuffixes() { suffix.word_suffix_length = strlen(suffix.word_suffix); seeker += suffix.word_suffix_length + 1; - suffix.result_class = (int16)READ_BE_UINT16(resource->data + seeker); + suffix.class_mask = (int16)READ_BE_UINT16(resource->data + seeker); seeker += 3; // Next entry _parserSuffixes.push_back(suffix); @@ -191,13 +223,7 @@ bool Vocabulary::loadSuffixes() { } void Vocabulary::freeSuffixes() { - Resource* resource = NULL; - - if (_vocabVersion == kVocabularySCI0) - resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB), 0); - else - resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB), 0); - + Resource* resource = _resMan->findResource(ResourceId(kResourceTypeVocab, _resourceIdSuffixes), 0); if (resource) _resMan->unlockResource(resource); @@ -205,12 +231,7 @@ void Vocabulary::freeSuffixes() { } bool Vocabulary::loadBranches() { - Resource *resource = NULL; - - if (_vocabVersion == kVocabularySCI0) - resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES), 0); - else - resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES), 0); + Resource *resource = _resMan->findResource(ResourceId(kResourceTypeVocab, _resourceIdBranches), 0); _parserBranches.clear(); @@ -243,7 +264,7 @@ bool Vocabulary::loadBranches() { return true; } - +// we assume that *word points to an already lowercased word ResultWord Vocabulary::lookupWord(const char *word, int word_len) { Common::String tempword(word, word_len); @@ -270,7 +291,7 @@ ResultWord Vocabulary::lookupWord(const char *word, int word_len) { int suff_index = word_len - suffix->alt_suffix_length; // Offset of the start of the suffix - if (scumm_strnicmp(suffix->alt_suffix, word + suff_index, suffix->alt_suffix_length) == 0) { // Suffix matched! + if (strncmp(suffix->alt_suffix, word + suff_index, suffix->alt_suffix_length) == 0) { // Suffix matched! // Terminate word at suffix start position...: Common::String tempword2(word, MIN(word_len, suff_index)); @@ -300,82 +321,109 @@ ResultWord Vocabulary::lookupWord(const char *word, int word_len) { return retval; } -void Vocabulary::decipherSaidBlock(byte *addr) { - byte nextitem; +void Vocabulary::debugDecipherSaidBlock(const byte *addr) { + bool first = true; + uint16 nextItem; do { - nextitem = *addr++; - - if (nextitem < 0xf0) { - nextitem = nextitem << 8 | *addr++; - printf(" %s[%03x]", getAnyWordFromGroup(nextitem), nextitem); - - nextitem = 42; // Make sure that group 0xff doesn't abort - } else switch (nextitem) { - case 0xf0: - printf(" ,"); - break; - case 0xf1: - printf(" &"); - break; - case 0xf2: - printf(" /"); - break; - case 0xf3: - printf(" ("); - break; - case 0xf4: - printf(" )"); - break; - case 0xf5: - printf(" ["); - break; - case 0xf6: - printf(" ]"); - break; - case 0xf7: - printf(" #"); - break; - case 0xf8: - printf(" <"); - break; - case 0xf9: - printf(" >"); - break; - case 0xff: - break; + nextItem = *addr++; + if (nextItem != 0xff) { + if ((!first) && (nextItem != 0xf0)) + printf(" "); + first = false; + + if (nextItem < 0xf0) { + nextItem = nextItem << 8 | *addr++; + printf("%s{%03x}", getAnyWordFromGroup(nextItem), nextItem); + + nextItem = 0; // Make sure that group 0xff doesn't abort + } else switch (nextItem) { + case 0xf0: + printf(","); + break; + case 0xf1: + printf("&"); + break; + case 0xf2: + printf("/"); + break; + case 0xf3: + printf("("); + break; + case 0xf4: + printf(")"); + break; + case 0xf5: + printf("["); + break; + case 0xf6: + printf("]"); + break; + case 0xf7: + printf("#"); + break; + case 0xf8: + printf("<"); + break; + case 0xf9: + printf(">"); + break; + case 0xff: + break; } - } while (nextitem != 0xff); - - printf("\n"); + } + } while (nextItem != 0xff); } +static const byte lowerCaseMap[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, // 0x00 + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, // 0x10 + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, // 0x20 + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, // 0x30 + 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', // 0x40 + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, // 0x50 + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, // 0x60 + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, // 0x70 + 0x87, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x84, 0x86, // 0x80 + //^^ ^^^^ ^^^^ + 0x82, 0x91, 0x91, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x94, 0x81, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, // 0x90 + //^^ ^^^^ ^^^^ ^^^^ + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa4, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, // 0xa0 + // ^^^^ + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, // 0xb0 + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, // 0xc0 + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, // 0xd0 + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, // 0xe0 + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff // 0xf0 +}; + bool Vocabulary::tokenizeString(ResultWordList &retval, const char *sentence, char **error) { - const char *lastword = sentence; + char currentWord[VOCAB_MAX_WORDLENGTH] = ""; int pos_in_sentence = 0; - char c; - int wordlen = 0; + unsigned char c; + int wordLen = 0; *error = NULL; do { c = sentence[pos_in_sentence++]; - - if (isalnum(c) || (c == '-' && wordlen)) - ++wordlen; + if (isalnum(c) || (c == '-' && wordLen) || (c >= 0x80)) { + currentWord[wordLen] = lowerCaseMap[c]; + ++wordLen; + } // Continue on this word */ // Words may contain a '-', but may not // start with one. else { - if (wordlen) { // Finished a word? + if (wordLen) { // Finished a word? - ResultWord lookup_result = lookupWord(lastword, wordlen); + ResultWord lookup_result = lookupWord(currentWord, wordLen); // Look it up if (lookup_result._class == -1) { // Not found? - *error = (char *)calloc(wordlen + 1, 1); - strncpy(*error, lastword, wordlen); // Set the offending word + *error = (char *)calloc(wordLen + 1, 1); + strncpy(*error, currentWord, wordLen); // Set the offending word retval.clear(); return false; // And return with error } @@ -384,8 +432,7 @@ bool Vocabulary::tokenizeString(ResultWordList &retval, const char *sentence, ch retval.push_back(lookup_result); } - lastword = sentence + pos_in_sentence; - wordlen = 0; + wordLen = 0; } } while (c); // Until terminator is hit @@ -394,7 +441,7 @@ bool Vocabulary::tokenizeString(ResultWordList &retval, const char *sentence, ch } void Vocabulary::printSuffixes() const { - char word_buf[256], alt_buf[256]; + char word_buf[VOCAB_MAX_WORDLENGTH], alt_buf[VOCAB_MAX_WORDLENGTH]; Console *con = g_sci->getSciDebugger(); int i = 0; @@ -423,30 +470,25 @@ void Vocabulary::printParserWords() const { con->DebugPrintf("\n"); } -void _vocab_recursive_ptree_dump_treelike(parse_tree_node_t *nodes, int nr, int prevnr) { - if ((nr > VOCAB_TREE_NODES)/* || (nr < prevnr)*/) { - printf("Error(%04x)", nr); - return; - } +void _vocab_recursive_ptree_dump_treelike(ParseTreeNode *tree) { + assert(tree); - if (nodes[nr].type == kParseTreeLeafNode) - //printf("[%03x]%04x", nr, nodes[nr].content.value); - printf("%x", nodes[nr].content.value); + if (tree->type == kParseTreeLeafNode) + printf("%x", tree->value); else { - int lbranch = nodes[nr].content.branches[0]; - int rbranch = nodes[nr].content.branches[1]; - //printf("<[%03x]", nr); + ParseTreeNode* lbranch = tree->left; + ParseTreeNode* rbranch = tree->right; printf("<"); if (lbranch) - _vocab_recursive_ptree_dump_treelike(nodes, lbranch, nr); + _vocab_recursive_ptree_dump_treelike(lbranch); else printf("NULL"); printf(","); if (rbranch) - _vocab_recursive_ptree_dump_treelike(nodes, rbranch, nr); + _vocab_recursive_ptree_dump_treelike(rbranch); else printf("NULL"); @@ -454,55 +496,52 @@ void _vocab_recursive_ptree_dump_treelike(parse_tree_node_t *nodes, int nr, int } } -void _vocab_recursive_ptree_dump(parse_tree_node_t *nodes, int nr, int prevnr, int blanks) { - int lbranch = nodes[nr].content.branches[0]; - int rbranch = nodes[nr].content.branches[1]; - int i; +void _vocab_recursive_ptree_dump(ParseTreeNode *tree, int blanks) { + assert(tree); - if (nodes[nr].type == kParseTreeLeafNode) { - printf("vocab_dump_parse_tree: Error: consp is nil for element %03x\n", nr); - return; - } + ParseTreeNode* lbranch = tree->left; + ParseTreeNode* rbranch = tree->right; + int i; - if ((nr > VOCAB_TREE_NODES)/* || (nr < prevnr)*/) { - printf("Error(%04x))", nr); + if (tree->type == kParseTreeLeafNode) { + printf("vocab_dump_parse_tree: Error: consp is nil\n"); return; } if (lbranch) { - if (nodes[lbranch].type == kParseTreeBranchNode) { + if (lbranch->type == kParseTreeBranchNode) { printf("\n"); for (i = 0; i < blanks; i++) printf(" "); printf("("); - _vocab_recursive_ptree_dump(nodes, lbranch, nr, blanks + 1); + _vocab_recursive_ptree_dump(lbranch, blanks + 1); printf(")\n"); for (i = 0; i < blanks; i++) printf(" "); } else - printf("%x", nodes[lbranch].content.value); + printf("%x", lbranch->value); printf(" "); }/* else printf ("nil");*/ if (rbranch) { - if (nodes[rbranch].type == kParseTreeBranchNode) - _vocab_recursive_ptree_dump(nodes, rbranch, nr, blanks); + if (rbranch->type == kParseTreeBranchNode) + _vocab_recursive_ptree_dump(rbranch, blanks); else - printf("%x", nodes[rbranch].content.value); + printf("%x", rbranch->value); }/* else printf("nil");*/ } -void vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes) { +void vocab_dump_parse_tree(const char *tree_name, ParseTreeNode *nodes) { //_vocab_recursive_ptree_dump_treelike(nodes, 0, 0); printf("(setq %s \n'(", tree_name); - _vocab_recursive_ptree_dump(nodes, 0, 0, 1); + _vocab_recursive_ptree_dump(nodes, 1); printf("))\n"); } void Vocabulary::dumpParseTree() { //_vocab_recursive_ptree_dump_treelike(nodes, 0, 0); printf("(setq parse-tree \n'("); - _vocab_recursive_ptree_dump(_parserNodes, 0, 0, 1); + _vocab_recursive_ptree_dump(_parserNodes, 1); printf("))\n"); } @@ -522,10 +561,10 @@ void Vocabulary::printParserNodes(int num) { for (int i = 0; i < num; i++) { con->DebugPrintf(" Node %03x: ", i); if (_parserNodes[i].type == kParseTreeLeafNode) - con->DebugPrintf("Leaf: %04x\n", _parserNodes[i].content.value); + con->DebugPrintf("Leaf: %04x\n", _parserNodes[i].value); else - con->DebugPrintf("Branch: ->%04x, ->%04x\n", _parserNodes[i].content.branches[0], - _parserNodes[i].content.branches[1]); + con->DebugPrintf("Branch: ->%04x, ->%04x\n", _parserNodes[i].left, + _parserNodes[i].right); } } @@ -538,7 +577,7 @@ int Vocabulary::parseNodes(int *i, int *pos, int type, int nr, int argc, const c if (type == kParseNumber) { _parserNodes[*pos += 1].type = kParseTreeLeafNode; - _parserNodes[*pos].content.value = nr; + _parserNodes[*pos].value = nr; return *pos; } if (type == kParseEndOfInput) { @@ -570,7 +609,15 @@ int Vocabulary::parseNodes(int *i, int *pos, int type, int nr, int argc, const c } } - if ((newPos = _parserNodes[oldPos].content.branches[j] = parseNodes(i, pos, nextToken, nextValue, argc, argv)) == -1) + newPos = parseNodes(i, pos, nextToken, nextValue, argc, argv); + + if (j == 0) + _parserNodes[oldPos].left = &_parserNodes[newPos]; + else + _parserNodes[oldPos].right = &_parserNodes[newPos]; + + + if (newPos == -1) return -1; } diff --git a/engines/sci/parser/vocabulary.h b/engines/sci/parser/vocabulary.h index dccef0f5f3..d4df8af715 100644 --- a/engines/sci/parser/vocabulary.h +++ b/engines/sci/parser/vocabulary.h @@ -73,13 +73,16 @@ enum { kParseNumber = 4 }; +#define VOCAB_MAX_WORDLENGTH 256 + /* Anywords are ignored by the parser */ #define VOCAB_CLASS_ANYWORD 0xff /* This word class is used for numbers */ #define VOCAB_MAGIC_NUMBER_GROUP 0xffd /* 0xffe ? */ +#define VOCAB_MAGIC_NOTHING_GROUP 0xffe -/* Number of nodes for each parse_tree_node structure */ +/* Number of nodes for each ParseTreeNode structure */ #define VOCAB_TREE_NODES 500 #define VOCAB_TREE_NODE_LAST_WORD_STORAGE 0x140 @@ -115,7 +118,7 @@ struct ResultWord { typedef Common::List<ResultWord> ResultWordList; -typedef Common::HashMap<Common::String, ResultWord, Common::IgnoreCase_Hash, Common::IgnoreCase_EqualTo> WordMap; +typedef Common::HashMap<Common::String, ResultWord, Common::CaseSensitiveString_Hash, Common::CaseSensitiveString_EqualTo> WordMap; struct ParseRuleList; @@ -149,16 +152,16 @@ struct parse_tree_branch_t { }; enum ParseTypes { - kParseTreeLeafNode = 0, - kParseTreeBranchNode = 1 + kParseTreeWordNode = 4, + kParseTreeLeafNode = 5, + kParseTreeBranchNode = 6 }; -struct parse_tree_node_t { +struct ParseTreeNode { ParseTypes type; /**< leaf or branch */ - union { - int value; /**< For leaves */ - short branches[2]; /**< For branches */ - } content; + int value; /**< For leaves */ + ParseTreeNode* left; /**< Left child, for branches */ + ParseTreeNode* right; /**< Right child, for branches */ }; enum VocabularyVersions { @@ -168,9 +171,12 @@ enum VocabularyVersions { class Vocabulary { public: - Vocabulary(ResourceManager *resMan); + Vocabulary(ResourceManager *resMan, bool foreign); ~Vocabulary(); + // reset parser status + void reset(); + /** * Gets any word from the specified group. For debugging only. * @param group Group number @@ -229,7 +235,7 @@ public: * For debugging only. * @param pos pointer to the data to dump */ - void decipherSaidBlock(byte *pos); + void debugDecipherSaidBlock(const byte *pos); /** * Prints the parser suffixes to the debug console. @@ -301,6 +307,11 @@ private: ResourceManager *_resMan; VocabularyVersions _vocabVersion; + bool _foreign; + uint16 _resourceIdWords; + uint16 _resourceIdSuffixes; + uint16 _resourceIdBranches; + // Parser-related lists SuffixList _parserSuffixes; ParseRuleList *_parserRules; /**< GNF rules used in the parser algorithm */ @@ -310,7 +321,7 @@ private: public: // Accessed by said() - parse_tree_node_t _parserNodes[VOCAB_TREE_NODES]; /**< The parse tree */ + ParseTreeNode _parserNodes[VOCAB_TREE_NODES]; /**< The parse tree */ // Parser data: reg_t parser_base; /**< Base address for the parser error reporting mechanism */ @@ -323,7 +334,7 @@ public: * @param tree_name Name of the tree to dump (free-form) * @param nodes The nodes containing the parse tree */ -void vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes); +void vocab_dump_parse_tree(const char *tree_name, ParseTreeNode *nodes); @@ -334,7 +345,7 @@ void vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes); * @param verbose Whether to display the parse tree after building it * @return 1 on a match, 0 otherwise */ -int said(EngineState *s, byte *spec, bool verbose); +int said(EngineState *s, const byte *spec, bool verbose); } // End of namespace Sci |