aboutsummaryrefslogtreecommitdiff
path: root/engines/sci/parser
diff options
context:
space:
mode:
Diffstat (limited to 'engines/sci/parser')
-rw-r--r--engines/sci/parser/grammar.cpp44
-rw-r--r--engines/sci/parser/said.cpp2831
-rw-r--r--engines/sci/parser/said.y839
-rw-r--r--engines/sci/parser/vocabulary.cpp382
-rw-r--r--engines/sci/parser/vocabulary.h39
5 files changed, 973 insertions, 3162 deletions
diff --git a/engines/sci/parser/grammar.cpp b/engines/sci/parser/grammar.cpp
index 070e6767cf..6f37b49919 100644
--- a/engines/sci/parser/grammar.cpp
+++ b/engines/sci/parser/grammar.cpp
@@ -422,44 +422,44 @@ ParseRuleList *Vocabulary::buildGNF(bool verbose) {
return tlist;
}
-static int _vbpt_pareno(parse_tree_node_t *nodes, int *pos, int base) {
+static int _vbpt_pareno(ParseTreeNode *nodes, int *pos, int base) {
// Opens parentheses
- nodes[base].content.branches[0] = (*pos) + 1;
+ nodes[base].left = &nodes[(*pos) + 1];
nodes[++(*pos)].type = kParseTreeBranchNode;
- nodes[*pos].content.branches[0] = 0;
- nodes[*pos].content.branches[1] = 0;
+ nodes[*pos].left = 0;
+ nodes[*pos].right = 0;
return *pos;
}
-static int _vbpt_parenc(parse_tree_node_t *nodes, int *pos, int paren) {
+static int _vbpt_parenc(ParseTreeNode *nodes, int *pos, int paren) {
// Closes parentheses for appending
- nodes[paren].content.branches[1] = ++(*pos);
+ nodes[paren].right = &nodes[++(*pos)];
nodes[*pos].type = kParseTreeBranchNode;
- nodes[*pos].content.branches[0] = 0;
- nodes[*pos].content.branches[1] = 0;
+ nodes[*pos].left = 0;
+ nodes[*pos].right = 0;
return *pos;
}
-static int _vbpt_append(parse_tree_node_t *nodes, int *pos, int base, int value) {
+static int _vbpt_append(ParseTreeNode *nodes, int *pos, int base, int value) {
// writes one value to an existing base node and creates a successor node for writing
- nodes[base].content.branches[0] = ++(*pos);
+ nodes[base].left = &nodes[++(*pos)];
nodes[*pos].type = kParseTreeLeafNode;
- nodes[*pos].content.value = value;
- nodes[base].content.branches[1] = ++(*pos);
+ nodes[*pos].value = value;
+ nodes[base].right = &nodes[++(*pos)];
nodes[*pos].type = kParseTreeBranchNode;
- nodes[*pos].content.branches[0] = 0;
- nodes[*pos].content.branches[1] = 0;
+ nodes[*pos].left = 0;
+ nodes[*pos].right = 0;
return *pos;
}
-static int _vbpt_terminate(parse_tree_node_t *nodes, int *pos, int base, int value) {
+static int _vbpt_terminate(ParseTreeNode *nodes, int *pos, int base, int value) {
// Terminates, overwriting a nextwrite forknode
nodes[base].type = kParseTreeLeafNode;
- nodes[base].content.value = value;
+ nodes[base].value = value;
return *pos;
}
-static int _vbpt_write_subexpression(parse_tree_node_t *nodes, int *pos, ParseRule *rule, uint rulepos, int writepos) {
+static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *rule, uint rulepos, int writepos) {
uint token;
while ((token = ((rulepos < rule->_data.size()) ? rule->_data[rulepos++] : TOKEN_CPAREN)) != TOKEN_CPAREN) {
@@ -565,15 +565,15 @@ int Vocabulary::parseGNF(const ResultWordList &words, bool verbose) {
int temp, pos;
_parserNodes[0].type = kParseTreeBranchNode;
- _parserNodes[0].content.branches[0] = 1;
- _parserNodes[0].content.branches[1] = 2;
+ _parserNodes[0].left = &_parserNodes[1];
+ _parserNodes[0].right = &_parserNodes[2];
_parserNodes[1].type = kParseTreeLeafNode;
- _parserNodes[1].content.value = 0x141;
+ _parserNodes[1].value = 0x141;
_parserNodes[2].type = kParseTreeBranchNode;
- _parserNodes[2].content.branches[0] = 0;
- _parserNodes[2].content.branches[1] = 0;
+ _parserNodes[2].left = 0;
+ _parserNodes[2].right = 0;
pos = 2;
diff --git a/engines/sci/parser/said.cpp b/engines/sci/parser/said.cpp
index f49704372a..9c07be2dff 100644
--- a/engines/sci/parser/said.cpp
+++ b/engines/sci/parser/said.cpp
@@ -1,111 +1,3 @@
-/* A Bison parser, made by GNU Bison 2.3. */
-
-/* Skeleton implementation for Bison's Yacc-like parsers in C
-
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
- Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA. */
-
-/* As a special exception, you may create a larger work that contains
- part or all of the Bison parser skeleton and distribute that work
- under terms of your choice, so long as that work isn't itself a
- parser generator using the skeleton or a modified version thereof
- as a parser skeleton. Alternatively, if you modify or redistribute
- the parser skeleton itself, you may (at your option) remove this
- special exception, which will cause the skeleton and the resulting
- Bison output files to be licensed under the GNU General Public
- License without this special exception.
-
- This special exception was added by the Free Software Foundation in
- version 2.2 of Bison. */
-
-/* C LALR(1) parser skeleton written by Richard Stallman, by
- simplifying the original so-called "semantic" parser. */
-
-/* All symbols defined below should begin with yy or YY, to avoid
- infringing on user name space. This should be done even for local
- variables, as they might otherwise be expanded by user macros.
- There are some unavoidable exceptions within include files to
- define necessary library symbols; they are noted "INFRINGES ON
- USER NAME SPACE" below. */
-
-/* Identify Bison output. */
-#define YYBISON 1
-
-/* Bison version. */
-#define YYBISON_VERSION "2.3"
-
-/* Skeleton name. */
-#define YYSKELETON_NAME "yacc.c"
-
-/* Pure parsers. */
-#define YYPURE 0
-
-/* Using locations. */
-#define YYLSP_NEEDED 0
-
-
-
-/* Tokens. */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- WGROUP = 258,
- YY_COMMA = 259,
- YY_AMP = 260,
- YY_SLASH = 261,
- YY_PARENO = 262,
- YY_PARENC = 263,
- YY_BRACKETSO = 264,
- YY_BRACKETSC = 265,
- YY_HASH = 266,
- YY_LT = 267,
- YY_GT = 268,
- YY_BRACKETSO_LT = 269,
- YY_BRACKETSO_SLASH = 270,
- YY_LT_BRACKETSO = 271,
- YY_LT_PARENO = 272
- };
-#endif
-/* Tokens. */
-#define WGROUP 258
-#define YY_COMMA 259
-#define YY_AMP 260
-#define YY_SLASH 261
-#define YY_PARENO 262
-#define YY_PARENC 263
-#define YY_BRACKETSO 264
-#define YY_BRACKETSC 265
-#define YY_HASH 266
-#define YY_LT 267
-#define YY_GT 268
-#define YY_BRACKETSO_LT 269
-#define YY_BRACKETSO_SLASH 270
-#define YY_LT_BRACKETSO 271
-#define YY_LT_PARENO 272
-
-
-
-
-/* Copy the first part of user declarations. */
-
-
/* ScummVM - Graphic Adventure Engine
*
* ScummVM is the legal property of its developers, whose names
@@ -133,14 +25,6 @@
#include "sci/engine/state.h"
-
-// Bison generates an empty switch statement that gives a warning in MSVC.
-// This disables that warning.
-#ifdef _MSC_VER
-#pragma warning(disable:4065)
-#endif
-
-
namespace Sci {
#define SAID_BRANCH_NULL 0
@@ -150,25 +34,8 @@ namespace Sci {
// Maximum number of words to be expected in a parsed sentence
#define AUGMENT_MAX_WORDS 64
-
-#define ANYWORD 0xfff
-
-#define WORD_TYPE_BASE 0x141
-#define WORD_TYPE_REF 0x144
-#define WORD_TYPE_SYNTACTIC_SUGAR 0x145
-
-#define AUGMENT_SENTENCE_PART_BRACKETS 0x152
-
-// Minor numbers
-#define AUGMENT_SENTENCE_MINOR_MATCH_PHRASE 0x14c
-#define AUGMENT_SENTENCE_MINOR_MATCH_WORD 0x153
-#define AUGMENT_SENTENCE_MINOR_RECURSE 0x144
-#define AUGMENT_SENTENCE_MINOR_PARENTHESES 0x14f
-
-
-#undef YYDEBUG /*1*/
-//#define SAID_DEBUG*/
-//#define SCI_DEBUG_PARSE_TREE_AUGMENTATION // uncomment to debug parse tree augmentation
+// uncomment to debug parse tree augmentation
+//#define SCI_DEBUG_PARSE_TREE_AUGMENTATION
#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION
@@ -179,1840 +46,622 @@ void print_nothing(...) { }
#endif
-static char *said_parse_error;
-
static int said_token;
static int said_tokens_nr;
static int said_tokens[MAX_SAID_TOKENS];
-static int said_blessed; // increminated by said_top_branch
-
-static int said_tree_pos; // Set to 0 if we're out of space
-#define SAID_TREE_START 4; // Reserve space for the 4 top nodes
-
-#define VALUE_IGNORE -424242
-
-static parse_tree_node_t said_tree[VOCAB_TREE_NODES];
-
-typedef int wgroup_t;
-typedef int tree_t;
-typedef int said_spec_t;
-
-static tree_t said_aug_branch(int, int, tree_t, tree_t);
-static tree_t said_attach_branch(tree_t, tree_t);
-/*
-static tree_t said_wgroup_branch(wgroup_t);
-*/
-static said_spec_t said_top_branch(tree_t);
-static tree_t said_paren(tree_t, tree_t);
-static tree_t said_value(int, tree_t);
-static tree_t said_terminal(int);
-
-static int yylex();
-
-static int yyerror(const char *s) {
- said_parse_error = strdup(s);
- return 1; /* Abort */
-}
-
-
-
-/* Enabling traces. */
-#ifndef YYDEBUG
-# define YYDEBUG 0
-#endif
-
-/* Enabling verbose error messages. */
-#ifdef YYERROR_VERBOSE
-# undef YYERROR_VERBOSE
-# define YYERROR_VERBOSE 1
-#else
-# define YYERROR_VERBOSE 0
-#endif
-
-/* Enabling the token table. */
-#ifndef YYTOKEN_TABLE
-# define YYTOKEN_TABLE 0
-#endif
-
-#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
-typedef int YYSTYPE;
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
-# define YYSTYPE_IS_TRIVIAL 1
-#endif
-
-
-
-/* Copy the second part of user declarations. */
-
-
-/* Line 216 of yacc.c. */
-
-
-#ifdef short
-# undef short
-#endif
-#ifdef YYTYPE_UINT8
-typedef YYTYPE_UINT8 yytype_uint8;
-#else
-typedef unsigned char yytype_uint8;
-#endif
-
-#ifdef YYTYPE_INT8
-typedef YYTYPE_INT8 yytype_int8;
-#elif (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-typedef signed char yytype_int8;
-#else
-typedef short int yytype_int8;
-#endif
-
-#ifdef YYTYPE_UINT16
-typedef YYTYPE_UINT16 yytype_uint16;
-#else
-typedef unsigned short int yytype_uint16;
-#endif
-
-#ifdef YYTYPE_INT16
-typedef YYTYPE_INT16 yytype_int16;
-#else
-typedef short int yytype_int16;
-#endif
-
-#ifndef YYSIZE_T
-# ifdef __SIZE_TYPE__
-# define YYSIZE_T __SIZE_TYPE__
-# elif defined size_t
-# define YYSIZE_T size_t
-# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-# include <stddef.h> /* INFRINGES ON USER NAME SPACE */
-# define YYSIZE_T size_t
-# else
-# define YYSIZE_T unsigned int
-# endif
-#endif
-
-#define YYSIZE_MAXIMUM ((YYSIZE_T) -1)
-
-#ifndef YY_
-# if YYENABLE_NLS
-# if ENABLE_NLS
-# include <libintl.h> /* INFRINGES ON USER NAME SPACE */
-# define YY_(msgid) dgettext ("bison-runtime", msgid)
-# endif
-# endif
-# ifndef YY_
-# define YY_(msgid) msgid
-# endif
-#endif
-
-/* Suppress unused-variable warnings by "using" E. */
-#if ! defined lint || defined __GNUC__
-# define YYUSE(e) ((void) (e))
-#else
-# define YYUSE(e) /* empty */
-#endif
-
-/* Identity function, used to suppress warnings about constant conditions. */
-#ifndef lint
-# define YYID(n) (n)
-#else
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static int
-YYID (int i)
-#else
-static int
-YYID (i)
- int i;
-#endif
-{
- return i;
-}
-#endif
-
-#if ! defined yyoverflow || YYERROR_VERBOSE
-
-/* The parser invokes alloca or malloc; define the necessary symbols. */
-
-# ifdef YYSTACK_USE_ALLOCA
-# if YYSTACK_USE_ALLOCA
-# ifdef __GNUC__
-# define YYSTACK_ALLOC __builtin_alloca
-# elif defined __BUILTIN_VA_ARG_INCR
-# include <alloca.h> /* INFRINGES ON USER NAME SPACE */
-# elif defined _AIX
-# define YYSTACK_ALLOC __alloca
-# elif defined _MSC_VER
-# include <malloc.h> /* INFRINGES ON USER NAME SPACE */
-# define alloca _alloca
-# else
-# define YYSTACK_ALLOC alloca
-# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
-# ifndef _STDLIB_H
-# define _STDLIB_H 1
-# endif
-# endif
-# endif
-# endif
-# endif
-
-# ifdef YYSTACK_ALLOC
- /* Pacify GCC's `empty if-body' warning. */
-# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0))
-# ifndef YYSTACK_ALLOC_MAXIMUM
- /* The OS might guarantee only one guard page at the bottom of the stack,
- and a page size can be as small as 4096 bytes. So we cannot safely
- invoke alloca (N) if N exceeds 4096. Use a slightly smaller number
- to allow for a few compiler-allocated temporary stack slots. */
-# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */
-# endif
-# else
-# define YYSTACK_ALLOC YYMALLOC
-# define YYSTACK_FREE YYFREE
-# ifndef YYSTACK_ALLOC_MAXIMUM
-# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
-# endif
-# if (defined __cplusplus && ! defined _STDLIB_H \
- && ! ((defined YYMALLOC || defined malloc) \
- && (defined YYFREE || defined free)))
-# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
-# ifndef _STDLIB_H
-# define _STDLIB_H 1
-# endif
-# endif
-# ifndef YYMALLOC
-# define YYMALLOC malloc
-# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
-# endif
-# endif
-# ifndef YYFREE
-# define YYFREE free
-# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-void free (void *); /* INFRINGES ON USER NAME SPACE */
-# endif
-# endif
-# endif
-#endif /* ! defined yyoverflow || YYERROR_VERBOSE */
-
-
-#if (! defined yyoverflow \
- && (! defined __cplusplus \
- || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
-
-/* A type that is properly aligned for any stack member. */
-union yyalloc
-{
- yytype_int16 yyss;
- YYSTYPE yyvs;
- };
-
-/* The size of the maximum gap between one aligned stack and the next. */
-# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
-
-/* The size of an array large to enough to hold all stacks, each with
- N elements. */
-# define YYSTACK_BYTES(N) \
- ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \
- + YYSTACK_GAP_MAXIMUM)
-
-/* Copy COUNT objects from FROM to TO. The source and destination do
- not overlap. */
-# ifndef YYCOPY
-# if defined __GNUC__ && 1 < __GNUC__
-# define YYCOPY(To, From, Count) \
- __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
-# else
-# define YYCOPY(To, From, Count) \
- do \
- { \
- YYSIZE_T yyi; \
- for (yyi = 0; yyi < (Count); yyi++) \
- (To)[yyi] = (From)[yyi]; \
- } \
- while (YYID (0))
-# endif
-# endif
-
-/* Relocate STACK from its old location to the new one. The
- local variables YYSIZE and YYSTACKSIZE give the old and new number of
- elements in the stack, and YYPTR gives the new location of the
- stack. Advance YYPTR to a properly aligned location for the next
- stack. */
-# define YYSTACK_RELOCATE(Stack) \
- do \
- { \
- YYSIZE_T yynewbytes; \
- YYCOPY (&yyptr->Stack, Stack, yysize); \
- Stack = &yyptr->Stack; \
- yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
- yyptr += yynewbytes / sizeof (*yyptr); \
- } \
- while (YYID (0))
-
-#endif
-
-/* YYFINAL -- State number of the termination state. */
-#define YYFINAL 23
-/* YYLAST -- Last index in YYTABLE. */
-#define YYLAST 80
-
-/* YYNTOKENS -- Number of terminals. */
-#define YYNTOKENS 18
-/* YYNNTS -- Number of nonterminals. */
-#define YYNNTS 13
-/* YYNRULES -- Number of rules. */
-#define YYNRULES 35
-/* YYNRULES -- Number of states. */
-#define YYNSTATES 69
-
-/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
-#define YYUNDEFTOK 2
-#define YYMAXUTOK 272
-
-#define YYTRANSLATE(YYX) \
- ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
-
-/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */
-static const yytype_uint8 yytranslate[] =
-{
- 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
- 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17
-};
-
-#if YYDEBUG
-/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
- YYRHS. */
-static const yytype_uint8 yyprhs[] =
-{
- 0, 0, 3, 6, 10, 15, 16, 18, 19, 21,
- 24, 29, 31, 34, 39, 41, 43, 45, 49, 51,
- 55, 59, 64, 70, 73, 75, 77, 79, 83, 88,
- 92, 97, 100, 105, 109, 112
-};
-
-/* YYRHS -- A `-1'-separated list of the rules' RHS. */
-static const yytype_int8 yyrhs[] =
-{
- 19, 0, -1, 21, 20, -1, 21, 22, 20, -1,
- 21, 22, 23, 20, -1, -1, 13, -1, -1, 27,
- -1, 6, 27, -1, 15, 6, 27, 10, -1, 6,
- -1, 6, 27, -1, 15, 6, 27, 10, -1, 6,
- -1, 3, -1, 26, -1, 9, 26, 10, -1, 24,
- -1, 7, 27, 8, -1, 26, 4, 26, -1, 26,
- 14, 29, 10, -1, 26, 4, 9, 26, 10, -1,
- 25, 28, -1, 25, -1, 28, -1, 29, -1, 14,
- 29, 10, -1, 29, 14, 29, 10, -1, 12, 24,
- 30, -1, 17, 7, 27, 8, -1, 12, 26, -1,
- 16, 9, 26, 10, -1, 12, 26, 30, -1, 12,
- 26, -1, 17, 7, 27, 8, -1
+static int said_tree_pos;
+#define SAID_TREE_START 4 // Reserve space for the 4 top nodes
+
+enum SaidToken {
+ TOKEN_COMMA = 0xF000,
+ TOKEN_AMP = 0xF100,
+ TOKEN_SLASH = 0xF200,
+ TOKEN_PARENO = 0xF300,
+ TOKEN_PARENC = 0xF400,
+ TOKEN_BRACKETO = 0xF500,
+ TOKEN_BRACKETC = 0xF600,
+ TOKEN_HASH = 0xF700,
+ TOKEN_LT = 0xF800,
+ TOKEN_GT = 0xF900,
+ TOKEN_TERM = 0xFF00
};
-/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
-static const yytype_uint8 yyrline[] =
-{
- 0, 130, 130, 132, 134, 140, 141, 148, 149, 155,
- 157, 159, 165, 167, 169, 174, 179, 181, 186, 188,
- 190, 192, 194, 199, 201, 203, 208, 210, 212, 217,
- 219, 221, 223, 228, 230, 232
+enum SaidWord {
+ WORD_NONE = 0x0ffe,
+ WORD_ANY = 0x0fff
};
-#endif
-#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
-/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
- First, the terminals, then, starting at YYNTOKENS, nonterminals. */
-static const char *const yytname[] =
-{
- "$end", "error", "$undefined", "WGROUP", "YY_COMMA", "YY_AMP",
- "YY_SLASH", "YY_PARENO", "YY_PARENC", "YY_BRACKETSO", "YY_BRACKETSC",
- "YY_HASH", "YY_LT", "YY_GT", "YY_BRACKETSO_LT", "YY_BRACKETSO_SLASH",
- "YY_LT_BRACKETSO", "YY_LT_PARENO", "$accept", "saidspec", "optcont",
- "leftspec", "midspec", "rightspec", "word", "cwordset", "wordset",
- "expr", "cwordrefset", "wordrefset", "recref", 0
-};
-#endif
-# ifdef YYPRINT
-/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
- token YYLEX-NUM. */
-static const yytype_uint16 yytoknum[] =
-{
- 0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
- 265, 266, 267, 268, 269, 270, 271, 272
-};
-# endif
-/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
-static const yytype_uint8 yyr1[] =
-{
- 0, 18, 19, 19, 19, 20, 20, 21, 21, 22,
- 22, 22, 23, 23, 23, 24, 25, 25, 26, 26,
- 26, 26, 26, 27, 27, 27, 28, 28, 28, 29,
- 29, 29, 29, 30, 30, 30
-};
+// TODO: maybe turn this into a proper n-ary tree instead of an
+// n-ary tree implemented in terms of a binary tree.
+// (Together with _parserNodes in Vocabulary)
-/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
-static const yytype_uint8 yyr2[] =
-{
- 0, 2, 2, 3, 4, 0, 1, 0, 1, 2,
- 4, 1, 2, 4, 1, 1, 1, 3, 1, 3,
- 3, 4, 5, 2, 1, 1, 1, 3, 4, 3,
- 4, 2, 4, 3, 2, 4
-};
+static ParseTreeNode said_tree[VOCAB_TREE_NODES];
-/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
- STATE-NUM when YYTABLE doesn't specify something else to do. Zero
- means the default is an error. */
-static const yytype_uint8 yydefact[] =
-{
- 7, 15, 0, 0, 0, 0, 0, 0, 0, 5,
- 18, 24, 16, 8, 25, 26, 0, 0, 18, 31,
- 0, 0, 0, 1, 11, 6, 0, 2, 5, 23,
- 0, 0, 0, 19, 17, 0, 0, 29, 27, 0,
- 0, 9, 0, 14, 0, 3, 5, 0, 20, 0,
- 0, 34, 0, 32, 30, 0, 12, 0, 4, 0,
- 21, 28, 33, 0, 10, 0, 22, 35, 13
-};
-
-/* YYDEFGOTO[NTERM-NUM]. */
-static const yytype_int8 yydefgoto[] =
-{
- -1, 8, 27, 9, 28, 46, 10, 11, 12, 13,
- 14, 15, 37
-};
-
-/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
- STATE-NUM. */
-#define YYPACT_NINF -24
-static const yytype_int8 yypact[] =
-{
- -1, -24, -1, 62, 62, 54, 1, 5, 18, 38,
- -24, 47, 3, -24, -24, 12, 23, 15, -3, 3,
- 28, 62, -1, -24, -1, -24, 42, -24, 39, -24,
- 53, 54, 54, -24, -24, 62, 50, -24, -24, 29,
- 41, -24, -1, -1, 52, -24, 55, 62, 3, 57,
- 63, 20, -1, -24, -24, 64, -24, -1, -24, 32,
- -24, -24, -24, 67, -24, 66, -24, -24, -24
-};
-
-/* YYPGOTO[NTERM-NUM]. */
-static const yytype_int8 yypgoto[] =
-{
- -24, -24, -23, -24, -24, -24, 68, -24, 0, -2,
- 69, -4, 26
-};
-
-/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
- positive, shift that token. If negative, reduce the rule which
- number is the opposite. If zero, do what YYDEFACT says.
- If YYTABLE_NINF, syntax error. */
-#define YYTABLE_NINF -1
-static const yytype_uint8 yytable[] =
-{
- 16, 20, 1, 17, 19, 45, 2, 30, 3, 35,
- 21, 4, 22, 5, 36, 6, 7, 31, 23, 30,
- 40, 39, 41, 58, 30, 34, 32, 49, 50, 31,
- 48, 33, 35, 30, 31, 51, 30, 36, 38, 53,
- 55, 56, 66, 31, 24, 43, 31, 59, 42, 54,
- 63, 25, 25, 26, 44, 65, 1, 52, 57, 4,
- 2, 5, 47, 6, 7, 1, 4, 60, 25, 2,
- 6, 7, 18, 61, 64, 67, 68, 62, 0, 0,
- 29
-};
-
-static const yytype_int8 yycheck[] =
-{
- 2, 5, 3, 3, 4, 28, 7, 4, 9, 12,
- 9, 12, 7, 14, 17, 16, 17, 14, 0, 4,
- 22, 21, 24, 46, 4, 10, 14, 31, 32, 14,
- 30, 8, 12, 4, 14, 35, 4, 17, 10, 10,
- 42, 43, 10, 14, 6, 6, 14, 47, 6, 8,
- 52, 13, 13, 15, 15, 57, 3, 7, 6, 12,
- 7, 14, 9, 16, 17, 3, 12, 10, 13, 7,
- 16, 17, 4, 10, 10, 8, 10, 51, -1, -1,
- 11
-};
-
-/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
- symbol of state STATE-NUM. */
-static const yytype_uint8 yystos[] =
-{
- 0, 3, 7, 9, 12, 14, 16, 17, 19, 21,
- 24, 25, 26, 27, 28, 29, 27, 26, 24, 26,
- 29, 9, 7, 0, 6, 13, 15, 20, 22, 28,
- 4, 14, 14, 8, 10, 12, 17, 30, 10, 26,
- 27, 27, 6, 6, 15, 20, 23, 9, 26, 29,
- 29, 26, 7, 10, 8, 27, 27, 6, 20, 26,
- 10, 10, 30, 27, 10, 27, 10, 8, 10
-};
-
-#define yyerrok (yyerrstatus = 0)
-#define yyclearin (yychar = YYEMPTY)
-#define YYEMPTY (-2)
-#define YYEOF 0
-
-#define YYACCEPT goto yyacceptlab
-#define YYABORT goto yyabortlab
-#define YYERROR goto yyerrorlab
-
-
-/* Like YYERROR except do call yyerror. This remains here temporarily
- to ease the transition to the new meaning of YYERROR, for GCC.
- Once GCC version 2 has supplanted version 1, this can go. */
-
-#define YYFAIL goto yyerrlab
-
-#define YYRECOVERING() (!!yyerrstatus)
-
-#define YYBACKUP(Token, Value) \
-do \
- if (yychar == YYEMPTY && yylen == 1) \
- { \
- yychar = (Token); \
- yylval = (Value); \
- yytoken = YYTRANSLATE (yychar); \
- YYPOPSTACK (1); \
- goto yybackup; \
- } \
- else \
- { \
- yyerror (YY_("syntax error: cannot back up")); \
- YYERROR; \
- } \
-while (YYID (0))
-
-
-#define YYTERROR 1
-#define YYERRCODE 256
-
-
-/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
- If N is 0, then set CURRENT to the empty location which ends
- the previous symbol: RHS[0] (always defined). */
-
-#define YYRHSLOC(Rhs, K) ((Rhs)[K])
-#ifndef YYLLOC_DEFAULT
-# define YYLLOC_DEFAULT(Current, Rhs, N) \
- do \
- if (YYID (N)) \
- { \
- (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
- (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
- (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
- (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
- } \
- else \
- { \
- (Current).first_line = (Current).last_line = \
- YYRHSLOC (Rhs, 0).last_line; \
- (Current).first_column = (Current).last_column = \
- YYRHSLOC (Rhs, 0).last_column; \
- } \
- while (YYID (0))
-#endif
-
-
-/* YY_LOCATION_PRINT -- Print the location on the stream.
- This macro was not mandated originally: define only if we know
- we won't break user code: when these are the locations we know. */
-
-#ifndef YY_LOCATION_PRINT
-# if YYLTYPE_IS_TRIVIAL
-# define YY_LOCATION_PRINT(File, Loc) \
- fprintf (File, "%d.%d-%d.%d", \
- (Loc).first_line, (Loc).first_column, \
- (Loc).last_line, (Loc).last_column)
-# else
-# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
-# endif
-#endif
+typedef int wgroup_t;
+typedef int said_spec_t;
-/* YYLEX -- calling `yylex' with the right arguments. */
-#ifdef YYLEX_PARAM
-# define YYLEX yylex (YYLEX_PARAM)
-#else
-# define YYLEX yylex ()
-#endif
+static ParseTreeNode* said_next_node() {
+ assert(said_tree_pos > 0 && said_tree_pos < VOCAB_TREE_NODES);
-/* Enable debugging if requested. */
-#if YYDEBUG
-
-# ifndef YYFPRINTF
-# include <stdio.h> /* INFRINGES ON USER NAME SPACE */
-# define YYFPRINTF fprintf
-# endif
-
-# define YYDPRINTF(Args) \
-do { \
- if (yydebug) \
- YYFPRINTF Args; \
-} while (YYID (0))
-
-# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
-do { \
- if (yydebug) \
- { \
- YYFPRINTF (stderr, "%s ", Title); \
- yy_symbol_print (stderr, \
- Type, Value); \
- YYFPRINTF (stderr, "\n"); \
- } \
-} while (YYID (0))
-
-
-/*--------------------------------.
-| Print this symbol on YYOUTPUT. |
-`--------------------------------*/
-
-/*ARGSUSED*/
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
-#else
-static void
-yy_symbol_value_print (yyoutput, yytype, yyvaluep)
- FILE *yyoutput;
- int yytype;
- YYSTYPE const * const yyvaluep;
-#endif
-{
- if (!yyvaluep)
- return;
-# ifdef YYPRINT
- if (yytype < YYNTOKENS)
- YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
-# else
- YYUSE (yyoutput);
-# endif
- switch (yytype)
- {
- default:
- break;
- }
+ return &said_tree[said_tree_pos++];
}
+static ParseTreeNode* said_leaf_node(ParseTreeNode* pos, int value) {
+ pos->type = kParseTreeLeafNode;
+ pos->value = value;
-/*--------------------------------.
-| Print this symbol on YYOUTPUT. |
-`--------------------------------*/
-
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
-#else
-static void
-yy_symbol_print (yyoutput, yytype, yyvaluep)
- FILE *yyoutput;
- int yytype;
- YYSTYPE const * const yyvaluep;
-#endif
-{
- if (yytype < YYNTOKENS)
- YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
- else
- YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
-
- yy_symbol_value_print (yyoutput, yytype, yyvaluep);
- YYFPRINTF (yyoutput, ")");
+ return pos;
}
-/*------------------------------------------------------------------.
-| yy_stack_print -- Print the state stack from its BOTTOM up to its |
-| TOP (included). |
-`------------------------------------------------------------------*/
+static ParseTreeNode* said_word_node(ParseTreeNode* pos, int value) {
+ pos->type = kParseTreeWordNode;
+ pos->value = value;
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_stack_print (yytype_int16 *bottom, yytype_int16 *top)
-#else
-static void
-yy_stack_print (bottom, top)
- yytype_int16 *bottom;
- yytype_int16 *top;
-#endif
-{
- YYFPRINTF (stderr, "Stack now");
- for (; bottom <= top; ++bottom)
- YYFPRINTF (stderr, " %d", *bottom);
- YYFPRINTF (stderr, "\n");
+ return pos;
}
-# define YY_STACK_PRINT(Bottom, Top) \
-do { \
- if (yydebug) \
- yy_stack_print ((Bottom), (Top)); \
-} while (YYID (0))
-
+static ParseTreeNode* said_branch_node(ParseTreeNode* pos,
+ ParseTreeNode* left,
+ ParseTreeNode* right) {
+ pos->type = kParseTreeBranchNode;
+ pos->left = left;
+ pos->right = right;
-/*------------------------------------------------.
-| Report that the YYRULE is going to be reduced. |
-`------------------------------------------------*/
-
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_reduce_print (YYSTYPE *yyvsp, int yyrule)
-#else
-static void
-yy_reduce_print (yyvsp, yyrule)
- YYSTYPE *yyvsp;
- int yyrule;
-#endif
-{
- int yynrhs = yyr2[yyrule];
- int yyi;
- unsigned long int yylno = yyrline[yyrule];
- YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n",
- yyrule - 1, yylno);
- /* The symbols being reduced. */
- for (yyi = 0; yyi < yynrhs; yyi++)
- {
- fprintf (stderr, " $%d = ", yyi + 1);
- yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi],
- &(yyvsp[(yyi + 1) - (yynrhs)])
- );
- fprintf (stderr, "\n");
- }
+ return pos;
}
-# define YY_REDUCE_PRINT(Rule) \
-do { \
- if (yydebug) \
- yy_reduce_print (yyvsp, Rule); \
-} while (YYID (0))
-
-/* Nonzero means print parse trace. It is left uninitialized so that
- multiple parsers can coexist. */
-int yydebug;
-#else /* !YYDEBUG */
-# define YYDPRINTF(Args)
-# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
-# define YY_STACK_PRINT(Bottom, Top)
-# define YY_REDUCE_PRINT(Rule)
-#endif /* !YYDEBUG */
-
-
-/* YYINITDEPTH -- initial size of the parser's stacks. */
-#ifndef YYINITDEPTH
-# define YYINITDEPTH 200
-#endif
+static ParseTreeNode* said_branch_attach_left(ParseTreeNode* pos,
+ ParseTreeNode* left) {
+ pos->type = kParseTreeBranchNode;
+ pos->left = left;
-/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
- if the built-in stack extension method is used).
-
- Do not make this value too large; the results are undefined if
- YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
- evaluated with infinite-precision integer arithmetic. */
-
-#ifndef YYMAXDEPTH
-# define YYMAXDEPTH 10000
-#endif
+ return pos;
-
+}
-#if YYERROR_VERBOSE
+static ParseTreeNode* said_branch_attach_right(ParseTreeNode* pos,
+ ParseTreeNode* right) {
+ pos->type = kParseTreeBranchNode;
+ pos->right = right;
-# ifndef yystrlen
-# if defined __GLIBC__ && defined _STRING_H
-# define yystrlen strlen
-# else
-/* Return the length of YYSTR. */
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static YYSIZE_T
-yystrlen (const char *yystr)
-#else
-static YYSIZE_T
-yystrlen (yystr)
- const char *yystr;
-#endif
-{
- YYSIZE_T yylen;
- for (yylen = 0; yystr[yylen]; yylen++)
- continue;
- return yylen;
+ return pos;
}
-# endif
-# endif
-
-# ifndef yystpcpy
-# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE
-# define yystpcpy stpcpy
-# else
-/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
- YYDEST. */
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static char *
-yystpcpy (char *yydest, const char *yysrc)
-#else
-static char *
-yystpcpy (yydest, yysrc)
- char *yydest;
- const char *yysrc;
-#endif
-{
- char *yyd = yydest;
- const char *yys = yysrc;
- while ((*yyd++ = *yys++) != '\0')
- continue;
- return yyd - 1;
-}
-# endif
-# endif
-
-# ifndef yytnamerr
-/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
- quotes and backslashes, so that it's suitable for yyerror. The
- heuristic is that double-quoting is unnecessary unless the string
- contains an apostrophe, a comma, or backslash (other than
- backslash-backslash). YYSTR is taken from yytname. If YYRES is
- null, do not copy; instead, return the length of what the result
- would have been. */
-static YYSIZE_T
-yytnamerr (char *yyres, const char *yystr)
-{
- if (*yystr == '"')
- {
- YYSIZE_T yyn = 0;
- char const *yyp = yystr;
-
- for (;;)
- switch (*++yyp)
- {
- case '\'':
- case ',':
- goto do_not_strip_quotes;
-
- case '\\':
- if (*++yyp != '\\')
- goto do_not_strip_quotes;
- /* Fall through. */
- default:
- if (yyres)
- yyres[yyn] = *yyp;
- yyn++;
- break;
-
- case '"':
- if (yyres)
- yyres[yyn] = '\0';
- return yyn;
- }
- do_not_strip_quotes: ;
- }
-
- if (! yyres)
- return yystrlen (yystr);
-
- return yystpcpy (yyres, yystr) - yyres;
-}
-# endif
-
-/* Copy into YYRESULT an error message about the unexpected token
- YYCHAR while in state YYSTATE. Return the number of bytes copied,
- including the terminating null byte. If YYRESULT is null, do not
- copy anything; just return the number of bytes that would be
- copied. As a special case, return 0 if an ordinary "syntax error"
- message will do. Return YYSIZE_MAXIMUM if overflow occurs during
- size calculation. */
-static YYSIZE_T
-yysyntax_error (char *yyresult, int yystate, int yychar)
-{
- int yyn = yypact[yystate];
-
- if (! (YYPACT_NINF < yyn && yyn <= YYLAST))
- return 0;
- else
- {
- int yytype = YYTRANSLATE (yychar);
- YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]);
- YYSIZE_T yysize = yysize0;
- YYSIZE_T yysize1;
- int yysize_overflow = 0;
- enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
- char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
- int yyx;
-
-# if 0
- /* This is so xgettext sees the translatable formats that are
- constructed on the fly. */
- YY_("syntax error, unexpected %s");
- YY_("syntax error, unexpected %s, expecting %s");
- YY_("syntax error, unexpected %s, expecting %s or %s");
- YY_("syntax error, unexpected %s, expecting %s or %s or %s");
- YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s");
-# endif
- char *yyfmt;
- char const *yyf;
- static char const yyunexpected[] = "syntax error, unexpected %s";
- static char const yyexpecting[] = ", expecting %s";
- static char const yyor[] = " or %s";
- char yyformat[sizeof yyunexpected
- + sizeof yyexpecting - 1
- + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2)
- * (sizeof yyor - 1))];
- char const *yyprefix = yyexpecting;
-
- /* Start YYX at -YYN if negative to avoid negative indexes in
- YYCHECK. */
- int yyxbegin = yyn < 0 ? -yyn : 0;
-
- /* Stay within bounds of both yycheck and yytname. */
- int yychecklim = YYLAST - yyn + 1;
- int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
- int yycount = 1;
-
- yyarg[0] = yytname[yytype];
- yyfmt = yystpcpy (yyformat, yyunexpected);
-
- for (yyx = yyxbegin; yyx < yyxend; ++yyx)
- if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
- {
- if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
- {
- yycount = 1;
- yysize = yysize0;
- yyformat[sizeof yyunexpected - 1] = '\0';
- break;
- }
- yyarg[yycount++] = yytname[yyx];
- yysize1 = yysize + yytnamerr (0, yytname[yyx]);
- yysize_overflow |= (yysize1 < yysize);
- yysize = yysize1;
- yyfmt = yystpcpy (yyfmt, yyprefix);
- yyprefix = yyor;
- }
-
- yyf = YY_(yyformat);
- yysize1 = yysize + yystrlen (yyf);
- yysize_overflow |= (yysize1 < yysize);
- yysize = yysize1;
-
- if (yysize_overflow)
- return YYSIZE_MAXIMUM;
-
- if (yyresult)
- {
- /* Avoid sprintf, as that infringes on the user's name space.
- Don't have undefined behavior even if the translation
- produced a string with the wrong number of "%s"s. */
- char *yyp = yyresult;
- int yyi = 0;
- while ((*yyp = *yyf) != '\0')
- {
- if (*yyp == '%' && yyf[1] == 's' && yyi < yycount)
- {
- yyp += yytnamerr (yyp, yyarg[yyi++]);
- yyf += 2;
- }
- else
- {
- yyp++;
- yyf++;
- }
- }
- }
- return yysize;
- }
-}
-#endif /* YYERROR_VERBOSE */
-
-
-/*-----------------------------------------------.
-| Release the memory associated to this symbol. |
-`-----------------------------------------------*/
-
-/*ARGSUSED*/
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
-#else
-static void
-yydestruct (yymsg, yytype, yyvaluep)
- const char *yymsg;
- int yytype;
- YYSTYPE *yyvaluep;
-#endif
-{
- YYUSE (yyvaluep);
+/*
+ pos
+ / \
+ . \
+ *
+ / \
+ / 0
+ *
+ / \
+ / \
+ / subtree
+ major / \
+ / .
+ minor
+
+ . = unchanged child node
+ * = new branch node
+ 0 = NULL child node. (Location for future siblings of the subtree)
- if (!yymsg)
- yymsg = "Deleting";
- YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
+*/
- switch (yytype)
- {
+static bool said_attach_subtree(ParseTreeNode* pos, int major, int minor,
+ ParseTreeNode* subtree) {
+ bool retval = true;
- default:
- break;
- }
-}
-
+ said_branch_attach_right(pos,
+ said_branch_node(said_next_node(),
+ said_branch_node(said_next_node(),
+ said_leaf_node(said_next_node(), major),
+ said_branch_attach_left(subtree,
+ said_leaf_node(said_next_node(), minor))),
+ 0));
-/* Prevent warnings from -Wmissing-prototypes. */
+ return retval;
+}
-#ifdef YYPARSE_PARAM
-#if defined __STDC__ || defined __cplusplus
-int yyparse (void *YYPARSE_PARAM);
-#else
-int yyparse ();
-#endif
-#else /* ! YYPARSE_PARAM */
-#if defined __STDC__ || defined __cplusplus
-int yyparse (void);
-#else
-int yyparse ();
-#endif
-#endif /* ! YYPARSE_PARAM */
-/* The look-ahead symbol. */
-int yychar;
+/*****************/
+/**** Parsing ****/
+/*****************/
-/* The semantic value of the look-ahead symbol. */
-YYSTYPE yylval;
+static bool parseSpec(ParseTreeNode* parentNode);
+static bool parsePart2(ParseTreeNode* parentNode, bool& nonempty);
+static bool parsePart3(ParseTreeNode* parentNode, bool& nonempty);
+static bool parseSlash(ParseTreeNode* parentNode);
+static bool parseExpr(ParseTreeNode* parentNode);
+static bool parseRef(ParseTreeNode* parentNode);
+static bool parseComma(ParseTreeNode* parentNode);
+static bool parseList(ParseTreeNode* parentNode);
+static bool parseListEntry(ParseTreeNode* parentNode);
+static bool parseWord(ParseTreeNode* parentNode);
-/* Number of syntax errors so far. */
-int yynerrs;
+static bool parseWord(ParseTreeNode* parentNode)
+{
+ int token = said_tokens[said_token];
+ if (token & 0x8000)
+ return false;
+ said_token++;
+ ParseTreeNode* newNode = said_word_node(said_next_node(), token);
-/*----------.
-| yyparse. |
-`----------*/
+ parentNode->right = newNode;
-#ifdef YYPARSE_PARAM
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-int
-yyparse (void *YYPARSE_PARAM)
-#else
-int
-yyparse (YYPARSE_PARAM)
- void *YYPARSE_PARAM;
-#endif
-#else /* ! YYPARSE_PARAM */
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-int
-yyparse (void)
-#else
-int
-yyparse ()
+ return true;
+}
-#endif
-#endif
+static bool parsePart2(ParseTreeNode* parentNode, bool& nonempty)
{
-
- int yystate;
- int yyn;
- int yyresult;
- /* Number of tokens to shift before error messages enabled. */
- int yyerrstatus;
- /* Look-ahead token as an internal (translated) token number. */
- int yytoken = 0;
-#if YYERROR_VERBOSE
- /* Buffer for error messages, and its allocated size. */
- char yymsgbuf[128];
- char *yymsg = yymsgbuf;
- YYSIZE_T yymsg_alloc = sizeof yymsgbuf;
-#endif
+ // Store current state for rolling back if we fail
+ int curToken = said_token;
+ int curTreePos = said_tree_pos;
+ ParseTreeNode* curRightChild = parentNode->right;
- /* Three stacks and their tools:
- `yyss': related to states,
- `yyvs': related to semantic values,
- `yyls': related to locations.
-
- Refer to the stacks thru separate pointers, to allow yyoverflow
- to reallocate them elsewhere. */
-
- /* The state stack. */
- yytype_int16 yyssa[YYINITDEPTH];
- yytype_int16 *yyss = yyssa;
- yytype_int16 *yyssp;
-
- /* The semantic value stack. */
- YYSTYPE yyvsa[YYINITDEPTH];
- YYSTYPE *yyvs = yyvsa;
- YYSTYPE *yyvsp;
-
-
-
-#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N))
+ ParseTreeNode* newNode = said_branch_node(said_next_node(), 0, 0);
- YYSIZE_T yystacksize = YYINITDEPTH;
-
- /* The variables used to return semantic value and location from the
- action routines. */
- YYSTYPE yyval;
+ nonempty = true;
+ bool found;
- /* The number of symbols on the RHS of the reduced rule.
- Keep to zero when no symbol should be popped. */
- int yylen = 0;
+ found = parseSlash(newNode);
- YYDPRINTF ((stderr, "Starting parse\n"));
+ if (found) {
- yystate = 0;
- yyerrstatus = 0;
- yynerrs = 0;
- yychar = YYEMPTY; /* Cause a token to be read. */
+ said_attach_subtree(parentNode, 0x142, 0x14a, newNode);
- /* Initialize stack pointers.
- Waste one element of value and location stack
- so that they stay on the same level as the state stack.
- The wasted elements are never initialized. */
+ return true;
- yyssp = yyss;
- yyvsp = yyvs;
+ } else if (said_tokens[said_token] == TOKEN_BRACKETO) {
+ said_token++;
+
+ found = parsePart2(newNode, nonempty);
- goto yysetstate;
+ if (found) {
-/*------------------------------------------------------------.
-| yynewstate -- Push a new state, which is found in yystate. |
-`------------------------------------------------------------*/
- yynewstate:
- /* In all cases, when you get here, the value and location stacks
- have just been pushed. So pushing a state here evens the stacks. */
- yyssp++;
-
- yysetstate:
- *yyssp = yystate;
-
- if (yyss + yystacksize - 1 <= yyssp)
- {
- /* Get the current used size of the three stacks, in elements. */
- YYSIZE_T yysize = yyssp - yyss + 1;
-
-#ifdef yyoverflow
- {
- /* Give user a chance to reallocate the stack. Use copies of
- these so that the &'s don't force the real ones into
- memory. */
- YYSTYPE *yyvs1 = yyvs;
- yytype_int16 *yyss1 = yyss;
-
-
- /* Each stack pointer address is followed by the size of the
- data in use in that stack, in bytes. This used to be a
- conditional around just the two extra args, but that might
- be undefined if yyoverflow is a macro. */
- yyoverflow (YY_("memory exhausted"),
- &yyss1, yysize * sizeof (*yyssp),
- &yyvs1, yysize * sizeof (*yyvsp),
-
- &yystacksize);
-
- yyss = yyss1;
- yyvs = yyvs1;
- }
-#else /* no yyoverflow */
-# ifndef YYSTACK_RELOCATE
- goto yyexhaustedlab;
-# else
- /* Extend the stack our own way. */
- if (YYMAXDEPTH <= yystacksize)
- goto yyexhaustedlab;
- yystacksize *= 2;
- if (YYMAXDEPTH < yystacksize)
- yystacksize = YYMAXDEPTH;
-
- {
- yytype_int16 *yyss1 = yyss;
- union yyalloc *yyptr =
- (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
- if (! yyptr)
- goto yyexhaustedlab;
- YYSTACK_RELOCATE (yyss);
- YYSTACK_RELOCATE (yyvs);
-
-# undef YYSTACK_RELOCATE
- if (yyss1 != yyssa)
- YYSTACK_FREE (yyss1);
- }
-# endif
-#endif /* no yyoverflow */
-
- yyssp = yyss + yysize - 1;
- yyvsp = yyvs + yysize - 1;
-
-
- YYDPRINTF ((stderr, "Stack size increased to %lu\n",
- (unsigned long int) yystacksize));
-
- if (yyss + yystacksize - 1 <= yyssp)
- YYABORT;
- }
-
- YYDPRINTF ((stderr, "Entering state %d\n", yystate));
-
- goto yybackup;
-
-/*-----------.
-| yybackup. |
-`-----------*/
-yybackup:
-
- /* Do appropriate processing given the current state. Read a
- look-ahead token if we need one and don't already have one. */
-
- /* First try to decide what to do without reference to look-ahead token. */
- yyn = yypact[yystate];
- if (yyn == YYPACT_NINF)
- goto yydefault;
-
- /* Not known => get a look-ahead token if don't already have one. */
-
- /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */
- if (yychar == YYEMPTY)
- {
- YYDPRINTF ((stderr, "Reading a token: "));
- yychar = YYLEX;
- }
-
- if (yychar <= YYEOF)
- {
- yychar = yytoken = YYEOF;
- YYDPRINTF ((stderr, "Now at end of input.\n"));
- }
- else
- {
- yytoken = YYTRANSLATE (yychar);
- YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
- }
-
- /* If the proper action on seeing token YYTOKEN is to reduce or to
- detect an error, take that action. */
- yyn += yytoken;
- if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
- goto yydefault;
- yyn = yytable[yyn];
- if (yyn <= 0)
- {
- if (yyn == 0 || yyn == YYTABLE_NINF)
- goto yyerrlab;
- yyn = -yyn;
- goto yyreduce;
- }
+ if (said_tokens[said_token] == TOKEN_BRACKETC) {
+ said_token++;
- if (yyn == YYFINAL)
- YYACCEPT;
+ said_attach_subtree(parentNode, 0x152, 0x142, newNode);
- /* Count tokens shifted since error; after three, turn off error
- status. */
- if (yyerrstatus)
- yyerrstatus--;
+ return true;
+ }
+ }
- /* Shift the look-ahead token. */
- YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
+ }
- /* Discard the shifted token unless it is eof. */
- if (yychar != YYEOF)
- yychar = YYEMPTY;
+ // CHECKME: if the [] section above matched only partially, said_token has
+ // already been advanced past it. Should the 'if' below be an 'else if'?
- yystate = yyn;
- *++yyvsp = yylval;
+ if (said_tokens[said_token] == TOKEN_SLASH) {
+ said_token++;
- goto yynewstate;
+ nonempty = false;
+ return true;
-/*-----------------------------------------------------------.
-| yydefault -- do the default action for the current state. |
-`-----------------------------------------------------------*/
-yydefault:
- yyn = yydefact[yystate];
- if (yyn == 0)
- goto yyerrlab;
- goto yyreduce;
+ }
+ // Rollback
+ said_token = curToken;
+ said_tree_pos = curTreePos;
+ parentNode->right = curRightChild;
+ return false;
+}
-/*-----------------------------.
-| yyreduce -- Do a reduction. |
-`-----------------------------*/
-yyreduce:
- /* yyn is the number of a rule to reduce with. */
- yylen = yyr2[yyn];
+static bool parsePart3(ParseTreeNode* parentNode, bool& nonempty)
+{
+ // Store current state for rolling back if we fail
+ int curToken = said_token;
+ int curTreePos = said_tree_pos;
+ ParseTreeNode* curRightChild = parentNode->right;
- /* If YYLEN is nonzero, implement the default value of the action:
- `$$ = $1'.
+ ParseTreeNode* newNode = said_branch_node(said_next_node(), 0, 0);
- Otherwise, the following line sets YYVAL to garbage.
- This behavior is undocumented and Bison
- users should not rely upon it. Assigning to YYVAL
- unconditionally makes the parser a bit smaller, and it avoids a
- GCC warning that YYVAL may be used uninitialized. */
- yyval = yyvsp[1-yylen];
+ bool found;
+ nonempty = true;
- YY_REDUCE_PRINT (yyn);
- switch (yyn)
- {
- case 2:
+ found = parseSlash(newNode);
- { (yyval) = said_top_branch(said_attach_branch((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)]))); ;}
- break;
+ if (found) {
- case 3:
+ said_attach_subtree(parentNode, 0x143, 0x14a, newNode);
- { (yyval) = said_top_branch(said_attach_branch((yyvsp[(1) - (3)]), said_attach_branch((yyvsp[(2) - (3)]), (yyvsp[(3) - (3)])))); ;}
- break;
+ return true;
- case 4:
+ } else if (said_tokens[said_token] == TOKEN_BRACKETO) {
+ said_token++;
+
+ found = parsePart3(newNode, nonempty);
- { (yyval) = said_top_branch(said_attach_branch((yyvsp[(1) - (4)]), said_attach_branch((yyvsp[(2) - (4)]), said_attach_branch((yyvsp[(3) - (4)]), (yyvsp[(4) - (4)]))))); ;}
- break;
+ if (found) {
- case 5:
+ if (said_tokens[said_token] == TOKEN_BRACKETC) {
+ said_token++;
- { (yyval) = SAID_BRANCH_NULL; ;}
- break;
+ said_attach_subtree(parentNode, 0x152, 0x143, newNode);
- case 6:
+ return true;
+ }
+ }
- { (yyval) = said_paren(said_value(0x14b, said_value(0xf900, said_terminal(0xf900))), SAID_BRANCH_NULL); ;}
- break;
+ }
- case 7:
+ // CHECKME: if the [] section above matched only partially, said_token has
+ // already been advanced past it. Should the 'if' below be an 'else if'?
- { (yyval) = SAID_BRANCH_NULL; ;}
- break;
+ if (said_tokens[said_token] == TOKEN_SLASH) {
+ said_token++;
- case 8:
+ nonempty = false;
- { (yyval) = said_paren(said_value(0x141, said_value(0x149, (yyvsp[(1) - (1)]))), SAID_BRANCH_NULL); ;}
- break;
+ return true;
- case 9:
+ }
- { (yyval) = said_aug_branch(0x142, 0x14a, (yyvsp[(2) - (2)]), SAID_BRANCH_NULL); ;}
- break;
+ // Rollback
+ said_token = curToken;
+ said_tree_pos = curTreePos;
+ parentNode->right = curRightChild;
+ return false;
+}
- case 10:
- { (yyval) = said_aug_branch(0x152, 0x142, said_aug_branch(0x142, 0x14a, (yyvsp[(3) - (4)]), SAID_BRANCH_NULL), SAID_BRANCH_NULL); ;}
- break;
+static bool parseSlash(ParseTreeNode* parentNode)
+{
+ // Store current state for rolling back if we fail
+ int curToken = said_token;
+ int curTreePos = said_tree_pos;
+ ParseTreeNode* curRightChild = parentNode->right;
- case 11:
+ if (said_tokens[said_token] == TOKEN_SLASH) {
+ said_token++;
- { (yyval) = SAID_BRANCH_NULL; ;}
- break;
+ bool found = parseExpr(parentNode);
- case 12:
+ if (found)
+ return true;
- { (yyval) = said_aug_branch(0x143, 0x14a, (yyvsp[(2) - (2)]), SAID_BRANCH_NULL); ;}
- break;
+ }
- case 13:
+ // Rollback
+ said_token = curToken;
+ said_tree_pos = curTreePos;
+ parentNode->right = curRightChild;
+ return false;
+}
- { (yyval) = said_aug_branch(0x152, 0x143, said_aug_branch(0x143, 0x14a, (yyvsp[(3) - (4)]), SAID_BRANCH_NULL), SAID_BRANCH_NULL); ;}
- break;
- case 14:
+static bool parseRef(ParseTreeNode* parentNode)
+{
+ // Store current state for rolling back if we fail
+ int curToken = said_token;
+ int curTreePos = said_tree_pos;
+ ParseTreeNode* curRightChild = parentNode->right;
- { (yyval) = SAID_BRANCH_NULL; ;}
- break;
+ ParseTreeNode* newNode = said_branch_node(said_next_node(), 0, 0);
- case 15:
+ ParseTreeNode* newParent = parentNode;
- { (yyval) = said_paren(said_value(0x141, said_value(0x153, said_terminal((yyvsp[(1) - (1)])))), SAID_BRANCH_NULL); ;}
- break;
+ bool found;
- case 16:
+ if (said_tokens[said_token] == TOKEN_LT) {
+ said_token++;
- { (yyval) = said_aug_branch(0x141, 0x14f, (yyvsp[(1) - (1)]), SAID_BRANCH_NULL); ;}
- break;
+ found = parseList(newNode);
- case 17:
+ if (found) {
- { (yyval) = said_aug_branch(0x141, 0x14f, said_aug_branch(0x152, 0x14c, said_aug_branch(0x141, 0x14f, (yyvsp[(2) - (3)]), SAID_BRANCH_NULL), SAID_BRANCH_NULL), SAID_BRANCH_NULL); ;}
- break;
+ said_attach_subtree(newParent, 0x144, 0x14f, newNode);
- case 18:
+ newParent = newParent->right;
+
+ newNode = said_branch_node(said_next_node(), 0, 0);
- { (yyval) = (yyvsp[(1) - (1)]); ;}
- break;
+ found = parseRef(newNode);
- case 19:
+ if (found) {
- { (yyval) = said_aug_branch(0x141, 0x14c, (yyvsp[(2) - (3)]), SAID_BRANCH_NULL); ;}
- break;
+ said_attach_subtree(newParent, 0x141, 0x144, newNode);
- case 20:
+ }
- { (yyval) = said_attach_branch((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
+ return true;
- case 21:
+ }
- { (yyval) = said_attach_branch((yyvsp[(1) - (4)]), (yyvsp[(3) - (4)])); ;}
- break;
+ }
- case 22:
+ // NB: This is not an "else if".
+ // If there is a "< [ ... ]", that is parsed as "< ..."
- { (yyval) = said_attach_branch((yyvsp[(1) - (5)]), (yyvsp[(3) - (5)])); ;}
- break;
+ if (said_tokens[said_token] == TOKEN_BRACKETO) {
+ said_token++;
+
+ found = parseRef(newNode);
- case 23:
+ if (found) {
- { (yyval) = said_attach_branch((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)])); ;}
- break;
+ if (said_tokens[said_token] == TOKEN_BRACKETC) {
+ said_token++;
- case 24:
+ said_attach_subtree(parentNode, 0x152, 0x144, newNode);
- { (yyval) = (yyvsp[(1) - (1)]); ;}
- break;
+ return true;
+ }
+ }
- case 25:
+ }
- { (yyval) = (yyvsp[(1) - (1)]); ;}
- break;
+ // Rollback
+ said_token = curToken;
+ said_tree_pos = curTreePos;
+ parentNode->right = curRightChild;
+ return false;
+}
- case 26:
+static bool parseComma(ParseTreeNode* parentNode)
+{
+ // Store current state for rolling back if we fail
+ int curToken = said_token;
+ int curTreePos = said_tree_pos;
+ ParseTreeNode* curRightChild = parentNode->right;
- { (yyval) = (yyvsp[(1) - (1)]); ;}
- break;
+ if (said_tokens[said_token] == TOKEN_COMMA) {
+ said_token++;
- case 27:
+ bool found = parseList(parentNode);
- { (yyval) = said_aug_branch(0x152, 0x144, (yyvsp[(2) - (3)]), SAID_BRANCH_NULL); ;}
- break;
+ if (found)
+ return true;
- case 28:
+ }
- { (yyval) = said_attach_branch((yyvsp[(1) - (4)]), said_aug_branch(0x152, 0x144, (yyvsp[(3) - (4)]), SAID_BRANCH_NULL)); ;}
- break;
+ // Rollback
+ said_token = curToken;
+ said_tree_pos = curTreePos;
+ parentNode->right = curRightChild;
+ return false;
+}
- case 29:
+static bool parseListEntry(ParseTreeNode* parentNode)
+{
+ // Store current state for rolling back if we fail
+ int curToken = said_token;
+ int curTreePos = said_tree_pos;
+ ParseTreeNode* curRightChild = parentNode->right;
- { (yyval) = said_aug_branch(0x144, 0x14f, (yyvsp[(2) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
+ ParseTreeNode* newNode = said_branch_node(said_next_node(), 0, 0);
- case 30:
+ bool found;
- { (yyval) = said_aug_branch(0x144, 0x14f, said_aug_branch(0x141, 0x144, (yyvsp[(2) - (4)]), SAID_BRANCH_NULL), SAID_BRANCH_NULL); ;}
- break;
+ if (said_tokens[said_token] == TOKEN_BRACKETO) {
+ said_token++;
- case 31:
+ found = parseExpr(newNode);
- { (yyval) = said_aug_branch(0x144, 0x14f, (yyvsp[(2) - (2)]), SAID_BRANCH_NULL); ;}
- break;
+ if (found) {
- case 32:
+ if (said_tokens[said_token] == TOKEN_BRACKETC) {
+ said_token++;
- { (yyval) = said_aug_branch(0x152, 0x144, said_aug_branch(0x144, 0x14f, (yyvsp[(3) - (4)]), SAID_BRANCH_NULL), SAID_BRANCH_NULL); ;}
- break;
+ said_attach_subtree(parentNode, 0x152, 0x14c, newNode);
- case 33:
+ return true;
+ }
+ }
- { (yyval) = said_aug_branch(0x141, 0x144, said_aug_branch(0x144, 0x14f, (yyvsp[(2) - (3)]), SAID_BRANCH_NULL), (yyvsp[(3) - (3)])); ;}
- break;
+ } else if (said_tokens[said_token] == TOKEN_PARENO) {
+ said_token++;
- case 34:
+ found = parseExpr(newNode);
- { (yyval) = said_aug_branch(0x141, 0x144, said_aug_branch(0x144, 0x14f, (yyvsp[(2) - (2)]), SAID_BRANCH_NULL), SAID_BRANCH_NULL); ;}
- break;
+ if (found) {
- case 35:
+ if (said_tokens[said_token] == TOKEN_PARENC) {
+ said_token++;
- { (yyval) = said_aug_branch(0x141, 0x14c, (yyvsp[(2) - (4)]), SAID_BRANCH_NULL); ;}
- break;
+ said_attach_subtree(parentNode, 0x141, 0x14c, newNode);
+ return true;
+ }
+ }
-/* Line 1267 of yacc.c. */
+ } else if (parseWord(newNode)) {
- default: break;
- }
- YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
+ said_attach_subtree(parentNode, 0x141, 0x153, newNode);
- YYPOPSTACK (yylen);
- yylen = 0;
- YY_STACK_PRINT (yyss, yyssp);
+ return true;
- *++yyvsp = yyval;
+ }
- /* Now `shift' the result of the reduction. Determine what state
- that goes to, based on the state we popped back to and the rule
- number reduced by. */
+ // Rollback
+ said_token = curToken;
+ said_tree_pos = curTreePos;
+ parentNode->right = curRightChild;
+ return false;
+}
- yyn = yyr1[yyn];
+static bool parseList(ParseTreeNode* parentNode)
+{
+ // Store current state for rolling back if we fail
+ int curToken = said_token;
+ int curTreePos = said_tree_pos;
+ ParseTreeNode* curRightChild = parentNode->right;
- yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
- if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
- yystate = yytable[yystate];
- else
- yystate = yydefgoto[yyn - YYNTOKENS];
+ bool found;
- goto yynewstate;
+ ParseTreeNode* newParent = parentNode;
+ found = parseListEntry(newParent);
-/*------------------------------------.
-| yyerrlab -- here on detecting error |
-`------------------------------------*/
-yyerrlab:
- /* If not already recovering from an error, report this error. */
- if (!yyerrstatus)
- {
- ++yynerrs;
-#if ! YYERROR_VERBOSE
- yyerror (YY_("syntax error"));
-#else
- {
- YYSIZE_T yysize = yysyntax_error (0, yystate, yychar);
- if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
- {
- YYSIZE_T yyalloc = 2 * yysize;
- if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
- yyalloc = YYSTACK_ALLOC_MAXIMUM;
- if (yymsg != yymsgbuf)
- YYSTACK_FREE (yymsg);
- yymsg = (char *) YYSTACK_ALLOC (yyalloc);
- if (yymsg)
- yymsg_alloc = yyalloc;
- else
- {
- yymsg = yymsgbuf;
- yymsg_alloc = sizeof yymsgbuf;
- }
- }
-
- if (0 < yysize && yysize <= yymsg_alloc)
- {
- (void) yysyntax_error (yymsg, yystate, yychar);
- yyerror (yymsg);
- }
- else
- {
- yyerror (YY_("syntax error"));
- if (yysize != 0)
- goto yyexhaustedlab;
- }
- }
-#endif
- }
+ if (found) {
+ newParent = newParent->right;
+ found = parseComma(newParent);
- if (yyerrstatus == 3)
- {
- /* If just tried and failed to reuse look-ahead token after an
- error, discard it. */
+ return true;
- if (yychar <= YYEOF)
- {
- /* Return failure if at end of input. */
- if (yychar == YYEOF)
- YYABORT;
- }
- else
- {
- yydestruct ("Error: discarding",
- yytoken, &yylval);
- yychar = YYEMPTY;
- }
- }
-
- /* Else will try to reuse look-ahead token after shifting the error
- token. */
- goto yyerrlab1;
-
-
-/*---------------------------------------------------.
-| yyerrorlab -- error raised explicitly by YYERROR. |
-`---------------------------------------------------*/
-yyerrorlab:
-
- /* Pacify compilers like GCC when the user code never invokes
- YYERROR and the label yyerrorlab therefore never appears in user
- code. */
- if (/*CONSTCOND*/ 0)
- goto yyerrorlab;
-
- /* Do not reclaim the symbols of the rule which action triggered
- this YYERROR. */
- YYPOPSTACK (yylen);
- yylen = 0;
- YY_STACK_PRINT (yyss, yyssp);
- yystate = *yyssp;
- goto yyerrlab1;
-
-
-/*-------------------------------------------------------------.
-| yyerrlab1 -- common code for both syntax error and YYERROR. |
-`-------------------------------------------------------------*/
-yyerrlab1:
- yyerrstatus = 3; /* Each real token shifted decrements this. */
-
- for (;;)
- {
- yyn = yypact[yystate];
- if (yyn != YYPACT_NINF)
- {
- yyn += YYTERROR;
- if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
- {
- yyn = yytable[yyn];
- if (0 < yyn)
- break;
- }
}
- /* Pop the current state because it cannot handle the error token. */
- if (yyssp == yyss)
- YYABORT;
+ // Rollback
+ said_token = curToken;
+ said_tree_pos = curTreePos;
+ parentNode->right = curRightChild;
+ return false;
+}
+static bool parseExpr(ParseTreeNode* parentNode)
+{
+ // Store current state for rolling back if we fail
+ int curToken = said_token;
+ int curTreePos = said_tree_pos;
+ ParseTreeNode* curRightChild = parentNode->right;
- yydestruct ("Error: popping",
- yystos[yystate], yyvsp);
- YYPOPSTACK (1);
- yystate = *yyssp;
- YY_STACK_PRINT (yyss, yyssp);
- }
+ ParseTreeNode* newNode = said_branch_node(said_next_node(), 0, 0);
- if (yyn == YYFINAL)
- YYACCEPT;
+ bool ret = false;
+ bool found;
- *++yyvsp = yylval;
+ ParseTreeNode* newParent = parentNode;
+ found = parseList(newNode);
- /* Shift the error token. */
- YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
+ if (found) {
+ ret = true;
- yystate = yyn;
- goto yynewstate;
+ said_attach_subtree(newParent, 0x141, 0x14F, newNode);
+ newParent = newParent->right;
-/*-------------------------------------.
-| yyacceptlab -- YYACCEPT comes here. |
-`-------------------------------------*/
-yyacceptlab:
- yyresult = 0;
- goto yyreturn;
+ }
-/*-----------------------------------.
-| yyabortlab -- YYABORT comes here. |
-`-----------------------------------*/
-yyabortlab:
- yyresult = 1;
- goto yyreturn;
+ found = parseRef(newParent);
-#ifndef yyoverflow
-/*-------------------------------------------------.
-| yyexhaustedlab -- memory exhaustion comes here. |
-`-------------------------------------------------*/
-yyexhaustedlab:
- yyerror (YY_("memory exhausted"));
- yyresult = 2;
- /* Fall through. */
-#endif
+ if (found || ret)
+ return true;
-yyreturn:
- if (yychar != YYEOF && yychar != YYEMPTY)
- yydestruct ("Cleanup: discarding lookahead",
- yytoken, &yylval);
- /* Do not reclaim the symbols of the rule which action triggered
- this YYABORT or YYACCEPT. */
- YYPOPSTACK (yylen);
- YY_STACK_PRINT (yyss, yyssp);
- while (yyssp != yyss)
- {
- yydestruct ("Cleanup: popping",
- yystos[*yyssp], yyvsp);
- YYPOPSTACK (1);
- }
-#ifndef yyoverflow
- if (yyss != yyssa)
- YYSTACK_FREE (yyss);
-#endif
-#if YYERROR_VERBOSE
- if (yymsg != yymsgbuf)
- YYSTACK_FREE (yymsg);
-#endif
- /* Make sure YYID is used. */
- return YYID (yyresult);
+ // Rollback
+ said_token = curToken;
+ said_tree_pos = curTreePos;
+ parentNode->right = curRightChild;
+ return false;
}
+static bool parseSpec(ParseTreeNode* parentNode)
+{
+ // Store current state for rolling back if we fail
+ int curToken = said_token;
+ int curTreePos = said_tree_pos;
+ ParseTreeNode* curRightChild = parentNode->right;
+ ParseTreeNode* newNode = said_branch_node(said_next_node(), 0, 0);
+ bool ret = false;
+ bool found;
-int parse_yy_token_lookup[] = {YY_COMMA, YY_AMP, YY_SLASH, YY_PARENO, YY_PARENC, YY_BRACKETSO, YY_BRACKETSC, YY_HASH, YY_LT, YY_GT};
+ ParseTreeNode* newParent = parentNode;
-static int yylex() {
- int retval = said_tokens[said_token++];
+ found = parseExpr(newNode);
- if (retval < SAID_LONG(SAID_FIRST)) {
- yylval = retval;
- retval = WGROUP;
- } else {
- retval >>= 8;
-
- if (retval == SAID_TERM)
- retval = 0;
- else {
- assert(retval >= SAID_FIRST);
- retval = parse_yy_token_lookup[retval - SAID_FIRST];
- if (retval == YY_BRACKETSO) {
- if ((said_tokens[said_token] >> 8) == SAID_LT)
- retval = YY_BRACKETSO_LT;
- else
- if ((said_tokens[said_token] >> 8) == SAID_SLASH)
- retval = YY_BRACKETSO_SLASH;
- } else if (retval == YY_LT && (said_tokens[said_token] >> 8) == SAID_BRACKO) {
- retval = YY_LT_BRACKETSO;
- } else if (retval == YY_LT && (said_tokens[said_token] >> 8) == SAID_PARENO) {
- retval = YY_LT_PARENO;
- }
- }
- }
+ if (found) {
+ // Sentence part 1 found
+ said_attach_subtree(newParent, 0x141, 0x149, newNode);
- return retval;
-}
+ newParent = newParent->right;
-static int said_next_node() {
- return ((said_tree_pos == 0) || (said_tree_pos >= VOCAB_TREE_NODES)) ? said_tree_pos = 0 : said_tree_pos++;
-}
+ ret = true;
+ }
-#define SAID_NEXT_NODE said_next_node()
+ bool nonempty;
-static int said_leaf_node(tree_t pos, int value) {
- said_tree[pos].type = kParseTreeLeafNode;
+ found = parsePart2(newParent, nonempty);
- if (value != VALUE_IGNORE)
- said_tree[pos].content.value = value;
+ if (found) {
- return pos;
-}
+ ret = true;
-static int said_branch_node(tree_t pos, int left, int right) {
- said_tree[pos].type = kParseTreeBranchNode;
+ if (nonempty) // non-empty part found
+ newParent = newParent->right;
- if (left != VALUE_IGNORE)
- said_tree[pos].content.branches[0] = left;
- if (right != VALUE_IGNORE)
- said_tree[pos].content.branches[1] = right;
+ found = parsePart3(newParent, nonempty);
- return pos;
-}
+ if (found) {
-static tree_t said_paren(tree_t t1, tree_t t2) {
- if (t1)
- return said_branch_node(SAID_NEXT_NODE, t1, t2);
- else
- return t2;
-}
+ if (nonempty)
+ newParent = newParent->right;
+ }
+ }
-static tree_t said_value(int val, tree_t t) {
- return said_branch_node(SAID_NEXT_NODE, said_leaf_node(SAID_NEXT_NODE, val), t);
+ if (said_tokens[said_token] == TOKEN_GT) {
+ said_token++;
-}
+ newNode = said_branch_node(said_next_node(), 0,
+ said_leaf_node(said_next_node(), TOKEN_GT));
-static tree_t said_terminal(int val) {
- return said_leaf_node(SAID_NEXT_NODE, val);
-}
+ said_attach_subtree(newParent, 0x14B, TOKEN_GT, newNode);
-static tree_t said_aug_branch(int n1, int n2, tree_t t1, tree_t t2) {
- int retval;
+ }
- retval = said_branch_node(SAID_NEXT_NODE,
- said_branch_node(SAID_NEXT_NODE,
- said_leaf_node(SAID_NEXT_NODE, n1),
- said_branch_node(SAID_NEXT_NODE,
- said_leaf_node(SAID_NEXT_NODE, n2),
- t1)
- ),
- t2);
-
-#ifdef SAID_DEBUG
- fprintf(stderr, "AUG(0x%x, 0x%x, [%04x], [%04x]) = [%04x]\n", n1, n2, t1, t2, retval);
-#endif
- return retval;
+ if (ret)
+ return true;
+
+ // Rollback
+ said_token = curToken;
+ said_tree_pos = curTreePos;
+ parentNode->right = curRightChild;
+ return false;
}
-static tree_t said_attach_branch(tree_t base, tree_t attacheant) {
-#ifdef SAID_DEBUG
- fprintf(stderr, "ATT2([%04x], [%04x]) = [%04x]\n", base, attacheant, base);
-#endif
- if (!attacheant)
- return base;
- if (!base)
- return attacheant;
+static bool buildSaidTree() {
+ said_branch_node(said_tree, &said_tree[1], &said_tree[2]);
+ said_leaf_node(&said_tree[1], 0x141); // Magic number #1
+ said_branch_node(&said_tree[2], &said_tree[3], 0);
+ said_leaf_node(&said_tree[3], 0x13f); // Magic number #2
- if (!base)
- return 0; // Happens if we're out of space
+ said_tree_pos = SAID_TREE_START;
- said_branch_node(base, VALUE_IGNORE, attacheant);
+ bool ret = parseSpec(&said_tree[2]);
- return base;
-}
+ if (!ret)
+ return false;
-static said_spec_t said_top_branch(tree_t first) {
-#ifdef SAID_DEBUG
- fprintf(stderr, "TOP([%04x])\n", first);
-#endif
- said_branch_node(0, 1, 2);
- said_leaf_node(1, 0x141); // Magic number #1
- said_branch_node(2, 3, first);
- said_leaf_node(3, 0x13f); // Magic number #2
+ if (said_tokens[said_token] != TOKEN_TERM) {
+ // No terminator, so parse error.
- ++said_blessed;
+ // Rollback
+ said_tree[2].right = 0;
+ said_token = 0;
+ said_tree_pos = SAID_TREE_START;
+ return false;
+ }
- return 0;
+ return true;
}
-static int said_parse_spec(byte *spec) {
+static int said_parse_spec(const byte *spec) {
int nextitem;
- said_parse_error = NULL;
said_token = 0;
said_tokens_nr = 0;
- said_blessed = 0;
said_tree_pos = SAID_TREE_START;
@@ -2025,26 +674,13 @@ static int said_parse_spec(byte *spec) {
} while ((nextitem != SAID_TERM) && (said_tokens_nr < MAX_SAID_TOKENS));
- if (nextitem == SAID_TERM)
- yyparse();
- else {
+ if (nextitem != SAID_TERM) {
warning("SAID spec is too long");
return 1;
}
- if (said_parse_error) {
- warning("Error while parsing SAID spec: %s", said_parse_error);
- free(said_parse_error);
- return 1;
- }
-
- if (said_tree_pos == 0) {
- warning("Out of tree space while parsing SAID spec");
- return 1;
- }
-
- if (said_blessed != 1) {
- warning("Found multiple top branches");
+ if (!buildSaidTree()) {
+ warning("Error while parsing SAID spec");
return 1;
}
@@ -2055,385 +691,304 @@ static int said_parse_spec(byte *spec) {
/**** Augmentation ****/
/**********************/
-// primitive functions
+static bool dontclaim;
+static int outputDepth;
-#define AUG_READ_BRANCH(a, br, p) \
- if (tree[p].type != kParseTreeBranchNode) \
- return 0; \
- a = tree[p].content.branches[br];
+enum ScanSaidType {
+ SCAN_SAID_AND = 0,
+ SCAN_SAID_OR = 1
+};
-#define AUG_READ_VALUE(a, p) \
- if (tree[p].type != kParseTreeLeafNode) \
- return 0; \
- a = tree[p].content.value;
+static int matchTrees(ParseTreeNode* parseT, ParseTreeNode* saidT);
+static int scanSaidChildren(ParseTreeNode* parseT, ParseTreeNode* saidT,
+ ScanSaidType type);
+static int scanParseChildren(ParseTreeNode* parseT, ParseTreeNode* saidT);
-#define AUG_ASSERT(i) \
- if (!i) return 0;
-static int aug_get_next_sibling(parse_tree_node_t *tree, int pos, int *first, int *second) {
- // Returns the next sibling relative to the specified position in 'tree',
- // sets *first and *second to its augment node values, returns the new position
- // or 0 if there was no next sibling
- int seek, valpos;
+static int node_major(ParseTreeNode* node) {
+ assert(node->type == kParseTreeBranchNode);
+ assert(node->left->type == kParseTreeLeafNode);
+ return node->left->value;
+}
+static int node_minor(ParseTreeNode* node) {
+ assert(node->type == kParseTreeBranchNode);
+ assert(node->right->type == kParseTreeBranchNode);
+ assert(node->right->left->type == kParseTreeLeafNode);
+ return node->right->left->value;
+}
+static bool node_is_terminal(ParseTreeNode* node) {
+ return (node->right->right &&
+ node->right->right->type != kParseTreeBranchNode);
+}
+static int node_terminal_value(ParseTreeNode* node) {
+ assert(node_is_terminal(node));
+ return node->right->right->value;
+}
+#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION
+static void node_print_desc(ParseTreeNode* node) {
+ assert(node);
+ assert(node->left);
+ if (node->left->type == kParseTreeBranchNode) {
+ scidprintf("< ");
+ node_print_desc(node->left);
+ scidprintf(", ...>");
+ } else {
+ if (node_is_terminal(node)) {
+ scidprintf("(%03x %03x %03x)", node_major(node),
+ node_minor(node),
+ node_terminal_value(node));
+ } else {
+ scidprintf("(%03x %03x <...>)", node_major(node),
+ node_minor(node));
+ }
+ }
+}
+#else
+static void node_print_desc(ParseTreeNode*) { }
+#endif
- AUG_READ_BRANCH(pos, 1, pos);
- AUG_ASSERT(pos);
- AUG_READ_BRANCH(seek, 0, pos);
- AUG_ASSERT(seek);
- // Now retrieve first value
- AUG_READ_BRANCH(valpos, 0, seek);
- AUG_ASSERT(valpos);
- AUG_READ_VALUE(*first, valpos);
- // Get second value
- AUG_READ_BRANCH(seek, 1, seek);
- AUG_ASSERT(seek);
- AUG_READ_BRANCH(valpos, 0, seek);
- AUG_ASSERT(valpos);
- AUG_READ_VALUE(*second, valpos);
- return pos;
-}
+static int matchTrees(ParseTreeNode* parseT, ParseTreeNode* saidT)
+{
+ outputDepth++;
+ scidprintf("%*smatchTrees on ", outputDepth, "");
+ node_print_desc(parseT);
+ scidprintf(" and ");
+ node_print_desc(saidT);
+ scidprintf("\n");
-static int aug_get_wgroup(parse_tree_node_t *tree, int pos) {
- // Returns 0 if pos in tree is not the root of a 3-element list, otherwise
- // it returns the last element (which, in practice, is the word group
- int val;
+ bool inParen = node_minor(saidT) == 0x14F || node_minor(saidT) == 0x150;
+ bool inBracket = node_major(saidT) == 0x152;
- AUG_READ_BRANCH(pos, 0, pos);
- AUG_ASSERT(pos);
- AUG_READ_BRANCH(pos, 1, pos);
- AUG_ASSERT(pos);
- AUG_READ_BRANCH(pos, 1, pos);
- AUG_ASSERT(pos);
- AUG_READ_VALUE(val, pos);
+ int ret;
- return val;
-}
+ if (node_major(parseT) != 0x141 &&
+ node_major(saidT) != 0x141 && node_major(saidT) != 0x152 &&
+ node_major(saidT) != node_major(parseT))
+ {
+ ret = -1;
+ }
-static int aug_get_base_node(parse_tree_node_t *tree) {
- int startpos = 0;
- AUG_READ_BRANCH(startpos, 1, startpos);
+ // parse major is 0x141 and/or
+ // said major is 0x141/0x152 and/or
+ // said major is parse major
- return startpos;
-}
+ else if (node_is_terminal(saidT) && node_is_terminal(parseT) ) {
-// semi-primitive functions
+ // both saidT and parseT are terminals
-static int aug_get_first_child(parse_tree_node_t *tree, int pos, int *first, int *second) {
- // like aug_get_next_sibling, except that it recurses into the tree and
- // finds the first child (usually *not* Ayanami Rei) of the current branch
- // rather than its next sibling.
- AUG_READ_BRANCH(pos, 0, pos);
- AUG_ASSERT(pos);
- AUG_READ_BRANCH(pos, 1, pos);
- AUG_ASSERT(pos);
+ int said_val = node_terminal_value(saidT);
+ int parse_val = node_terminal_value(parseT);
- return aug_get_next_sibling(tree, pos, first, second);
-}
+ if (said_val != WORD_NONE &&
+ (said_val == parse_val || said_val == WORD_ANY ||
+ parse_val == WORD_ANY))
+ ret = 1;
+ else
+ ret = -1;
-static void aug_find_words_recursively(parse_tree_node_t *tree, int startpos, int *base_words, int *base_words_nr,
- int *ref_words, int *ref_words_nr, int maxwords, int refbranch) {
- // Finds and lists all base (141) and reference (144) words */
- int major, minor;
- int word;
- int pos = aug_get_first_child(tree, startpos, &major, &minor);
+ scidprintf("%*smatchTrees matching terminals: %03x vs %03x (%d)\n",
+ outputDepth, "", parse_val, said_val, ret);
- //if (major == WORD_TYPE_REF)
- // refbranch = 1;
+ } else if (node_is_terminal(saidT) && !node_is_terminal(parseT)) {
- while (pos) {
- if ((word = aug_get_wgroup(tree, pos))) { // found a word
- if (!refbranch && major == WORD_TYPE_BASE) {
- if ((*base_words_nr) == maxwords) {
- warning("Out of regular words");
- return; // return gracefully
- }
+ // saidT is a terminal, but parseT isn't
- base_words[*base_words_nr] = word; // register word
- ++(*base_words_nr);
+ if (node_major(parseT) == 0x141 ||
+ node_major(parseT) == node_major(saidT))
+ ret = scanParseChildren(parseT->right->right, saidT);
+ else
+ ret = 0;
- }
- if (major == WORD_TYPE_REF || refbranch) {
- if ((*ref_words_nr) == maxwords) {
- warning("Out of reference words");
- return; // return gracefully
- }
+ } else if (node_is_terminal(parseT)) {
- ref_words[*ref_words_nr] = word; // register word
- ++(*ref_words_nr);
+ // parseT is a terminal, but saidT isn't
- }
- if (major != WORD_TYPE_SYNTACTIC_SUGAR && major != WORD_TYPE_BASE && major != WORD_TYPE_REF)
- warning("aug_find_words_recursively(): Unknown word type %03x", major);
+ if (node_major(saidT) == 0x141 || node_major(saidT) == 0x152 ||
+ node_major(saidT) == node_major(parseT))
+ ret = scanSaidChildren(parseT, saidT->right->right,
+ inParen ? SCAN_SAID_OR : SCAN_SAID_AND );
+ else
+ ret = 0;
- } else // Did NOT find a word group: Attempt to recurse
- aug_find_words_recursively(tree, pos, base_words, base_words_nr,
- ref_words, ref_words_nr, maxwords, refbranch || major == WORD_TYPE_REF);
+ } else if (node_major(saidT) != 0x141 && node_major(saidT) != 0x152 &&
+ node_major(saidT) != node_major(parseT)) {
- pos = aug_get_next_sibling(tree, pos, &major, &minor);
- }
-}
+ // parseT and saidT both aren't terminals
+ // said major is not 0x141 or 0x152 or parse major
+ ret = scanParseChildren(parseT->right->right, saidT);
-static void aug_find_words(parse_tree_node_t *tree, int startpos, int *base_words, int *base_words_nr,
- int *ref_words, int *ref_words_nr, int maxwords) {
- // initializing wrapper for aug_find_words_recursively()
- *base_words_nr = 0;
- *ref_words_nr = 0;
+ } else {
- aug_find_words_recursively(tree, startpos, base_words, base_words_nr, ref_words, ref_words_nr, maxwords, 0);
-}
+ // parseT and saidT are both not terminals,
+ // said major 0x141 or 0x152 or equal to parse major
+ ret = scanSaidChildren(parseT->right->right, saidT->right->right,
+ inParen ? SCAN_SAID_OR : SCAN_SAID_AND);
-static int aug_contains_word(int *list, int length, int word) {
- int i;
+ }
- if (word == ANYWORD)
- return (length);
+ if (inBracket && ret == 0) {
+ scidprintf("%*smatchTrees changing ret to 1 due to brackets\n",
+ outputDepth, "");
+ ret = 1;
+ }
- for (i = 0; i < length; i++)
- if (list[i] == word)
- return 1;
+ scidprintf("%*smatchTrees returning %d\n", outputDepth, "", ret);
+ outputDepth--;
- return 0;
+ return ret;
}
-static int augment_sentence_expression(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset,
- int parse_branch, int major, int minor, int *base_words, int base_words_nr,
- int *ref_words, int ref_words_nr);
+static int scanSaidChildren(ParseTreeNode* parseT, ParseTreeNode* saidT,
+ ScanSaidType type) {
+ outputDepth++;
+ scidprintf("%*sscanSaid(%s) on ", outputDepth, "",
+ type == SCAN_SAID_OR ? "OR" : "AND");
+ node_print_desc(parseT);
+ scidprintf(" and ");
+ node_print_desc(saidT);
+ scidprintf("\n");
-static int augment_match_expression_p(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset,
- int parse_basepos, int major, int minor,
- int *base_words, int base_words_nr, int *ref_words, int ref_words_nr) {
- int cmajor, cminor, cpos;
- cpos = aug_get_first_child(saidt, augment_pos, &cmajor, &cminor);
- if (!cpos) {
- warning("augment_match_expression_p(): Empty condition");
- return 1;
- }
+ int ret = 1;
- scidprintf("Attempting to match (%03x %03x (%03x %03x\n", major, minor, cmajor, cminor);
-
- if ((major == WORD_TYPE_BASE) && (minor == AUGMENT_SENTENCE_MINOR_RECURSE))
- return augment_match_expression_p(saidt, cpos, parset, parse_basepos, cmajor, cminor,
- base_words, base_words_nr, ref_words, ref_words_nr);
-
- switch (major) {
-
- case WORD_TYPE_BASE:
- while (cpos) {
- if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_WORD) {
- int word = aug_get_wgroup(saidt, cpos);
- scidprintf("Looking for word %03x\n", word);
-
- if (aug_contains_word(base_words, base_words_nr, word))
- return 1;
- } else if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_PHRASE) {
- if (augment_sentence_expression(saidt, cpos, parset, parse_basepos, cmajor, cminor,
- base_words, base_words_nr, ref_words, ref_words_nr))
- return 1;
- } else if (cminor == AUGMENT_SENTENCE_MINOR_PARENTHESES) {
- int gc_major, gc_minor;
- int gchild = aug_get_first_child(saidt, cpos, &gc_major, &gc_minor);
-
- while (gchild) {
- if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, major,
- minor, base_words, base_words_nr,
- ref_words, ref_words_nr))
- return 1;
- gchild = aug_get_next_sibling(saidt, gchild, &gc_major, &gc_minor);
- }
- } else
- warning("augment_match_expression_p(): Unknown type 141 minor number %3x", cminor);
-
- cpos = aug_get_next_sibling(saidt, cpos, &cmajor, &cminor);
+ assert(!(type == SCAN_SAID_OR && !saidT));
- }
- break;
-
- case WORD_TYPE_REF:
- while (cpos) {
- if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_WORD) {
- int word = aug_get_wgroup(saidt, cpos);
- scidprintf("Looking for refword %03x\n", word);
-
- if (aug_contains_word(ref_words, ref_words_nr, word))
- return 1;
- } else if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_PHRASE) {
- if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, cmajor, cminor,
- base_words, base_words_nr, ref_words, ref_words_nr))
- return 1;
- } else if (cminor == AUGMENT_SENTENCE_MINOR_PARENTHESES) {
- int gc_major, gc_minor;
- int gchild = aug_get_first_child(saidt, cpos, &gc_major, &gc_minor);
-
- while (gchild) {
- if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, major,
- minor, base_words, base_words_nr,
- ref_words, ref_words_nr))
- return 1;
- gchild = aug_get_next_sibling(saidt, gchild, &gc_major, &gc_minor);
- }
- } else
- warning("augment_match_expression_p(): Unknown type 144 minor number %3x", cminor);
-
- cpos = aug_get_next_sibling(saidt, cpos, &cmajor, &cminor);
+ while (saidT) {
+ assert(saidT->type == kParseTreeBranchNode);
- }
- break;
+ ParseTreeNode* saidChild = saidT->left;
+ assert(saidChild);
- case AUGMENT_SENTENCE_PART_BRACKETS:
- if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, cmajor, cminor,
- base_words, base_words_nr, ref_words, ref_words_nr))
- return 1;
+ if (node_major(saidChild) != 0x145) {
- scidprintf("Didn't match subexpression; checking sub-bracked predicate %03x\n", cmajor);
+ ret = scanParseChildren(parseT, saidChild);
- switch (cmajor) {
- case WORD_TYPE_BASE:
- if (!base_words_nr)
- return 1;
- break;
+ if (type == SCAN_SAID_AND && ret != 1)
+ break;
- case WORD_TYPE_REF:
- if (!ref_words_nr)
- return 1;
- break;
+ if (type == SCAN_SAID_OR && ret == 1)
+ break;
- default:
- warning("augment_match_expression_p(): (subp1) Unkonwn sub-bracket predicate %03x", cmajor);
}
- break;
+ saidT = saidT->right;
- default:
- warning("augment_match_expression_p(): Unknown predicate %03x", major);
+ }
+ scidprintf("%*sscanSaid returning %d\n", outputDepth, "", ret);
+ outputDepth--;
+ return ret;
+}
+
+
+static int scanParseChildren(ParseTreeNode* parseT, ParseTreeNode* saidT) {
+
+ outputDepth++;
+ scidprintf("%*sscanParse on ", outputDepth, "");
+ node_print_desc(parseT);
+ scidprintf(" and ");
+ node_print_desc(saidT);
+ scidprintf("\n");
+
+ if (node_major(saidT) == 0x14B) {
+ dontclaim = true;
+ scidprintf("%*sscanParse returning 1 (0x14B)\n", outputDepth, "");
+ outputDepth--;
+ return 1;
}
- scidprintf("augment_match_expression_p(): Generic failure\n");
+ bool inParen = node_minor(saidT) == 0x14F || node_minor(saidT) == 0x150;
+ bool inBracket = node_major(saidT) == 0x152;
- return 0;
-}
+ int ret;
-static int augment_sentence_expression(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset,
- int parse_branch, int major, int minor, int *base_words, int base_words_nr,
- int *ref_words, int ref_words_nr) {
- int check_major, check_minor;
- int check_pos = aug_get_first_child(saidt, augment_pos, &check_major, &check_minor);
- do {
- if (!(augment_match_expression_p(saidt, check_pos, parset, parse_branch, check_major, check_minor,
- base_words, base_words_nr, ref_words, ref_words_nr)))
- return 0;
- } while ((check_pos = aug_get_next_sibling(saidt, check_pos, &check_major, &check_minor)));
+ // descend further down saidT before actually scanning parseT
+ if ((node_major(saidT) == 0x141 || node_major(saidT) == 0x152) &&
+ !node_is_terminal(saidT)) {
- return 1;
-}
+ ret = scanSaidChildren(parseT, saidT->right->right,
+ inParen ? SCAN_SAID_OR : SCAN_SAID_AND );
-static int augment_sentence_part(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset, int parse_basepos, int major, int minor) {
- int pmajor, pminor;
- int parse_branch = parse_basepos;
- int optional = 0;
- int foundwords = 0;
+ } else if (parseT && parseT->left->type == kParseTreeBranchNode) {
- scidprintf("Augmenting (%03x %03x\n", major, minor);
+ ret = 0;
+ int subresult = 0;
- if (major == AUGMENT_SENTENCE_PART_BRACKETS) { // '[/ foo]' is true if '/foo' or if there
- // exists no x for which '/x' is true
- if ((augment_pos = aug_get_first_child(saidt, augment_pos, &major, &minor))) {
- scidprintf("Optional part: Now augmenting (%03x %03x\n", major, minor);
- optional = 1;
- } else {
- scidprintf("Matched empty optional expression\n");
- return 1;
- }
- }
+ while (parseT) {
+ assert(parseT->type == kParseTreeBranchNode);
- if ((major < 0x141) || (major > 0x143)) {
- scidprintf("augment_sentence_part(): Unexpected sentence part major number %03x\n", major);
- return 0;
- }
+ ParseTreeNode* parseChild = parseT->left;
+ assert(parseChild);
- while ((parse_branch = aug_get_next_sibling(parset, parse_branch, &pmajor, &pminor))) {
- if (pmajor == major) { // found matching sentence part
- int success;
- int base_words_nr;
- int ref_words_nr;
- int base_words[AUGMENT_MAX_WORDS];
- int ref_words[AUGMENT_MAX_WORDS];
-#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION
- int i;
-#endif
+ scidprintf("%*sscanning next: ", outputDepth, "");
+ node_print_desc(parseChild);
+ scidprintf("\n");
- scidprintf("Found match with pminor = %03x\n", pminor);
- aug_find_words(parset, parse_branch, base_words, &base_words_nr, ref_words, &ref_words_nr, AUGMENT_MAX_WORDS);
- foundwords |= (ref_words_nr | base_words_nr);
-#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION
- printf("%d base words:", base_words_nr);
- for (i = 0; i < base_words_nr; i++)
- printf(" %03x", base_words[i]);
- printf("\n%d reference words:", ref_words_nr);
- for (i = 0; i < ref_words_nr; i++)
- printf(" %03x", ref_words[i]);
- printf("\n");
-#endif
+ if (node_major(parseChild) == node_major(saidT) ||
+ node_major(parseChild) == 0x141)
+ subresult = matchTrees(parseChild, saidT);
- success = augment_sentence_expression(saidt, augment_pos, parset, parse_basepos, major, minor,
- base_words, base_words_nr, ref_words, ref_words_nr);
+ if (subresult != 0)
+ ret = subresult;
+
+ if (ret == 1)
+ break;
+
+ parseT = parseT->right;
- if (success) {
- scidprintf("SUCCESS on augmenting (%03x %03x\n", major, minor);
- return 1;
- }
}
+
+ // ret is now:
+ // 1 if ANY matchTrees(parseSibling, saidTree) returned 1
+ // ELSE: -1 if ANY returned -1
+ // ELSE: 0
+
+ } else {
+
+ ret = matchTrees(parseT, saidT);
+
}
- if (optional && (foundwords == 0)) {
- scidprintf("Found no words and optional branch => SUCCESS on augmenting (%03x %03x\n", major, minor);
- return 1;
+ if (inBracket && ret == 0) {
+ scidprintf("%*sscanParse changing ret to 1 due to brackets\n",
+ outputDepth, "");
+ ret = 1;
}
- scidprintf("FAILURE on augmenting (%03x %03x\n", major, minor);
- return 0;
+ scidprintf("%*sscanParse returning %d\n", outputDepth, "", ret);
+ outputDepth--;
+
+ return ret;
}
-static int augment_parse_nodes(parse_tree_node_t *parset, parse_tree_node_t *saidt) {
- int augment_basepos = 0;
- int parse_basepos;
- int major, minor;
- int dontclaim = 0;
- parse_basepos = aug_get_base_node(parset);
- if (!parse_basepos) {
- warning("augment_parse_nodes(): Parse tree is corrupt");
- return 0;
- }
- augment_basepos = aug_get_base_node(saidt);
- if (!augment_basepos) {
- warning("augment_parse_nodes(): Said tree is corrupt");
- return 0;
- }
+static int augment_parse_nodes(ParseTreeNode *parseT, ParseTreeNode *saidT) {
+ outputDepth = 0;
+ scidprintf("augment_parse_nodes on ");
+ node_print_desc(parseT);
+ scidprintf(" and ");
+ node_print_desc(saidT);
+ scidprintf("\n");
- while ((augment_basepos = aug_get_next_sibling(saidt, augment_basepos, &major, &minor))) {
- if ((major == 0x14b) && (minor == SAID_LONG(SAID_GT)))
- dontclaim = 1; // special case
- else // normal sentence part
- if (!(augment_sentence_part(saidt, augment_basepos, parset, parse_basepos, major, minor))) {
- scidprintf("Returning failure\n");
- return 0; // fail
- }
- }
+ dontclaim = false;
+
+ int ret = matchTrees(parseT, saidT);
+
+ scidprintf("matchTrees returned %d\n", ret);
- scidprintf("Returning success with dontclaim=%d\n", dontclaim);
+ if (ret != 1)
+ return 0;
if (dontclaim)
return SAID_PARTIAL_MATCH;
- else
- return 1; // full match
+
+ return 1;
}
@@ -2441,22 +996,19 @@ static int augment_parse_nodes(parse_tree_node_t *parset, parse_tree_node_t *sai
/**** Main code ****/
/*******************/
-int said(EngineState *s, byte *spec, bool verbose) {
+int said(EngineState *s, const byte *spec, bool verbose) {
int retval;
Vocabulary *voc = g_sci->getVocabulary();
- parse_tree_node_t *parse_tree_ptr = voc->_parserNodes;
+ ParseTreeNode *parse_tree_ptr = voc->_parserNodes;
if (voc->parserIsValid) {
- if (said_parse_spec(spec)) {
- printf("Offending spec was: ");
- voc->decipherSaidBlock(spec);
+ if (said_parse_spec(spec))
return SAID_NO_MATCH;
- }
if (verbose)
- vocab_dump_parse_tree("Said-tree", said_tree); // Nothing better to do yet
- retval = augment_parse_nodes(parse_tree_ptr, &(said_tree[0]));
+ vocab_dump_parse_tree("Said-tree", said_tree);
+ retval = augment_parse_nodes(parse_tree_ptr, said_tree);
if (!retval)
return SAID_NO_MATCH;
@@ -2470,15 +1022,108 @@ int said(EngineState *s, byte *spec, bool verbose) {
}
-#ifdef SAID_DEBUG_PROGRAM
-int main (int argc, char *argv) {
- byte block[] = {0x01, 0x00, 0xf8, 0xf5, 0x02, 0x01, 0xf6, 0xf2, 0x02, 0x01, 0xf2, 0x01, 0x03, 0xff};
- EngineState s;
+/*
+
+Some test expressions for use in the ScummVM debugging console, using
+Codename: ICEMAN's vocabulary:
+
+
+
+said green board & [!*] / 8af < 1f6
+True
+
+said get green board & [!*] / 8af < 1f6
+False
+
+said green board & [!*] / 8af [< 1f6 ]
+True
+
+said climb up & 19b , 426 [< 142 ] [/ 81e ]
+True
+
+said climb up ladder & 19b , 426 [< 142 ] [/ 81e ]
+True
+
+said climb down & 19b , 426 [< 142 ] [/ 81e ]
+False
+
+said climb up tree & 19b , 426 [< 142 ] [/ 81e ]
+False
+
+said climb up & 19b , 446 , 426 [< 143 ] [/ 81e ]
+False
+
+said climb down & 19b , 446 , 426 [< 143 ] [/ 81e ]
+True
+
+said use green device & 1a5 / 8c1 [< 21d ]
+False
+
+said use electronic device & 1a5 / 8c1 [< 21d ]
+True
+
+said use device & 1a5 / 8c1 [< 21d ]
+True
+
+said eat & 429 [/ !* ]
+True
+
+said eat ladder & 429 [/ !* ]
+False
+
+said look at the ladder & 3f8 / 81e [< !* ]
+True
+
+said look at the green ladder & 3f8 / 81e [< !* ]
+False
+
+said look green book & / 7f6 [< 8d2 ]
+False
+
+said look green book & 3f8 [< ca ]
+True
+
+said get a blue board for the green ladder & 3f9 / 8af [ < 1f6 ] / 81e < 1f6
+False
+
+said get a board for the green ladder & 3f9 / 8af [ < 1f6 ] / 81e < 1f6
+True
+
+said get a blue board & 3f9 / 8af [ < 1f6 ]
+False
+
+said get up & ( 3f8 , 3f9 ) [ < ( 142 , 143 ) ]
+True
+
+said get left & ( 3f8 , 3f9 ) [ < ( 142 , 143 ) ]
+False
+
+said look down & ( 3f8 , 3f9 ) [ < ( 142 , 143 ) ]
+True
+
+said get & ( 3f8 , 3f9 ) [ < ( 142 , 143 ) ]
+True
+
+said put washer on shaft & 455 , ( 3fa < cb ) / 8c6
+True
+
+said depth correct & [!*] < 8b1 / 22
+True
+
+said depth acknowledged & / 46d , 460 , 44d < 8b1
+True
+
+said depth confirmed & / 46d , 460 , 44d < 8b1
+True
+
+said depth attained & / 46d , 460 , 44d < 8b1
+True
+
+
+*/
+
+
- s.parser_valid = 1;
- said(&s, block);
-}
-#endif
} // End of namespace Sci
diff --git a/engines/sci/parser/said.y b/engines/sci/parser/said.y
deleted file mode 100644
index cbb2ff3e62..0000000000
--- a/engines/sci/parser/said.y
+++ /dev/null
@@ -1,839 +0,0 @@
-%{
-/* ScummVM - Graphic Adventure Engine
- *
- * ScummVM is the legal property of its developers, whose names
- * are too numerous to list here. Please refer to the COPYRIGHT
- * file distributed with this source distribution.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
-
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
-
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * $URL$
- * $Id$
- *
- */
-
-#include "sci/engine/state.h"
-
-
-// Bison generates an empty switch statement that gives a warning in MSVC.
-// This disables that warning.
-#ifdef _MSC_VER
-#pragma warning(disable:4065)
-#endif
-
-
-namespace Sci {
-
-#define SAID_BRANCH_NULL 0
-
-#define MAX_SAID_TOKENS 128
-
-// Maximum number of words to be expected in a parsed sentence
-#define AUGMENT_MAX_WORDS 64
-
-
-#define ANYWORD 0xfff
-
-#define WORD_TYPE_BASE 0x141
-#define WORD_TYPE_REF 0x144
-#define WORD_TYPE_SYNTACTIC_SUGAR 0x145
-
-#define AUGMENT_SENTENCE_PART_BRACKETS 0x152
-
-// Minor numbers
-#define AUGMENT_SENTENCE_MINOR_MATCH_PHRASE 0x14c
-#define AUGMENT_SENTENCE_MINOR_MATCH_WORD 0x153
-#define AUGMENT_SENTENCE_MINOR_RECURSE 0x144
-#define AUGMENT_SENTENCE_MINOR_PARENTHESES 0x14f
-
-
-#undef YYDEBUG /*1*/
-//#define SAID_DEBUG*/
-//#define SCI_DEBUG_PARSE_TREE_AUGMENTATION // uncomment to debug parse tree augmentation
-
-
-#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION
-#define scidprintf printf
-#else
-void print_nothing(...) { }
-#define scidprintf print_nothing
-#endif
-
-
-static char *said_parse_error;
-
-static int said_token;
-static int said_tokens_nr;
-static int said_tokens[MAX_SAID_TOKENS];
-static int said_blessed; // increminated by said_top_branch
-
-static int said_tree_pos; // Set to 0 if we're out of space
-#define SAID_TREE_START 4; // Reserve space for the 4 top nodes
-
-#define VALUE_IGNORE -424242
-
-static parse_tree_node_t said_tree[VOCAB_TREE_NODES];
-
-typedef int wgroup_t;
-typedef int tree_t;
-typedef int said_spec_t;
-
-static tree_t said_aug_branch(int, int, tree_t, tree_t);
-static tree_t said_attach_branch(tree_t, tree_t);
-/*
-static tree_t said_wgroup_branch(wgroup_t);
-*/
-static said_spec_t said_top_branch(tree_t);
-static tree_t said_paren(tree_t, tree_t);
-static tree_t said_value(int, tree_t);
-static tree_t said_terminal(int);
-
-static int yylex();
-
-static int yyerror(const char *s) {
- said_parse_error = strdup(s);
- return 1; /* Abort */
-}
-
-%}
-
-%token WGROUP /* Word group */
-%token YY_COMMA /* 0xf0 */
-%token YY_AMP /* 0xf1 */
-%token YY_SLASH /* 0xf2 */
-%token YY_PARENO /* 0xf3 */
-%token YY_PARENC /* 0xf4 */
-%token YY_BRACKETSO /* 0xf5 */
-%token YY_BRACKETSC /* 0xf6 */
-%token YY_HASH /* 0xf7 */
-%token YY_LT /* 0xf8 */
-%token YY_GT /* 0xf9 */
-%token YY_BRACKETSO_LT /* special token used to imitate LR(2) behaviour */
-%token YY_BRACKETSO_SLASH /* special token used to imitate LR(2) behaviour */
-%token YY_LT_BRACKETSO /* special token used to imitate LR(2) behaviour */
-%token YY_LT_PARENO /* special token used to imitate LR(2) behaviour */
-
-%%
-
-saidspec : leftspec optcont
- { $$ = said_top_branch(said_attach_branch($1, $2)); }
- | leftspec midspec optcont
- { $$ = said_top_branch(said_attach_branch($1, said_attach_branch($2, $3))); }
- | leftspec midspec rightspec optcont
- { $$ = said_top_branch(said_attach_branch($1, said_attach_branch($2, said_attach_branch($3, $4)))); }
- ;
-
-
-optcont : /* empty */
- { $$ = SAID_BRANCH_NULL; }
- | YY_GT
- { $$ = said_paren(said_value(0x14b, said_value(0xf900, said_terminal(0xf900))), SAID_BRANCH_NULL); }
- ;
-
-
-
-leftspec : /* empty */
- { $$ = SAID_BRANCH_NULL; }
- | expr
- { $$ = said_paren(said_value(0x141, said_value(0x149, $1)), SAID_BRANCH_NULL); }
- ;
-
-
-
-midspec : YY_SLASH expr
- { $$ = said_aug_branch(0x142, 0x14a, $2, SAID_BRANCH_NULL); }
- | YY_BRACKETSO_SLASH YY_SLASH expr YY_BRACKETSC
- { $$ = said_aug_branch(0x152, 0x142, said_aug_branch(0x142, 0x14a, $3, SAID_BRANCH_NULL), SAID_BRANCH_NULL); }
- | YY_SLASH
- { $$ = SAID_BRANCH_NULL; }
- ;
-
-
-
-rightspec : YY_SLASH expr
- { $$ = said_aug_branch(0x143, 0x14a, $2, SAID_BRANCH_NULL); }
- | YY_BRACKETSO_SLASH YY_SLASH expr YY_BRACKETSC
- { $$ = said_aug_branch(0x152, 0x143, said_aug_branch(0x143, 0x14a, $3, SAID_BRANCH_NULL), SAID_BRANCH_NULL); }
- | YY_SLASH
- { $$ = SAID_BRANCH_NULL; }
- ;
-
-
-word : WGROUP
- { $$ = said_paren(said_value(0x141, said_value(0x153, said_terminal($1))), SAID_BRANCH_NULL); }
- ;
-
-
-cwordset : wordset
- { $$ = said_aug_branch(0x141, 0x14f, $1, SAID_BRANCH_NULL); }
- | YY_BRACKETSO wordset YY_BRACKETSC
- { $$ = said_aug_branch(0x141, 0x14f, said_aug_branch(0x152, 0x14c, said_aug_branch(0x141, 0x14f, $2, SAID_BRANCH_NULL), SAID_BRANCH_NULL), SAID_BRANCH_NULL); }
- ;
-
-
-wordset : word
- { $$ = $1; }
- | YY_PARENO expr YY_PARENC
- { $$ = said_aug_branch(0x141, 0x14c, $2, SAID_BRANCH_NULL); }
- | wordset YY_COMMA wordset
- { $$ = said_attach_branch($1, $3); }
- | wordset YY_BRACKETSO_LT wordrefset YY_BRACKETSC
- { $$ = said_attach_branch($1, $3); }
- | wordset YY_COMMA YY_BRACKETSO wordset YY_BRACKETSC
- { $$ = said_attach_branch($1, $3); }
- ;
-
-
-expr : cwordset cwordrefset
- { $$ = said_attach_branch($1, $2); }
- | cwordset
- { $$ = $1; }
- | cwordrefset
- { $$ = $1; }
- ;
-
-
-cwordrefset : wordrefset
- { $$ = $1; }
- | YY_BRACKETSO_LT wordrefset YY_BRACKETSC
- { $$ = said_aug_branch(0x152, 0x144, $2, SAID_BRANCH_NULL); }
- | wordrefset YY_BRACKETSO_LT wordrefset YY_BRACKETSC
- { $$ = said_attach_branch($1, said_aug_branch(0x152, 0x144, $3, SAID_BRANCH_NULL)); }
- ;
-
-
-wordrefset : YY_LT word recref
- { $$ = said_aug_branch(0x144, 0x14f, $2, $3); }
- | YY_LT_PARENO YY_PARENO expr YY_PARENC
- { $$ = said_aug_branch(0x144, 0x14f, said_aug_branch(0x141, 0x144, $2, SAID_BRANCH_NULL), SAID_BRANCH_NULL); }
- | YY_LT wordset
- { $$ = said_aug_branch(0x144, 0x14f, $2, SAID_BRANCH_NULL); }
- | YY_LT_BRACKETSO YY_BRACKETSO wordset YY_BRACKETSC
- { $$ = said_aug_branch(0x152, 0x144, said_aug_branch(0x144, 0x14f, $3, SAID_BRANCH_NULL), SAID_BRANCH_NULL); }
- ;
-
-
-recref : YY_LT wordset recref
- { $$ = said_aug_branch(0x141, 0x144, said_aug_branch(0x144, 0x14f, $2, SAID_BRANCH_NULL), $3); }
- | YY_LT wordset
- { $$ = said_aug_branch(0x141, 0x144, said_aug_branch(0x144, 0x14f, $2, SAID_BRANCH_NULL), SAID_BRANCH_NULL); }
- | YY_LT_PARENO YY_PARENO expr YY_PARENC
- { $$ = said_aug_branch(0x141, 0x14c, $2, SAID_BRANCH_NULL); }
- ;
-
-%%
-
-int parse_yy_token_lookup[] = {YY_COMMA, YY_AMP, YY_SLASH, YY_PARENO, YY_PARENC, YY_BRACKETSO, YY_BRACKETSC, YY_HASH, YY_LT, YY_GT};
-
-static int yylex() {
- int retval = said_tokens[said_token++];
-
- if (retval < SAID_LONG(SAID_FIRST)) {
- yylval = retval;
- retval = WGROUP;
- } else {
- retval >>= 8;
-
- if (retval == SAID_TERM)
- retval = 0;
- else {
- assert(retval >= SAID_FIRST);
- retval = parse_yy_token_lookup[retval - SAID_FIRST];
- if (retval == YY_BRACKETSO) {
- if ((said_tokens[said_token] >> 8) == SAID_LT)
- retval = YY_BRACKETSO_LT;
- else
- if ((said_tokens[said_token] >> 8) == SAID_SLASH)
- retval = YY_BRACKETSO_SLASH;
- } else if (retval == YY_LT && (said_tokens[said_token] >> 8) == SAID_BRACKO) {
- retval = YY_LT_BRACKETSO;
- } else if (retval == YY_LT && (said_tokens[said_token] >> 8) == SAID_PARENO) {
- retval = YY_LT_PARENO;
- }
- }
- }
-
- return retval;
-}
-
-static int said_next_node() {
- return ((said_tree_pos == 0) || (said_tree_pos >= VOCAB_TREE_NODES)) ? said_tree_pos = 0 : said_tree_pos++;
-}
-
-#define SAID_NEXT_NODE said_next_node()
-
-static int said_leaf_node(tree_t pos, int value) {
- said_tree[pos].type = kParseTreeLeafNode;
-
- if (value != VALUE_IGNORE)
- said_tree[pos].content.value = value;
-
- return pos;
-}
-
-static int said_branch_node(tree_t pos, int left, int right) {
- said_tree[pos].type = kParseTreeBranchNode;
-
- if (left != VALUE_IGNORE)
- said_tree[pos].content.branches[0] = left;
-
- if (right != VALUE_IGNORE)
- said_tree[pos].content.branches[1] = right;
-
- return pos;
-}
-
-static tree_t said_paren(tree_t t1, tree_t t2) {
- if (t1)
- return said_branch_node(SAID_NEXT_NODE, t1, t2);
- else
- return t2;
-}
-
-static tree_t said_value(int val, tree_t t) {
- return said_branch_node(SAID_NEXT_NODE, said_leaf_node(SAID_NEXT_NODE, val), t);
-
-}
-
-static tree_t said_terminal(int val) {
- return said_leaf_node(SAID_NEXT_NODE, val);
-}
-
-static tree_t said_aug_branch(int n1, int n2, tree_t t1, tree_t t2) {
- int retval;
-
- retval = said_branch_node(SAID_NEXT_NODE,
- said_branch_node(SAID_NEXT_NODE,
- said_leaf_node(SAID_NEXT_NODE, n1),
- said_branch_node(SAID_NEXT_NODE,
- said_leaf_node(SAID_NEXT_NODE, n2),
- t1)
- ),
- t2);
-
-#ifdef SAID_DEBUG
- fprintf(stderr, "AUG(0x%x, 0x%x, [%04x], [%04x]) = [%04x]\n", n1, n2, t1, t2, retval);
-#endif
-
- return retval;
-}
-
-static tree_t said_attach_branch(tree_t base, tree_t attacheant) {
-#ifdef SAID_DEBUG
- fprintf(stderr, "ATT2([%04x], [%04x]) = [%04x]\n", base, attacheant, base);
-#endif
-
- if (!attacheant)
- return base;
- if (!base)
- return attacheant;
-
- if (!base)
- return 0; // Happens if we're out of space
-
- said_branch_node(base, VALUE_IGNORE, attacheant);
-
- return base;
-}
-
-static said_spec_t said_top_branch(tree_t first) {
-#ifdef SAID_DEBUG
- fprintf(stderr, "TOP([%04x])\n", first);
-#endif
- said_branch_node(0, 1, 2);
- said_leaf_node(1, 0x141); // Magic number #1
- said_branch_node(2, 3, first);
- said_leaf_node(3, 0x13f); // Magic number #2
-
- ++said_blessed;
-
- return 0;
-}
-
-static int said_parse_spec(byte *spec) {
- int nextitem;
-
- said_parse_error = NULL;
- said_token = 0;
- said_tokens_nr = 0;
- said_blessed = 0;
-
- said_tree_pos = SAID_TREE_START;
-
- do {
- nextitem = *spec++;
- if (nextitem < SAID_FIRST)
- said_tokens[said_tokens_nr++] = nextitem << 8 | *spec++;
- else
- said_tokens[said_tokens_nr++] = SAID_LONG(nextitem);
-
- } while ((nextitem != SAID_TERM) && (said_tokens_nr < MAX_SAID_TOKENS));
-
- if (nextitem == SAID_TERM)
- yyparse();
- else {
- warning("SAID spec is too long");
- return 1;
- }
-
- if (said_parse_error) {
- warning("Error while parsing SAID spec: %s", said_parse_error);
- free(said_parse_error);
- return 1;
- }
-
- if (said_tree_pos == 0) {
- warning("Out of tree space while parsing SAID spec");
- return 1;
- }
-
- if (said_blessed != 1) {
- warning("Found multiple top branches");
- return 1;
- }
-
- return 0;
-}
-
-/**********************/
-/**** Augmentation ****/
-/**********************/
-
-// primitive functions
-
-#define AUG_READ_BRANCH(a, br, p) \
- if (tree[p].type != kParseTreeBranchNode) \
- return 0; \
- a = tree[p].content.branches[br];
-
-#define AUG_READ_VALUE(a, p) \
- if (tree[p].type != kParseTreeLeafNode) \
- return 0; \
- a = tree[p].content.value;
-
-#define AUG_ASSERT(i) \
- if (!i) return 0;
-
-static int aug_get_next_sibling(parse_tree_node_t *tree, int pos, int *first, int *second) {
- // Returns the next sibling relative to the specified position in 'tree',
- // sets *first and *second to its augment node values, returns the new position
- // or 0 if there was no next sibling
- int seek, valpos;
-
- AUG_READ_BRANCH(pos, 1, pos);
- AUG_ASSERT(pos);
- AUG_READ_BRANCH(seek, 0, pos);
- AUG_ASSERT(seek);
-
- // Now retrieve first value
- AUG_READ_BRANCH(valpos, 0, seek);
- AUG_ASSERT(valpos);
- AUG_READ_VALUE(*first, valpos);
-
- // Get second value
- AUG_READ_BRANCH(seek, 1, seek);
- AUG_ASSERT(seek);
- AUG_READ_BRANCH(valpos, 0, seek);
- AUG_ASSERT(valpos);
- AUG_READ_VALUE(*second, valpos);
-
- return pos;
-}
-
-static int aug_get_wgroup(parse_tree_node_t *tree, int pos) {
- // Returns 0 if pos in tree is not the root of a 3-element list, otherwise
- // it returns the last element (which, in practice, is the word group
- int val;
-
- AUG_READ_BRANCH(pos, 0, pos);
- AUG_ASSERT(pos);
- AUG_READ_BRANCH(pos, 1, pos);
- AUG_ASSERT(pos);
- AUG_READ_BRANCH(pos, 1, pos);
- AUG_ASSERT(pos);
- AUG_READ_VALUE(val, pos);
-
- return val;
-}
-
-static int aug_get_base_node(parse_tree_node_t *tree) {
- int startpos = 0;
- AUG_READ_BRANCH(startpos, 1, startpos);
-
- return startpos;
-}
-
-// semi-primitive functions
-
-static int aug_get_first_child(parse_tree_node_t *tree, int pos, int *first, int *second) {
- // like aug_get_next_sibling, except that it recurses into the tree and
- // finds the first child (usually *not* Ayanami Rei) of the current branch
- // rather than its next sibling.
- AUG_READ_BRANCH(pos, 0, pos);
- AUG_ASSERT(pos);
- AUG_READ_BRANCH(pos, 1, pos);
- AUG_ASSERT(pos);
-
- return aug_get_next_sibling(tree, pos, first, second);
-}
-
-static void aug_find_words_recursively(parse_tree_node_t *tree, int startpos, int *base_words, int *base_words_nr,
- int *ref_words, int *ref_words_nr, int maxwords, int refbranch) {
- // Finds and lists all base (141) and reference (144) words */
- int major, minor;
- int word;
- int pos = aug_get_first_child(tree, startpos, &major, &minor);
-
- //if (major == WORD_TYPE_REF)
- // refbranch = 1;
-
- while (pos) {
- if ((word = aug_get_wgroup(tree, pos))) { // found a word
- if (!refbranch && major == WORD_TYPE_BASE) {
- if ((*base_words_nr) == maxwords) {
- warning("Out of regular words");
- return; // return gracefully
- }
-
- base_words[*base_words_nr] = word; // register word
- ++(*base_words_nr);
-
- }
- if (major == WORD_TYPE_REF || refbranch) {
- if ((*ref_words_nr) == maxwords) {
- warning("Out of reference words");
- return; // return gracefully
- }
-
- ref_words[*ref_words_nr] = word; // register word
- ++(*ref_words_nr);
-
- }
- if (major != WORD_TYPE_SYNTACTIC_SUGAR && major != WORD_TYPE_BASE && major != WORD_TYPE_REF)
- warning("aug_find_words_recursively(): Unknown word type %03x", major);
-
- } else // Did NOT find a word group: Attempt to recurse
- aug_find_words_recursively(tree, pos, base_words, base_words_nr,
- ref_words, ref_words_nr, maxwords, refbranch || major == WORD_TYPE_REF);
-
- pos = aug_get_next_sibling(tree, pos, &major, &minor);
- }
-}
-
-
-static void aug_find_words(parse_tree_node_t *tree, int startpos, int *base_words, int *base_words_nr,
- int *ref_words, int *ref_words_nr, int maxwords) {
- // initializing wrapper for aug_find_words_recursively()
- *base_words_nr = 0;
- *ref_words_nr = 0;
-
- aug_find_words_recursively(tree, startpos, base_words, base_words_nr, ref_words, ref_words_nr, maxwords, 0);
-}
-
-
-static int aug_contains_word(int *list, int length, int word) {
- int i;
-
- if (word == ANYWORD)
- return (length);
-
- for (i = 0; i < length; i++)
- if (list[i] == word)
- return 1;
-
- return 0;
-}
-
-
-static int augment_sentence_expression(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset,
- int parse_branch, int major, int minor, int *base_words, int base_words_nr,
- int *ref_words, int ref_words_nr);
-
-static int augment_match_expression_p(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset,
- int parse_basepos, int major, int minor,
- int *base_words, int base_words_nr, int *ref_words, int ref_words_nr) {
- int cmajor, cminor, cpos;
- cpos = aug_get_first_child(saidt, augment_pos, &cmajor, &cminor);
- if (!cpos) {
- warning("augment_match_expression_p(): Empty condition");
- return 1;
- }
-
- scidprintf("Attempting to match (%03x %03x (%03x %03x\n", major, minor, cmajor, cminor);
-
- if ((major == WORD_TYPE_BASE) && (minor == AUGMENT_SENTENCE_MINOR_RECURSE))
- return augment_match_expression_p(saidt, cpos, parset, parse_basepos, cmajor, cminor,
- base_words, base_words_nr, ref_words, ref_words_nr);
-
- switch (major) {
-
- case WORD_TYPE_BASE:
- while (cpos) {
- if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_WORD) {
- int word = aug_get_wgroup(saidt, cpos);
- scidprintf("Looking for word %03x\n", word);
-
- if (aug_contains_word(base_words, base_words_nr, word))
- return 1;
- } else if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_PHRASE) {
- if (augment_sentence_expression(saidt, cpos, parset, parse_basepos, cmajor, cminor,
- base_words, base_words_nr, ref_words, ref_words_nr))
- return 1;
- } else if (cminor == AUGMENT_SENTENCE_MINOR_PARENTHESES) {
- int gc_major, gc_minor;
- int gchild = aug_get_first_child(saidt, cpos, &gc_major, &gc_minor);
-
- while (gchild) {
- if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, major,
- minor, base_words, base_words_nr,
- ref_words, ref_words_nr))
- return 1;
- gchild = aug_get_next_sibling(saidt, gchild, &gc_major, &gc_minor);
- }
- } else
- warning("augment_match_expression_p(): Unknown type 141 minor number %3x", cminor);
-
- cpos = aug_get_next_sibling(saidt, cpos, &cmajor, &cminor);
-
- }
- break;
-
- case WORD_TYPE_REF:
- while (cpos) {
- if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_WORD) {
- int word = aug_get_wgroup(saidt, cpos);
- scidprintf("Looking for refword %03x\n", word);
-
- if (aug_contains_word(ref_words, ref_words_nr, word))
- return 1;
- } else if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_PHRASE) {
- if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, cmajor, cminor,
- base_words, base_words_nr, ref_words, ref_words_nr))
- return 1;
- } else if (cminor == AUGMENT_SENTENCE_MINOR_PARENTHESES) {
- int gc_major, gc_minor;
- int gchild = aug_get_first_child(saidt, cpos, &gc_major, &gc_minor);
-
- while (gchild) {
- if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, major,
- minor, base_words, base_words_nr,
- ref_words, ref_words_nr))
- return 1;
- gchild = aug_get_next_sibling(saidt, gchild, &gc_major, &gc_minor);
- }
- } else
- warning("augment_match_expression_p(): Unknown type 144 minor number %3x", cminor);
-
- cpos = aug_get_next_sibling(saidt, cpos, &cmajor, &cminor);
-
- }
- break;
-
- case AUGMENT_SENTENCE_PART_BRACKETS:
- if (augment_match_expression_p(saidt, cpos, parset, parse_basepos, cmajor, cminor,
- base_words, base_words_nr, ref_words, ref_words_nr))
- return 1;
-
- scidprintf("Didn't match subexpression; checking sub-bracked predicate %03x\n", cmajor);
-
- switch (cmajor) {
- case WORD_TYPE_BASE:
- if (!base_words_nr)
- return 1;
- break;
-
- case WORD_TYPE_REF:
- if (!ref_words_nr)
- return 1;
- break;
-
- default:
- warning("augment_match_expression_p(): (subp1) Unkonwn sub-bracket predicate %03x", cmajor);
- }
-
- break;
-
- default:
- warning("augment_match_expression_p(): Unknown predicate %03x", major);
-
- }
-
- scidprintf("augment_match_expression_p(): Generic failure\n");
-
- return 0;
-}
-
-static int augment_sentence_expression(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset,
- int parse_branch, int major, int minor, int *base_words, int base_words_nr,
- int *ref_words, int ref_words_nr) {
- int check_major, check_minor;
- int check_pos = aug_get_first_child(saidt, augment_pos, &check_major, &check_minor);
- do {
- if (!(augment_match_expression_p(saidt, check_pos, parset, parse_branch, check_major, check_minor,
- base_words, base_words_nr, ref_words, ref_words_nr)))
- return 0;
- } while ((check_pos = aug_get_next_sibling(saidt, check_pos, &check_major, &check_minor)));
-
- return 1;
-}
-
-static int augment_sentence_part(parse_tree_node_t *saidt, int augment_pos, parse_tree_node_t *parset, int parse_basepos, int major, int minor) {
- int pmajor, pminor;
- int parse_branch = parse_basepos;
- int optional = 0;
- int foundwords = 0;
-
- scidprintf("Augmenting (%03x %03x\n", major, minor);
-
- if (major == AUGMENT_SENTENCE_PART_BRACKETS) { // '[/ foo]' is true if '/foo' or if there
- // exists no x for which '/x' is true
- if ((augment_pos = aug_get_first_child(saidt, augment_pos, &major, &minor))) {
- scidprintf("Optional part: Now augmenting (%03x %03x\n", major, minor);
- optional = 1;
- } else {
- scidprintf("Matched empty optional expression\n");
- return 1;
- }
- }
-
- if ((major < 0x141) || (major > 0x143)) {
- scidprintf("augment_sentence_part(): Unexpected sentence part major number %03x\n", major);
- return 0;
- }
-
- while ((parse_branch = aug_get_next_sibling(parset, parse_branch, &pmajor, &pminor))) {
- if (pmajor == major) { // found matching sentence part
- int success;
- int base_words_nr;
- int ref_words_nr;
- int base_words[AUGMENT_MAX_WORDS];
- int ref_words[AUGMENT_MAX_WORDS];
-#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION
- int i;
-#endif
-
- scidprintf("Found match with pminor = %03x\n", pminor);
- aug_find_words(parset, parse_branch, base_words, &base_words_nr, ref_words, &ref_words_nr, AUGMENT_MAX_WORDS);
- foundwords |= (ref_words_nr | base_words_nr);
-#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION
- printf("%d base words:", base_words_nr);
- for (i = 0; i < base_words_nr; i++)
- printf(" %03x", base_words[i]);
- printf("\n%d reference words:", ref_words_nr);
- for (i = 0; i < ref_words_nr; i++)
- printf(" %03x", ref_words[i]);
- printf("\n");
-#endif
-
- success = augment_sentence_expression(saidt, augment_pos, parset, parse_basepos, major, minor,
- base_words, base_words_nr, ref_words, ref_words_nr);
-
- if (success) {
- scidprintf("SUCCESS on augmenting (%03x %03x\n", major, minor);
- return 1;
- }
- }
- }
-
- if (optional && (foundwords == 0)) {
- scidprintf("Found no words and optional branch => SUCCESS on augmenting (%03x %03x\n", major, minor);
- return 1;
- }
- scidprintf("FAILURE on augmenting (%03x %03x\n", major, minor);
-
- return 0;
-}
-
-static int augment_parse_nodes(parse_tree_node_t *parset, parse_tree_node_t *saidt) {
- int augment_basepos = 0;
- int parse_basepos;
- int major, minor;
- int dontclaim = 0;
-
- parse_basepos = aug_get_base_node(parset);
- if (!parse_basepos) {
- warning("augment_parse_nodes(): Parse tree is corrupt");
- return 0;
- }
-
- augment_basepos = aug_get_base_node(saidt);
- if (!augment_basepos) {
- warning("augment_parse_nodes(): Said tree is corrupt");
- return 0;
- }
-
- while ((augment_basepos = aug_get_next_sibling(saidt, augment_basepos, &major, &minor))) {
- if ((major == 0x14b) && (minor == SAID_LONG(SAID_GT)))
- dontclaim = 1; // special case
- else // normal sentence part
- if (!(augment_sentence_part(saidt, augment_basepos, parset, parse_basepos, major, minor))) {
- scidprintf("Returning failure\n");
- return 0; // fail
- }
- }
-
- scidprintf("Returning success with dontclaim=%d\n", dontclaim);
-
- if (dontclaim)
- return SAID_PARTIAL_MATCH;
- else
- return 1; // full match
-}
-
-
-/*******************/
-/**** Main code ****/
-/*******************/
-
-int said(EngineState *s, byte *spec, bool verbose) {
- int retval;
- Vocabulary *voc = g_sci->getVocabulary();
-
- parse_tree_node_t *parse_tree_ptr = voc->_parserNodes;
-
- if (voc->parserIsValid) {
- if (said_parse_spec(spec)) {
- printf("Offending spec was: ");
- voc->decipherSaidBlock(spec);
- return SAID_NO_MATCH;
- }
-
- if (verbose)
- vocab_dump_parse_tree("Said-tree", said_tree); // Nothing better to do yet
- retval = augment_parse_nodes(parse_tree_ptr, &(said_tree[0]));
-
- if (!retval)
- return SAID_NO_MATCH;
- else if (retval != SAID_PARTIAL_MATCH)
- return SAID_FULL_MATCH;
- else
- return SAID_PARTIAL_MATCH;
- }
-
- return SAID_NO_MATCH;
-}
-
-
-#ifdef SAID_DEBUG_PROGRAM
-int main (int argc, char *argv) {
- byte block[] = {0x01, 0x00, 0xf8, 0xf5, 0x02, 0x01, 0xf6, 0xf2, 0x02, 0x01, 0xf2, 0x01, 0x03, 0xff};
- EngineState s;
-
- s.parser_valid = 1;
- said(&s, block);
-}
-#endif
-
-} // End of namespace Sci
diff --git a/engines/sci/parser/vocabulary.cpp b/engines/sci/parser/vocabulary.cpp
index 00448f5d51..20436d5b30 100644
--- a/engines/sci/parser/vocabulary.cpp
+++ b/engines/sci/parser/vocabulary.cpp
@@ -33,71 +33,34 @@
namespace Sci {
-#if 0
-
-#define VOCAB_RESOURCE_CLASSES 996
-/**
- * Vocabulary class names.
- * These strange names were taken from an SCI01 interpreter.
- */
-const char *class_names[] = {"",
- "",
- "conj", // conjunction
- "ass", // ?
- "pos", // preposition ?
- "art", // article
- "adj", // adjective
- "pron", // pronoun
- "noun", // noun
- "auxv", // auxillary verb
- "adv", // adverb
- "verb", // verb
- "",
- "",
- "",
- ""
- };
-
-int *vocab_get_classes(ResourceManager *resMan, int* count) {
- Resource* r;
- int *c;
- unsigned int i;
-
- if ((r = resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_CLASSES), 0)) == NULL)
- return 0;
-
- c = (int *)malloc(sizeof(int) * r->size / 2);
- for (i = 2; i < r->size; i += 4) {
- c[i/4] = READ_LE_UINT16(r->data + i);
- }
- *count = r->size / 4;
-
- return c;
-}
-
-int vocab_get_class_count(ResourceManager *resMan) {
- Resource* r;
-
- if ((r = resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_CLASSES), 0)) == 0)
- return 0;
-
- return r->size / 4;
-}
-
-#endif
-
-Vocabulary::Vocabulary(ResourceManager *resMan) : _resMan(resMan) {
+Vocabulary::Vocabulary(ResourceManager *resMan, bool foreign) : _resMan(resMan), _foreign(foreign) {
_parserRules = NULL;
- _vocabVersion = kVocabularySCI0;
memset(_parserNodes, 0, sizeof(_parserNodes));
// Mark parse tree as unused
_parserNodes[0].type = kParseTreeLeafNode;
- _parserNodes[0].content.value = 0;
+ _parserNodes[0].value = 0;
_synonyms.clear(); // No synonyms
debug(2, "Initializing vocabulary");
+ if (_resMan->testResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI0_MAIN_VOCAB))) {
+ _vocabVersion = kVocabularySCI0;
+ _resourceIdWords = VOCAB_RESOURCE_SCI0_MAIN_VOCAB;
+ _resourceIdSuffixes = VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB;
+ _resourceIdBranches = VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES;
+ } else {
+ _vocabVersion = kVocabularySCI1;
+ _resourceIdWords = VOCAB_RESOURCE_SCI1_MAIN_VOCAB;
+ _resourceIdSuffixes = VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB;
+ _resourceIdBranches = VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES;
+ }
+
+ if (_foreign) {
+ _resourceIdWords += 10;
+ _resourceIdSuffixes += 10;
+ _resourceIdBranches += 10;
+ }
if (getSciVersion() <= SCI_VERSION_1_EGA && loadParserWords()) {
loadSuffixes();
@@ -119,27 +82,46 @@ Vocabulary::~Vocabulary() {
freeSuffixes();
}
-bool Vocabulary::loadParserWords() {
+void Vocabulary::reset() {
+ parserIsValid = false; // Invalidate parser
+ parser_event = NULL_REG; // Invalidate parser event
+ parser_base = make_reg(g_sci->getEngineState()->_segMan->getSysStringsSegment(), SYS_STRING_PARSER_BASE);
+}
- char currentword[256] = ""; // They're not going to use words longer than 255 ;-)
- int currentwordpos = 0;
+bool Vocabulary::loadParserWords() {
+ char currentWord[VOCAB_MAX_WORDLENGTH] = "";
+ int currentWordPos = 0;
// First try to load the SCI0 vocab resource.
- Resource *resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI0_MAIN_VOCAB), 0);
+ Resource *resource = _resMan->findResource(ResourceId(kResourceTypeVocab, _resourceIdWords), 0);
if (!resource) {
- warning("SCI0: Could not find a main vocabulary, trying SCI01");
- resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI1_MAIN_VOCAB), 0);
- _vocabVersion = kVocabularySCI1;
+ warning("Could not find a main vocabulary");
+ return false; // NOT critical: SCI1 games and some demos don't have one!
}
- if (!resource) {
- warning("SCI1: Could not find a main vocabulary");
- return false; // NOT critical: SCI1 games and some demos don't have one!
+ VocabularyVersions resourceType = _vocabVersion;
+
+ if (resourceType == kVocabularySCI0) {
+ if (resource->size < 26 * 2) {
+ warning("Invalid main vocabulary encountered: Much too small");
+ return false;
+ }
+ // Check the alphabet-offset table for any content
+ int alphabetNr;
+ for (alphabetNr = 0; alphabetNr < 26; alphabetNr++) {
+ if (READ_LE_UINT16(resource->data + alphabetNr * 2))
+ break;
+ }
+ // If all of them were empty, we are definitely seeing SCI01 vocab in disguise (e.g. pq2 japanese)
+ if (alphabetNr == 26) {
+ warning("SCI0: Found SCI01 vocabulary in disguise");
+ resourceType = kVocabularySCI1;
+ }
}
unsigned int seeker;
- if (_vocabVersion == kVocabularySCI1)
+ if (resourceType == kVocabularySCI1)
seeker = 255 * 2; // vocab.900 starts with 255 16-bit pointers which we don't use
else
seeker = 26 * 2; // vocab.000 starts with 26 16-bit pointers which we don't use
@@ -155,13 +137,13 @@ bool Vocabulary::loadParserWords() {
while (seeker < resource->size) {
byte c;
- currentwordpos = resource->data[seeker++]; // Parts of previous words may be re-used
+ currentWordPos = resource->data[seeker++]; // Parts of previous words may be re-used
- if (_vocabVersion == kVocabularySCI1) {
+ if (resourceType == kVocabularySCI1) {
c = 1;
- while (seeker < resource->size && currentwordpos < 255 && c) {
+ while (seeker < resource->size && currentWordPos < 255 && c) {
c = resource->data[seeker++];
- currentword[currentwordpos++] = c;
+ currentWord[currentWordPos++] = c;
}
if (seeker == resource->size) {
warning("SCI1: Vocabulary not usable, disabling");
@@ -171,11 +153,11 @@ bool Vocabulary::loadParserWords() {
} else {
do {
c = resource->data[seeker++];
- currentword[currentwordpos++] = c & 0x7f; // 0x80 is used to terminate the string
+ currentWord[currentWordPos++] = c & 0x7f; // 0x80 is used to terminate the string
} while (c < 0x80);
}
- currentword[currentwordpos] = 0;
+ currentWord[currentWordPos] = 0;
// Now decode class and group:
c = resource->data[seeker + 1];
@@ -184,7 +166,7 @@ bool Vocabulary::loadParserWords() {
newWord._group = (resource->data[seeker + 2]) | ((c & 0x0f) << 8);
// Add the word to the list
- _parserWords[currentword] = newWord;
+ _parserWords[currentWord] = newWord;
seeker += 3;
}
@@ -195,23 +177,20 @@ bool Vocabulary::loadParserWords() {
const char *Vocabulary::getAnyWordFromGroup(int group) {
if (group == VOCAB_MAGIC_NUMBER_GROUP)
return "{number}";
+ if (group == VOCAB_MAGIC_NOTHING_GROUP)
+ return "{nothing}";
- for (WordMap::const_iterator i = _parserWords.begin(); i != _parserWords.end(); ++i)
+ for (WordMap::const_iterator i = _parserWords.begin(); i != _parserWords.end(); ++i) {
if (i->_value._group == group)
return i->_key.c_str();
+ }
return "{invalid}";
}
bool Vocabulary::loadSuffixes() {
// Determine if we can find a SCI1 suffix vocabulary first
- Resource* resource = NULL;
-
- if (_vocabVersion == kVocabularySCI0)
- resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB), 1);
- else
- resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB), 1);
-
+ Resource* resource = _resMan->findResource(ResourceId(kResourceTypeVocab, _resourceIdSuffixes), 1);
if (!resource)
return false; // No vocabulary found
@@ -224,7 +203,7 @@ bool Vocabulary::loadSuffixes() {
suffix.alt_suffix_length = strlen(suffix.alt_suffix);
seeker += suffix.alt_suffix_length + 1; // Hit end of string
- suffix.class_mask = (int16)READ_BE_UINT16(resource->data + seeker);
+ suffix.result_class = (int16)READ_BE_UINT16(resource->data + seeker);
seeker += 2;
// Beginning of next string - skip leading '*'
@@ -234,7 +213,7 @@ bool Vocabulary::loadSuffixes() {
suffix.word_suffix_length = strlen(suffix.word_suffix);
seeker += suffix.word_suffix_length + 1;
- suffix.result_class = (int16)READ_BE_UINT16(resource->data + seeker);
+ suffix.class_mask = (int16)READ_BE_UINT16(resource->data + seeker);
seeker += 3; // Next entry
_parserSuffixes.push_back(suffix);
@@ -244,13 +223,7 @@ bool Vocabulary::loadSuffixes() {
}
void Vocabulary::freeSuffixes() {
- Resource* resource = NULL;
-
- if (_vocabVersion == kVocabularySCI0)
- resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB), 0);
- else
- resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB), 0);
-
+ Resource* resource = _resMan->findResource(ResourceId(kResourceTypeVocab, _resourceIdSuffixes), 0);
if (resource)
_resMan->unlockResource(resource);
@@ -258,12 +231,7 @@ void Vocabulary::freeSuffixes() {
}
bool Vocabulary::loadBranches() {
- Resource *resource = NULL;
-
- if (_vocabVersion == kVocabularySCI0)
- resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES), 0);
- else
- resource = _resMan->findResource(ResourceId(kResourceTypeVocab, VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES), 0);
+ Resource *resource = _resMan->findResource(ResourceId(kResourceTypeVocab, _resourceIdBranches), 0);
_parserBranches.clear();
@@ -296,7 +264,7 @@ bool Vocabulary::loadBranches() {
return true;
}
-
+// we assume that *word points to an already lowercased word
ResultWord Vocabulary::lookupWord(const char *word, int word_len) {
Common::String tempword(word, word_len);
@@ -323,7 +291,7 @@ ResultWord Vocabulary::lookupWord(const char *word, int word_len) {
int suff_index = word_len - suffix->alt_suffix_length;
// Offset of the start of the suffix
- if (scumm_strnicmp(suffix->alt_suffix, word + suff_index, suffix->alt_suffix_length) == 0) { // Suffix matched!
+ if (strncmp(suffix->alt_suffix, word + suff_index, suffix->alt_suffix_length) == 0) { // Suffix matched!
// Terminate word at suffix start position...:
Common::String tempword2(word, MIN(word_len, suff_index));
@@ -353,82 +321,109 @@ ResultWord Vocabulary::lookupWord(const char *word, int word_len) {
return retval;
}
-void Vocabulary::decipherSaidBlock(byte *addr) {
- byte nextitem;
+void Vocabulary::debugDecipherSaidBlock(const byte *addr) {
+ bool first = true;
+ uint16 nextItem;
do {
- nextitem = *addr++;
-
- if (nextitem < 0xf0) {
- nextitem = nextitem << 8 | *addr++;
- printf(" %s[%03x]", getAnyWordFromGroup(nextitem), nextitem);
-
- nextitem = 42; // Make sure that group 0xff doesn't abort
- } else switch (nextitem) {
- case 0xf0:
- printf(" ,");
- break;
- case 0xf1:
- printf(" &");
- break;
- case 0xf2:
- printf(" /");
- break;
- case 0xf3:
- printf(" (");
- break;
- case 0xf4:
- printf(" )");
- break;
- case 0xf5:
- printf(" [");
- break;
- case 0xf6:
- printf(" ]");
- break;
- case 0xf7:
- printf(" #");
- break;
- case 0xf8:
- printf(" <");
- break;
- case 0xf9:
- printf(" >");
- break;
- case 0xff:
- break;
+ nextItem = *addr++;
+ if (nextItem != 0xff) {
+ if ((!first) && (nextItem != 0xf0))
+ printf(" ");
+ first = false;
+
+ if (nextItem < 0xf0) {
+ nextItem = nextItem << 8 | *addr++;
+ printf("%s{%03x}", getAnyWordFromGroup(nextItem), nextItem);
+
+ nextItem = 0; // Make sure that group 0xff doesn't abort
+ } else switch (nextItem) {
+ case 0xf0:
+ printf(",");
+ break;
+ case 0xf1:
+ printf("&");
+ break;
+ case 0xf2:
+ printf("/");
+ break;
+ case 0xf3:
+ printf("(");
+ break;
+ case 0xf4:
+ printf(")");
+ break;
+ case 0xf5:
+ printf("[");
+ break;
+ case 0xf6:
+ printf("]");
+ break;
+ case 0xf7:
+ printf("#");
+ break;
+ case 0xf8:
+ printf("<");
+ break;
+ case 0xf9:
+ printf(">");
+ break;
+ case 0xff:
+ break;
}
- } while (nextitem != 0xff);
-
- printf("\n");
+ }
+ } while (nextItem != 0xff);
}
+static const byte lowerCaseMap[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, // 0x00
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, // 0x10
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, // 0x20
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, // 0x30
+ 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', // 0x40
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, // 0x50
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, // 0x60
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, // 0x70
+ 0x87, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x84, 0x86, // 0x80
+ //^^ ^^^^ ^^^^
+ 0x82, 0x91, 0x91, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x94, 0x81, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, // 0x90
+ //^^ ^^^^ ^^^^ ^^^^
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa4, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, // 0xa0
+ // ^^^^
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, // 0xb0
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, // 0xc0
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, // 0xd0
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, // 0xe0
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff // 0xf0
+};
+
bool Vocabulary::tokenizeString(ResultWordList &retval, const char *sentence, char **error) {
- const char *lastword = sentence;
+ char currentWord[VOCAB_MAX_WORDLENGTH] = "";
int pos_in_sentence = 0;
- char c;
- int wordlen = 0;
+ unsigned char c;
+ int wordLen = 0;
*error = NULL;
do {
c = sentence[pos_in_sentence++];
-
- if (isalnum(c) || (c == '-' && wordlen))
- ++wordlen;
+ if (isalnum(c) || (c == '-' && wordLen) || (c >= 0x80)) {
+ currentWord[wordLen] = lowerCaseMap[c];
+ ++wordLen;
+ }
// Continue on this word */
// Words may contain a '-', but may not
// start with one.
else {
- if (wordlen) { // Finished a word?
+ if (wordLen) { // Finished a word?
- ResultWord lookup_result = lookupWord(lastword, wordlen);
+ ResultWord lookup_result = lookupWord(currentWord, wordLen);
// Look it up
if (lookup_result._class == -1) { // Not found?
- *error = (char *)calloc(wordlen + 1, 1);
- strncpy(*error, lastword, wordlen); // Set the offending word
+ *error = (char *)calloc(wordLen + 1, 1);
+ strncpy(*error, currentWord, wordLen); // Set the offending word
retval.clear();
return false; // And return with error
}
@@ -437,8 +432,7 @@ bool Vocabulary::tokenizeString(ResultWordList &retval, const char *sentence, ch
retval.push_back(lookup_result);
}
- lastword = sentence + pos_in_sentence;
- wordlen = 0;
+ wordLen = 0;
}
} while (c); // Until terminator is hit
@@ -447,7 +441,7 @@ bool Vocabulary::tokenizeString(ResultWordList &retval, const char *sentence, ch
}
void Vocabulary::printSuffixes() const {
- char word_buf[256], alt_buf[256];
+ char word_buf[VOCAB_MAX_WORDLENGTH], alt_buf[VOCAB_MAX_WORDLENGTH];
Console *con = g_sci->getSciDebugger();
int i = 0;
@@ -476,30 +470,25 @@ void Vocabulary::printParserWords() const {
con->DebugPrintf("\n");
}
-void _vocab_recursive_ptree_dump_treelike(parse_tree_node_t *nodes, int nr, int prevnr) {
- if ((nr > VOCAB_TREE_NODES)/* || (nr < prevnr)*/) {
- printf("Error(%04x)", nr);
- return;
- }
+void _vocab_recursive_ptree_dump_treelike(ParseTreeNode *tree) {
+ assert(tree);
- if (nodes[nr].type == kParseTreeLeafNode)
- //printf("[%03x]%04x", nr, nodes[nr].content.value);
- printf("%x", nodes[nr].content.value);
+ if (tree->type == kParseTreeLeafNode)
+ printf("%x", tree->value);
else {
- int lbranch = nodes[nr].content.branches[0];
- int rbranch = nodes[nr].content.branches[1];
- //printf("<[%03x]", nr);
+ ParseTreeNode* lbranch = tree->left;
+ ParseTreeNode* rbranch = tree->right;
printf("<");
if (lbranch)
- _vocab_recursive_ptree_dump_treelike(nodes, lbranch, nr);
+ _vocab_recursive_ptree_dump_treelike(lbranch);
else
printf("NULL");
printf(",");
if (rbranch)
- _vocab_recursive_ptree_dump_treelike(nodes, rbranch, nr);
+ _vocab_recursive_ptree_dump_treelike(rbranch);
else
printf("NULL");
@@ -507,55 +496,52 @@ void _vocab_recursive_ptree_dump_treelike(parse_tree_node_t *nodes, int nr, int
}
}
-void _vocab_recursive_ptree_dump(parse_tree_node_t *nodes, int nr, int prevnr, int blanks) {
- int lbranch = nodes[nr].content.branches[0];
- int rbranch = nodes[nr].content.branches[1];
- int i;
+void _vocab_recursive_ptree_dump(ParseTreeNode *tree, int blanks) {
+ assert(tree);
- if (nodes[nr].type == kParseTreeLeafNode) {
- printf("vocab_dump_parse_tree: Error: consp is nil for element %03x\n", nr);
- return;
- }
+ ParseTreeNode* lbranch = tree->left;
+ ParseTreeNode* rbranch = tree->right;
+ int i;
- if ((nr > VOCAB_TREE_NODES)/* || (nr < prevnr)*/) {
- printf("Error(%04x))", nr);
+ if (tree->type == kParseTreeLeafNode) {
+ printf("vocab_dump_parse_tree: Error: consp is nil\n");
return;
}
if (lbranch) {
- if (nodes[lbranch].type == kParseTreeBranchNode) {
+ if (lbranch->type == kParseTreeBranchNode) {
printf("\n");
for (i = 0; i < blanks; i++)
printf(" ");
printf("(");
- _vocab_recursive_ptree_dump(nodes, lbranch, nr, blanks + 1);
+ _vocab_recursive_ptree_dump(lbranch, blanks + 1);
printf(")\n");
for (i = 0; i < blanks; i++)
printf(" ");
} else
- printf("%x", nodes[lbranch].content.value);
+ printf("%x", lbranch->value);
printf(" ");
}/* else printf ("nil");*/
if (rbranch) {
- if (nodes[rbranch].type == kParseTreeBranchNode)
- _vocab_recursive_ptree_dump(nodes, rbranch, nr, blanks);
+ if (rbranch->type == kParseTreeBranchNode)
+ _vocab_recursive_ptree_dump(rbranch, blanks);
else
- printf("%x", nodes[rbranch].content.value);
+ printf("%x", rbranch->value);
}/* else printf("nil");*/
}
-void vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes) {
+void vocab_dump_parse_tree(const char *tree_name, ParseTreeNode *nodes) {
//_vocab_recursive_ptree_dump_treelike(nodes, 0, 0);
printf("(setq %s \n'(", tree_name);
- _vocab_recursive_ptree_dump(nodes, 0, 0, 1);
+ _vocab_recursive_ptree_dump(nodes, 1);
printf("))\n");
}
void Vocabulary::dumpParseTree() {
//_vocab_recursive_ptree_dump_treelike(nodes, 0, 0);
printf("(setq parse-tree \n'(");
- _vocab_recursive_ptree_dump(_parserNodes, 0, 0, 1);
+ _vocab_recursive_ptree_dump(_parserNodes, 1);
printf("))\n");
}
@@ -575,10 +561,10 @@ void Vocabulary::printParserNodes(int num) {
for (int i = 0; i < num; i++) {
con->DebugPrintf(" Node %03x: ", i);
if (_parserNodes[i].type == kParseTreeLeafNode)
- con->DebugPrintf("Leaf: %04x\n", _parserNodes[i].content.value);
+ con->DebugPrintf("Leaf: %04x\n", _parserNodes[i].value);
else
- con->DebugPrintf("Branch: ->%04x, ->%04x\n", _parserNodes[i].content.branches[0],
- _parserNodes[i].content.branches[1]);
+ con->DebugPrintf("Branch: ->%04x, ->%04x\n", _parserNodes[i].left,
+ _parserNodes[i].right);
}
}
@@ -591,7 +577,7 @@ int Vocabulary::parseNodes(int *i, int *pos, int type, int nr, int argc, const c
if (type == kParseNumber) {
_parserNodes[*pos += 1].type = kParseTreeLeafNode;
- _parserNodes[*pos].content.value = nr;
+ _parserNodes[*pos].value = nr;
return *pos;
}
if (type == kParseEndOfInput) {
@@ -623,7 +609,15 @@ int Vocabulary::parseNodes(int *i, int *pos, int type, int nr, int argc, const c
}
}
- if ((newPos = _parserNodes[oldPos].content.branches[j] = parseNodes(i, pos, nextToken, nextValue, argc, argv)) == -1)
+ newPos = parseNodes(i, pos, nextToken, nextValue, argc, argv);
+
+ if (j == 0)
+ _parserNodes[oldPos].left = &_parserNodes[newPos];
+ else
+ _parserNodes[oldPos].right = &_parserNodes[newPos];
+
+
+ if (newPos == -1)
return -1;
}
diff --git a/engines/sci/parser/vocabulary.h b/engines/sci/parser/vocabulary.h
index dccef0f5f3..d4df8af715 100644
--- a/engines/sci/parser/vocabulary.h
+++ b/engines/sci/parser/vocabulary.h
@@ -73,13 +73,16 @@ enum {
kParseNumber = 4
};
+#define VOCAB_MAX_WORDLENGTH 256
+
/* Anywords are ignored by the parser */
#define VOCAB_CLASS_ANYWORD 0xff
/* This word class is used for numbers */
#define VOCAB_MAGIC_NUMBER_GROUP 0xffd /* 0xffe ? */
+#define VOCAB_MAGIC_NOTHING_GROUP 0xffe
-/* Number of nodes for each parse_tree_node structure */
+/* Number of nodes for each ParseTreeNode structure */
#define VOCAB_TREE_NODES 500
#define VOCAB_TREE_NODE_LAST_WORD_STORAGE 0x140
@@ -115,7 +118,7 @@ struct ResultWord {
typedef Common::List<ResultWord> ResultWordList;
-typedef Common::HashMap<Common::String, ResultWord, Common::IgnoreCase_Hash, Common::IgnoreCase_EqualTo> WordMap;
+typedef Common::HashMap<Common::String, ResultWord, Common::CaseSensitiveString_Hash, Common::CaseSensitiveString_EqualTo> WordMap;
struct ParseRuleList;
@@ -149,16 +152,16 @@ struct parse_tree_branch_t {
};
enum ParseTypes {
- kParseTreeLeafNode = 0,
- kParseTreeBranchNode = 1
+ kParseTreeWordNode = 4,
+ kParseTreeLeafNode = 5,
+ kParseTreeBranchNode = 6
};
-struct parse_tree_node_t {
+struct ParseTreeNode {
ParseTypes type; /**< leaf or branch */
- union {
- int value; /**< For leaves */
- short branches[2]; /**< For branches */
- } content;
+ int value; /**< For leaves */
+ ParseTreeNode* left; /**< Left child, for branches */
+ ParseTreeNode* right; /**< Right child, for branches */
};
enum VocabularyVersions {
@@ -168,9 +171,12 @@ enum VocabularyVersions {
class Vocabulary {
public:
- Vocabulary(ResourceManager *resMan);
+ Vocabulary(ResourceManager *resMan, bool foreign);
~Vocabulary();
+ // reset parser status
+ void reset();
+
/**
* Gets any word from the specified group. For debugging only.
* @param group Group number
@@ -229,7 +235,7 @@ public:
* For debugging only.
* @param pos pointer to the data to dump
*/
- void decipherSaidBlock(byte *pos);
+ void debugDecipherSaidBlock(const byte *pos);
/**
* Prints the parser suffixes to the debug console.
@@ -301,6 +307,11 @@ private:
ResourceManager *_resMan;
VocabularyVersions _vocabVersion;
+ bool _foreign;
+ uint16 _resourceIdWords;
+ uint16 _resourceIdSuffixes;
+ uint16 _resourceIdBranches;
+
// Parser-related lists
SuffixList _parserSuffixes;
ParseRuleList *_parserRules; /**< GNF rules used in the parser algorithm */
@@ -310,7 +321,7 @@ private:
public:
// Accessed by said()
- parse_tree_node_t _parserNodes[VOCAB_TREE_NODES]; /**< The parse tree */
+ ParseTreeNode _parserNodes[VOCAB_TREE_NODES]; /**< The parse tree */
// Parser data:
reg_t parser_base; /**< Base address for the parser error reporting mechanism */
@@ -323,7 +334,7 @@ public:
* @param tree_name Name of the tree to dump (free-form)
* @param nodes The nodes containing the parse tree
*/
-void vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes);
+void vocab_dump_parse_tree(const char *tree_name, ParseTreeNode *nodes);
@@ -334,7 +345,7 @@ void vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes);
* @param verbose Whether to display the parse tree after building it
* @return 1 on a match, 0 otherwise
*/
-int said(EngineState *s, byte *spec, bool verbose);
+int said(EngineState *s, const byte *spec, bool verbose);
} // End of namespace Sci