diff options
-rw-r--r-- | engines/sci/engine/cfsml.pl | 1182 | ||||
-rw-r--r-- | engines/sci/engine/said.y | 950 |
2 files changed, 2132 insertions, 0 deletions
diff --git a/engines/sci/engine/cfsml.pl b/engines/sci/engine/cfsml.pl new file mode 100644 index 0000000000..f136ffce9a --- /dev/null +++ b/engines/sci/engine/cfsml.pl @@ -0,0 +1,1182 @@ +#! /usr/bin/env perl +# The C File Storage Meta Language "reference" implementation +# This implementation is supposed to conform to version +$version = "0.8.2"; +# of the spec. Please contact the maintainer if it doesn't. +# +# cfsml.pl Copyright (C) 1999, 2000, 2001 Christoph Reichenbach +# +# +# This program may be modified and copied freely according to the terms of +# the GNU general public license (GPL), as long as the above copyright +# notice and the licensing information contained herein are preserved. +# +# Please refer to www.gnu.org for licensing details. +# +# This work is provided AS IS, without warranty of any kind, expressed or +# implied, including but not limited to the warranties of merchantibility, +# noninfringement, and fitness for a specific purpose. The author will not +# be held liable for any damage caused by this work or derivatives of it. +# +# By using this source code, you agree to the licensing terms as stated +# above. +# +# +# Please contact the maintainer for bug reports or inquiries. +# +# Current Maintainer: +# +# Christoph Reichenbach (CJR) [jameson@linuxgames.com] +# +# +# Warning: This is still a "bit" messy. Sorry for that. +# + +#$debug = 1; + +$write_lines = "true"; +$source_file = "CFSML source file"; +$type_integer = "integer"; +$type_string = "string"; +$type_record = "RECORD"; +$type_pointer = "POINTER"; +$type_abspointer = "ABSPOINTER"; + +%types; # Contains all type bindings +%records; # Contains all record bindings + +$mode = undef; +while ($op = shift @ARGV) { + if ($mode eq undef) { + if ($op eq "-f") { + $mode = "fname"; + } elsif ($op eq "-l") { + $write_lines = undef; + } elsif ($op eq "-v") { + print "cfsml.pl, the CFSML code generator, version $version\n"; + print "This program is provided WITHOUT WARRANTY of any kind. 
It may be\n"; + print "copied and modified freely according to the terms of the GNU\n"; + print "General Public License.\n"; + exit(0); + } elsif ($op eq "-h") { + print "CFSML help:\n"; + print "Usage: cat source | cfsml.pl [-v] [-h] [-l] [-f <filename>] > dest\n"; + print " -h : help\n"; + print " -v : print version\n"; + print " -l : disable line number printing in dest file\n"; + print " -f : specify file name for line number printing\n"; + exit(0); + } else { + die "Unknown option '$op'\n"; + } + } elsif ($mode eq "fname") { + $source_file = $op; + $mode = 0; + } else { + die "Invalid internal state '$mode'\n"; + } +} + +sub write_line_pp +# write_line_pp(int line_nr, bool input_file?) +{ + my $line_nr = shift; + my $_file = shift; + my $filename = "cfsml.pl"; + + if (_file) { + $filename = $source_file; + } + + if ($write_lines) { + print "#line $line_nr \"$filename\"\n"; + } +} + +sub create_string_functions + { + $firstline = __LINE__; + $firstline += 4; + write_line_pp($firstline, 0); + print <<'EOF'; + +#include <stdarg.h> /* We need va_lists */ +#include <sci_memory.h> + +#ifdef CFSML_DEBUG_MALLOC +/* +#define free(p) dbg_sci_free(p) +#define malloc(s) dbg_sci_malloc(s) +#define calloc(n, s) dbg_sci_calloc(n, s) +#define realloc(p, s) dbg_sci_realloc(p, s) +*/ +#define free dbg_sci_free +#define malloc dbg_sci_malloc +#define calloc dbg_sci_calloc +#define realloc dbg_sci_realloc +#endif + +static void +_cfsml_error(char *fmt, ...) 
+{ + va_list argp; + + fprintf(stderr, "Error: "); + va_start(argp, fmt); + vfprintf(stderr, fmt, argp); + va_end(argp); + +} + + +static struct _cfsml_pointer_refstruct { + struct _cfsml_pointer_refstruct *next; + void *ptr; +} *_cfsml_pointer_references = NULL; + +static struct _cfsml_pointer_refstruct **_cfsml_pointer_references_current = &_cfsml_pointer_references; + +static char *_cfsml_last_value_retreived = NULL; +static char *_cfsml_last_identifier_retreived = NULL; + +static void +_cfsml_free_pointer_references_recursively(struct _cfsml_pointer_refstruct *refs, int free_pointers) +{ + if (!refs) + return; + #ifdef CFSML_DEBUG_MALLOC + SCI_MEMTEST; + #endif + + _cfsml_free_pointer_references_recursively(refs->next, free_pointers); + #ifdef CFSML_DEBUG_MALLOC + SCI_MEMTEST; + + fprintf(stderr,"Freeing ptrref %p [%p] %s\n", refs->ptr, refs, free_pointers? + "ALL": "cleanup only"); + #endif + + if (free_pointers) + free(refs->ptr); + + #ifdef CFSML_DEBUG_MALLOC + SCI_MEMTEST; + #endif + free(refs); + #ifdef CFSML_DEBUG_MALLOC + SCI_MEMTEST; + #endif +} + +static void +_cfsml_free_pointer_references(struct _cfsml_pointer_refstruct **meta_ref, int free_pointers) +{ + _cfsml_free_pointer_references_recursively(*meta_ref, free_pointers); + *meta_ref = NULL; + _cfsml_pointer_references_current = meta_ref; +} + +static struct _cfsml_pointer_refstruct ** +_cfsml_get_current_refpointer() +{ + return _cfsml_pointer_references_current; +} + +static void _cfsml_register_pointer(void *ptr) +{ + struct _cfsml_pointer_refstruct *newref = (struct _cfsml_pointer_refstruct*)sci_malloc(sizeof (struct _cfsml_pointer_refstruct)); + #ifdef CFSML_DEBUG_MALLOC + SCI_MEMTEST; + fprintf(stderr,"Registering ptrref %p [%p]\n", ptr, newref); + #endif + newref->next = *_cfsml_pointer_references_current; + newref->ptr = ptr; + *_cfsml_pointer_references_current = newref; +} + + +static char * +_cfsml_mangle_string(char *s) +{ + char *source = s; + char c; + char *target = (char *) 
sci_malloc(1 + strlen(s) * 2); /* We will probably need less than that */ + char *writer = target; + + while ((c = *source++)) { + + if (c < 32) { /* Special character? */ + *writer++ = '\\'; /* Escape... */ + c += ('a' - 1); + } else if (c == '\\' || c == '"') + *writer++ = '\\'; /* Escape, but do not change */ + *writer++ = c; + + } + *writer = 0; /* Terminate string */ + + return (char *) sci_realloc(target, strlen(target) + 1); +} + + +static char * +_cfsml_unmangle_string(char *s) +{ + char *target = (char *) sci_malloc(1 + strlen(s)); + char *writer = target; + char *source = s; + char c; + + while ((c = *source++) && (c > 31)) { + if (c == '\\') { /* Escaped character? */ + c = *source++; + if ((c != '\\') && (c != '"')) /* Un-escape 0-31 only */ + c -= ('a' - 1); + } + *writer++ = c; + } + *writer = 0; /* Terminate string */ + + return (char *) sci_realloc(target, strlen(target) + 1); +} + + +static char * +_cfsml_get_identifier(FILE *fd, int *line, int *hiteof, int *assignment) +{ + int c; + int mem = 32; + int pos = 0; + int done = 0; + char *retval = (char *) sci_malloc(mem); + + if (_cfsml_last_identifier_retreived) { + free(_cfsml_last_identifier_retreived); + _cfsml_last_identifier_retreived = NULL; + } + + while (isspace(c = fgetc(fd)) && (c != EOF)); + if (c == EOF) { + _cfsml_error("Unexpected end of file at line %d\n", *line); + free(retval); + *hiteof = 1; + return NULL; + } + + ungetc(c, fd); + + while (((c = fgetc(fd)) != EOF) && ((pos == 0) || (c != '\n')) && (c != '=')) { + + if (pos == mem - 1) /* Need more memory? 
*/ + retval = (char *) sci_realloc(retval, mem *= 2); + + if (!isspace(c)) { + if (done) { + _cfsml_error("Single word identifier expected at line %d\n", *line); + free(retval); + return NULL; + } + retval[pos++] = c; + } else + if (pos != 0) + done = 1; /* Finished the variable name */ + else if (c == '\n') + ++(*line); + } + + if (c == EOF) { + _cfsml_error("Unexpected end of file at line %d\n", *line); + free(retval); + *hiteof = 1; + return NULL; + } + + if (c == '\n') { + ++(*line); + if (assignment) + *assignment = 0; + } else + if (assignment) + *assignment = 1; + + if (pos == 0) { + _cfsml_error("Missing identifier in assignment at line %d\n", *line); + free(retval); + return NULL; + } + + if (pos == mem - 1) /* Need more memory? */ + retval = (char *) sci_realloc(retval, mem += 1); + + retval[pos] = 0; /* Terminate string */ +EOF + +if ($debug) { + print " printf(\"identifier is '%s'\\n\", retval);\n"; +} + + $firstline = __LINE__; + $firstline += 4; + write_line_pp($firstline, 0); + print <<'EOF2'; + + return _cfsml_last_identifier_retreived = retval; +} + + +static char * +_cfsml_get_value(FILE *fd, int *line, int *hiteof) +{ + int c; + int mem = 64; + int pos = 0; + char *retval = (char *) sci_malloc(mem); + + if (_cfsml_last_value_retreived) { + free(_cfsml_last_value_retreived); + _cfsml_last_value_retreived = NULL; + } + + while (((c = fgetc(fd)) != EOF) && (c != '\n')) { + + if (pos == mem - 1) /* Need more memory? */ + retval = (char *) sci_realloc(retval, mem *= 2); + + if (pos || (!isspace(c))) + retval[pos++] = c; + + } + + while ((pos > 0) && (isspace(retval[pos - 1]))) + --pos; /* Strip trailing whitespace */ + + if (c == EOF) + *hiteof = 1; + + if (pos == 0) { + _cfsml_error("Missing value in assignment at line %d\n", *line); + free(retval); + return NULL; + } + + if (c == '\n') + ++(*line); + + if (pos == mem - 1) /* Need more memory? 
*/ + retval = (char *) sci_realloc(retval, mem += 1); + + retval[pos] = 0; /* Terminate string */ +EOF2 + + if ($debug) { + print " printf(\"value is '%s'\\n\", retval);\n"; + } + + $firstline = __LINE__; + $firstline += 4; + write_line_pp($firstline, 0); + print <<'EOF3'; + return (_cfsml_last_value_retreived = (char *) sci_realloc(retval, strlen(retval) + 1)); + /* Re-allocate; this value might be used for quite some while (if we are + ** restoring a string) + */ +} +EOF3 + } + + +# Call with $expression as a simple expression, like "tos + 1". +# Returns (in this case) ("tos", "-1"). +sub lvaluize + { + my @retval; +# print "//DEBUG: $expression ["; + my @tokens = split (/([+-\/\*])/, $expression); +# print join(",", @tokens); + $retval[0] = $tokens[0]; + + my $rightvalue = ""; + for ($i = 1; $tokens[$i]; $i++) { + + if ($tokens[$i] eq "+") { + $rightvalue .= "-"; + } elsif ($tokens[$i] eq "-") { + $rightvalue .= "+"; + } elsif ($tokens[$i] eq "/") { + $rightvalue .= "*"; + } elsif ($tokens[$i] eq "*") { + $rightvalue .= "/"; + } else { + $rightvalue .= $tokens[$i]; + } + } + + $retval[1] = $rightvalue; + +# print "] => ($retval[0];$retval[1])\n"; + + return @retval; + } + + + +sub create_declaration + { + $typename = $type; + $ctype = $types{$type}->{'ctype'}; + + if (not $types{$type}->{'external'}) { + $types{$type}{'writer'} = "_cfsml_write_" . $typename; + $types{$type}{'reader'} = "_cfsml_read_" . $typename; + write_line_pp(__LINE__, 0); + print "static void\n$types{$type}{'writer'}(FILE *fh, $ctype* save_struc);\n"; + print "static int\n$types{$type}{'reader'}(FILE *fh, $ctype* save_struc, char *lastval,". 
+ " int *line, int *hiteof);\n\n"; + }; + + } + +sub create_writer + { + $typename = $type; + $ctype = $types{$type}{'ctype'}; + + write_line_pp(__LINE__, 0); + print "static void\n_cfsml_write_$typename(FILE *fh, $ctype* save_struc)\n{\n"; + if ($types{$type}{'type'} eq $type_record) { + print " int min, max, i;\n\n"; + } + + if ($types{$type}{'type'} eq $type_integer) { + print " fprintf(fh, \"%li\", (long) *save_struc);\n"; + } + elsif ($types{$type}{'type'} eq $type_string) { + write_line_pp(__LINE__, 0); + print " if (!(*save_struc))\n"; + print " fprintf(fh, \"\\\\null\\\\\");\n"; + print " else {\n"; + print " char *token = _cfsml_mangle_string((char *) *save_struc);\n"; + print " fprintf(fh, \"\\\"%s\\\"\", token);\n"; + print " free(token);\n"; + print " }\n"; + } + elsif ($types{$type}{'type'} eq $type_record) { + write_line_pp(__LINE__, 0); + print " fprintf(fh, \"{\\n\");\n"; + + for $n (@{$records{$type}}) { + + print " fprintf(fh, \"$n->{'name'} = \");\n"; + + if ($n->{'array'}) { # Check for arrays + + if ($n->{'array'} eq 'static' or $n->{'size'} * 2) { # fixed integer value? + print " min = max = $n->{'size'};\n"; + } + else { # No, a variable + print " min = max = save_struc->$n->{'size'};\n"; + } + + if ($n->{'maxwrite'}) { # A write limit? 
+ print " if (save_struc->$n->{'maxwrite'} < min)\n"; + print " min = save_struc->$n->{'maxwrite'};\n"; + } + + if ($n->{'array'} eq 'dynamic') { + print " if (!save_struc->$n->{'name'})\n"; + print " min = max = 0; /* Don't write if it points to NULL */\n"; + } + + write_line_pp(__LINE__, 0); + print " fprintf(fh, \"[%d][\\n\", max);\n"; + print " for (i = 0; i < min; i++) {\n"; + print " $types{$n->{'type'}}{'writer'}"; + my $subscribstr = "[i]"; # To avoid perl interpolation problems + print "(fh, &(save_struc->$n->{'name'}$subscribstr));\n"; + print " fprintf(fh, \"\\n\");\n"; + print " }\n"; + print " fprintf(fh, \"]\");\n"; + + } elsif ($n->{'type'} eq $type_pointer) { # Relative pointer + + print " fprintf(fh, \"%d\", save_struc->$n->{'name'} - save_struc->$n->{'anchor'});" . + " /* Relative pointer */\n"; + + } elsif ($n->{'type'} eq $type_abspointer) { # Absolute pointer + + print " if (!save_struc->$n->{'name'})\n"; + print " fprintf(fh, \"\\\\null\\\\\");\n"; + print " else \n"; + print " $types{$n->{'reftype'}}{'writer'}"; + print "(fh, save_struc->$n->{'name'});\n"; + + } else { # Normal record entry + + print " $types{$n->{'type'}}{'writer'}"; + print "(fh, ($types{$n->{'type'}}{'ctype'}*) &(save_struc->$n->{'name'}));\n"; + + } + + print " fprintf(fh, \"\\n\");\n"; + } + + print " fprintf(fh, \"}\");\n"; + } + else { + print STDERR "Warning: Attempt to create_writer for invalid type '$types{$type}{'type'}'\n"; + } + print "}\n\n"; + + } + + +sub create_reader + { + $typename = $type; + $ctype = $types{$type}{'ctype'}; + + write_line_pp(__LINE__, 0); + print "static int\n_cfsml_read_$typename"; + print "(FILE *fh, $ctype* save_struc, char *lastval, int *line, int *hiteof)\n{\n"; + + print " char *token;\n"; + if ($types{$type}{'type'} eq $type_record) { + print "int min, max, i;\n"; + } + my $reladdress_nr = 0; # Number of relative addresses needed + my $reladdress = 0; # Current relative address number + my $reladdress_resolver = ""; # Relative 
addresses are resolved after the main while block + + if ($types{$type}{'type'} eq $type_record) { + + foreach $n (@{$records{$type}}) { # Count relative addresses we need + if ($n->{'type'} eq $type_pointer) { + ++$reladdress_nr; + } + } + + if ($reladdress_nr) { # Allocate stack space for all relative addresses needed + print " int reladdresses[$reladdress_nr] = {0};\n"; + } + } + + if ($types{$type}{'type'} eq $type_integer) { + write_line_pp(__LINE__, 0); + print "\n *save_struc = strtol(lastval, &token, 0);\n"; + print " if ( (*save_struc == 0) && (token == lastval) ) {\n"; + print " _cfsml_error(\"strtol failed at line %d\\n\", *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n"; + print " if (*token != 0) {\n"; + print " _cfsml_error(\"Non-integer encountered while parsing int value at line %d\\n\","; + print " *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n"; + print " return CFSML_SUCCESS;\n"; + } elsif ($types{$type}{'type'} eq $type_string) { + write_line_pp(__LINE__, 0); + print "\n"; + print " if (strcmp(lastval, \"\\\\null\\\\\")) { /* null pointer? */\n"; + print " if (*lastval == '\"') { /* Quoted string? */\n"; + print " int seeker = strlen(lastval);\n\n"; + print " while (lastval[seeker] != '\"')\n"; + print " --seeker;\n\n"; + print " if (!seeker) { /* No matching double-quotes? */\n"; + print " _cfsml_error(\"Unbalanced quotes at line %d\\n\", *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n\n"; + print " lastval[seeker] = 0; /* Terminate string at closing quotes... 
*/\n"; + print " lastval++; /* ...and skip the opening quotes locally */\n"; + print " }\n"; + print " *save_struc = _cfsml_unmangle_string(lastval);\n"; + print " _cfsml_register_pointer(*save_struc);\n"; + print " return CFSML_SUCCESS;\n"; + print " } else {\n"; + print " *save_struc = NULL;\n"; + print " return CFSML_SUCCESS;\n"; + print " }\n"; + } elsif ($types{$type}{'type'} eq $type_record) { + write_line_pp(__LINE__, 0); + print " int assignment, closed, done;\n\n"; + print " if (strcmp(lastval, \"{\")) {\n"; + print " _cfsml_error(\"Reading record $type; expected opening braces in line %d, got \\\"%s\\\"\\n\","; + print "*line, lastval);\n"; + print " return CFSML_FAILURE;\n"; + print " };\n"; + print " closed = 0;\n"; + print " do {\n"; + print " char *value;\n"; + print " token = _cfsml_get_identifier(fh, line, hiteof, &assignment);\n\n"; + print " if (!token) {\n"; + print " _cfsml_error(\"Expected token at line %d\\n\", *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n"; + print " if (!assignment) {\n"; + print " if (!strcmp(token, \"}\")) \n"; + print " closed = 1;\n"; + print " else {\n"; + print " _cfsml_error(\"Expected assignment or closing braces in line %d\\n\", *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n"; + print " } else {\n"; + print " value = \"\";\n"; + print " while (!value || !strcmp(value, \"\"))\n"; + print " value = _cfsml_get_value(fh, line, hiteof);\n"; + print " if (!value) {\n"; + print " _cfsml_error(\"Expected token at line %d\\n\", *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n"; +# print " }\n"; + + + foreach $n (@{$records{$type}}) { # Now take care of all record elements + + my $type = $n->{'type'}; + my $reference = undef; + if ($type eq $type_abspointer) { + $reference = 1; + $type = $n->{'reftype'}; + } + my $name = $n->{'name'}; + my $reader = $types{$type}{'reader'}; + my $size = $n->{'size'}; + + print " if (!strcmp(token, \"$name\")) {\n"; + + if ($type eq $type_pointer) { 
# A relative pointer + + $reader = $types{'int'}{'reader'}; # Read relpointer as int + + write_line_pp(__LINE__, 0); + print " if ($reader(fh, &(reladdresses[$reladdress]), value, line, hiteof)) {\n"; + print " _cfsml_error(\"Expected token at line %d\\n\", *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n"; + + # Make sure that the resulting variable is interpreted correctly + $reladdress_resolver .= " save_struc->$n->{'name'} =". + " save_struc->$n->{'anchor'} + reladdresses[$reladdress];\n"; + + ++$reladdress; # Prepare reladdress for next element + + } elsif ($n->{'array'}) { # Is it an array? + write_line_pp(__LINE__, 0); + print " if ((value[0] != '[') || (value[strlen(value) - 1] != '[')) {\n"; + # The value must end with [, since we're starting array data, and it must also + # begin with [, since this is either the only character in the line, or it starts + # the "amount of memory to allocate" block + print " _cfsml_error(\"Opening brackets expected at line %d\\n\", *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n"; + + if ($n->{'array'} eq 'dynamic') { + write_line_pp(__LINE__, 0); + # We need to allocate the array first + print " /* Prepare to restore dynamic array */\n"; + # Read amount of memory to allocate + print " max = strtol(value + 1, NULL, 0);\n"; + print " if (max < 0) {\n"; + print " _cfsml_error(\"Invalid number of elements to allocate for dynamic "; + print "array '%s' at line %d\\n\", token, *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n\n"; + + print " if (max) {\n"; + print " save_struc->$name = ($n->{'type'} *) sci_malloc(max * sizeof($type));\n"; + print "#ifdef SATISFY_PURIFY\n"; + print " memset(save_struc->$name, 0, max * sizeof($type));\n"; + print "#endif\n"; + print " _cfsml_register_pointer(save_struc->$name);\n"; + print " }\n"; + print " else\n"; + print " save_struc->$name = NULL;\n" + + } else { # static array + print " /* Prepare to restore static array */\n"; + print " max = $size;\n"; 
+ } + + write_line_pp(__LINE__, 0); + print " done = i = 0;\n"; + print " do {\n"; + if ($type eq $type_record) { + print " if (!(value = _cfsml_get_value(fh, line, hiteof))) {\n"; + } else { + print " if (!(value = _cfsml_get_identifier(fh, line, hiteof, NULL))) {\n"; + } + write_line_pp(__LINE__, 0); + + print " _cfsml_error(\"Token expected at line %d\\n\", *line);\n"; + print " return 1;\n"; + print " }\n"; + print " if (strcmp(value, \"]\")) {\n"; + print " if (i == max) {\n"; + print " _cfsml_error(\"More elements than space available (%d) in '%s' at "; + print "line %d\\n\", max, token, *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n"; + my $helper = "[i++]"; + print " if ($reader(fh, &(save_struc->$name$helper), value, line, hiteof)) {\n"; + print " _cfsml_error(\"Token expected by $reader() for $name$helper at line %d\\n\", *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n"; + print " } else done = 1;\n"; + print " } while (!done);\n"; + + if ($n->{'array'} eq "dynamic") { + my @xpr = lvaluize($expression = $n->{'size'}); + print " save_struc->$xpr[0] = max $xpr[1]; /* Set array size accordingly */\n"; + } + + if ($n->{'maxwrite'}) { + my @xpr = lvaluize($expression = $n->{'maxwrite'}); + print " save_struc->$xpr[0] = i $xpr[1]; /* Set number of elements */\n"; + } + + } + elsif ($reference) { + write_line_pp(__LINE__, 0); + print " if (strcmp(value, \"\\\\null\\\\\")) { /* null pointer? 
*/\n"; + print " save_struc->$name = sci_malloc(sizeof ($type));\n"; + print " _cfsml_register_pointer(save_struc->$name);\n"; + print " if ($reader(fh, save_struc->$name, value, line, hiteof)) {\n"; + print " _cfsml_error(\"Token expected by $reader() for $name at line %d\\n\", *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n"; + print " } else save_struc->$name = NULL;\n"; + } + else { # It's a simple variable or a struct + write_line_pp(__LINE__, 0); + print " if ($reader(fh, ($types{$type}{'ctype'}*) &(save_struc->$name), value, line, hiteof)) {\n"; + print " _cfsml_error(\"Token expected by $reader() for $name at line %d\\n\", *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n"; + } + print " } else\n"; + + } + write_line_pp(__LINE__, 0); + print " {\n"; + print " _cfsml_error(\"$type: Assignment to invalid identifier '%s' in line %d\\n\","; + print " token, *line);\n"; + print " return CFSML_FAILURE;\n"; + print " }\n"; + print " }\n"; + + print " } while (!closed); /* Until closing braces are hit */\n"; + + print $reladdress_resolver; # Resolves any relative addresses + + print " return CFSML_SUCCESS;\n"; + } else { + print STDERR "Warning: Attempt to create_reader for invalid type '$types{$type}{'type'}'\n"; + } + + print "}\n\n"; + } + +# Built-in types + +%types = ( + 'int' => { + 'type' => $type_integer, + 'ctype' => "int", + }, + + 'string' => { + 'type' => $type_string, + 'ctype' => "char *", + }, + ); + + + +sub create_function_block { + print "\n/* Auto-generated CFSML declaration and function block */\n\n"; + write_line_pp(__LINE__, 0); + print "#define CFSML_SUCCESS 0\n"; + print "#define CFSML_FAILURE 1\n\n"; + create_string_functions; + + foreach $n ( keys %types ) { + create_declaration($type = $n); + } + + foreach $n ( keys %types ) { + if (not $types{$n}->{'external'}) { + create_writer($type = $n); + create_reader($type = $n); + } + } + print "\n/* Auto-generated CFSML declaration and function block ends here */\n"; + 
print "/* Auto-generation performed by cfsml.pl $version */\n"; +} + + +# Gnerates code to read a data type +# Parameters: $type: Type to read +# $datap: Pointer to the write destination +# $fh: Existing filehandle of an open file to use +# $eofvar: Variable to store _cfsml_eof into +sub insert_reader_code { + print "/* Auto-generated CFSML data reader code */\n"; + write_line_pp(__LINE__, 0); + print " {\n"; + if (!$linecounter) { + write_line_pp(__LINE__, 0); + print " int _cfsml_line_ctr = 0;\n"; + $linecounter = '_cfsml_line_ctr'; + } + if ($atomic) { + write_line_pp(__LINE__, 0); + print " struct _cfsml_pointer_refstruct **_cfsml_myptrrefptr = _cfsml_get_current_refpointer();\n"; + } + write_line_pp(__LINE__, 0); + print " int _cfsml_eof = 0, _cfsml_error;\n"; + print " int dummy;\n"; + + if ($firsttoken) { + write_line_pp(__LINE__, 0); + print " char *_cfsml_inp = $firsttoken;\n"; + } else { + write_line_pp(__LINE__, 0); + print " char *_cfsml_inp =". + " _cfsml_get_identifier($fh, &($linecounter), &_cfsml_eof, &dummy);\n\n"; + } + + write_line_pp(__LINE__, 0); + print " _cfsml_error =". 
+ " $types{$type}{'reader'}($fh, $datap, _cfsml_inp, &($linecounter), &_cfsml_eof);\n"; + + if ($eofvar) { + write_line_pp(__LINE__, 0); + print " $eofvar = _cfsml_error;\n"; + } + if ($atomic) { + write_line_pp(__LINE__, 0); + print " _cfsml_free_pointer_references(_cfsml_myptrrefptr, _cfsml_error);\n"; + } + write_line_pp(__LINE__, 0); + print " if (_cfsml_last_value_retreived) {\n"; + print " free(_cfsml_last_value_retreived);\n"; + print " _cfsml_last_value_retreived = NULL;\n"; + print " }\n"; + print " if (_cfsml_last_identifier_retreived) {\n"; + print " free(_cfsml_last_identifier_retreived);\n"; + print " _cfsml_last_identifier_retreived = NULL;\n"; + print " }\n"; + print " }\n"; + print "/* End of auto-generated CFSML data reader code */\n"; +} + +# Generates code to write a data type +# Parameters: $type: Type to write +# $datap: Pointer to the write destination +# $fh: Existing filehandle of an open file to use +sub insert_writer_code { + write_line_pp(__LINE__, 0); + print "/* Auto-generated CFSML data writer code */\n"; + print " $types{$type}{'writer'}($fh, $datap);\n"; + print " fprintf($fh, \"\\n\");\n"; + print "/* End of auto-generated CFSML data writer code */\n"; +} + + +################ +# Main program # +################ + +$parsing = 0; +$struct = undef; # Not working on a struct +$commentmode = undef; +$line = 0; + +while (<STDIN>) { + + $line++; + + if ($parsing) { + ($data) = split "#"; # Remove shell-style comments + @_ = ($data); + + s/\/\*.*\*\///g; # Remove C-style one-line comments + + ($data) = split "\/\/"; # Remove C++-style comments + @_ = ($data); + + if ($commentmode) { + + if (grep /\*\//, $_) { + ($empty, $_) = split /\*\//; + } else { + @_ = (); # Empty line + } + + } else { + if (grep /\/\*/, $_) { + $commentmode = 1; + ($_) = split /\/\*/; + } + } + + + # Now tokenize: + s/;//; + split /(\".*\"|[,\[\]\(\)\{\}])|\s+/; + + @items = @_; + + @tokens = (); + + $tokens_nr = 0; + for ($n = 0; $n < scalar @items; $n++) { # Get 
rid of all undefs + if ($_[$n]) { + $_ = $items[$n]; + s/\"//g; + $tokens[$tokens_nr++] = $_; + } + } + + # Now all tokens are in @tokens, and we have $tokens_nr of them. + +# print "//DEBUG: " . join ("|", @tokens) . "\n"; + + if ($tokens_nr) { + if ($tokens_nr == 2 && $tokens[0] eq "%END" && $tokens[1] eq "CFSML") { + + $struct && die "Record $struct needs closing braces in intput file (line $line)."; + + $parsing = 0; + create_function_block; + my $linep = $line + 1; + write_line_pp($linep, 1); + } elsif ($struct) { # Parsing struct + if ($tokens_nr == 1) { + if ($tokens[0] eq "}") { + $struct = undef; + } else { die "Invalid declaration of $token[0] in input file (line $line)\n";}; + } else { # Must be a member declaration + + my @structrecs = (@{$records{$struct}}); + my $newidx = (scalar @structrecs) or "0"; + my %member = (); + $member{'name'} = $tokens[1]; + $member{'type'} = $tokens[0]; + + if ($tokens_nr == 3 && $tokens[1] == "*") { + $tokens_nr = 2; + $member{'name'} = $tokens[2]; + $member{'reftype'} = $tokens[0]; + $member{'type'} = $type_abspointer; + } + + if ($tokens_nr == 4 and $tokens[0] eq $type_pointer) { # Relative pointer + + if (not $tokens[2] eq "RELATIVETO") { + die "Invalid relative pointer declaration in input file (line $line)\n"; + } + + $member{'anchor'} = $tokens[3]; # RelPointer anchor + + } else { # Non-pointer + + if (not $types{$tokens[0]}) { + die "Unknown type $tokens[0] used in input file (line $line)\n"; + } + + if ($tokens_nr > 2) { # Array + + if ($tokens[2] ne "[") { + die "Invalid token '$tokens[2]' in input file (line $line)\n"; + } + + $member{'array'} = "static"; + + if ($tokens[$tokens_nr - 1] ne "]") { + die "Array declaration incorrectly terminated in input file (line $line)\n"; + } + + $parsepos = 3; + + while ($parsepos < $tokens_nr) { + + if ($tokens[$parsepos] eq ",") { + + $parsepos++; + + } elsif ($tokens[$parsepos] eq "STATIC") { + + $member{'array'} = "static"; + $parsepos++; + + } elsif ($tokens[$parsepos] 
eq "DYNAMIC") { + + $member{'array'} = "dynamic"; + $parsepos++; + + } elsif ($tokens[$parsepos] eq "MAXWRITE") { + + $member{'maxwrite'} = $tokens[$parsepos + 1]; + $parsepos += 2; + + } elsif ($tokens[$parsepos] eq "]") { + + $parsepos++; + if ($parsepos != $tokens_nr) { + die "Error: Invalid tokens after array declaration in input file (line $line)\n"; + + } + } else { + + if ($member{'size'}) { + die "Attempt to use more than one array size in input file (line $line)\n" . + "(Original size was \"$member->{'size'}\", new size is \"$tokens[$parsepos]\"\n"; + } + + $member{'size'} = $tokens[$parsepos]; + $parsepos++; + } + } + + + unless ($member{'size'}) { + die "Array declaration without size in input file (line $line)\n"; + } + } + } + + @{$records{$struct}}->[$newidx] = \%member; + } + } else { # not parsing struct; normal operation. + + if ($tokens[0] eq "TYPE") { # Simple type declaration + + my $newtype = $tokens[1]; + + $types{$newtype}->{'ctype'} = $tokens[2]; + + if ($tokens_nr == 5) { # must be ...LIKE... + + unless ($tokens[3] eq "LIKE") { + die "Invalid TYPE declaration in input file (line $line)\n"; + } + + $types{$newtype}->{'type'} = $types{$tokens[4]}->{'type'}; + $types{$newtype}->{'reader'} = $types{$tokens[4]}->{'reader'}; + $types{$newtype}->{'writer'} = $types{$tokens[4]}->{'writer'}; + + } elsif ($tokens_nr == 6) { # must be ...USING... 
+ + unless ($tokens[3] eq "USING") { + die "Invalid TYPE declaration in input file (line $line)\n"; + } + + $types{$newtype}->{'writer'} = $tokens[4]; + $types{$newtype}->{'reader'} = $tokens[5]; + $types{$newtype}->{'external'} = 'T'; + + } else { + die "Invalid TYPE declaration in input file (line $line)\n"; + } + + } elsif ($tokens[0] eq "RECORD") { + + $struct = $tokens[1]; + if ($types{$struct}) { + die "Attempt to re-define existing type $struct as a struct in input file (line $line)"; + } + $types{$struct}{'type'} = $type_record; + if ($tokens_nr < 3 or $tokens_nr > 6 or $tokens[$tokens_nr - 1] ne "{") { + die "Invalid record declaration in input file (line $line)"; + } + + my $extoffset = 2; + + if ($tokens_nr > 3) { + if ($tokens[2] ne "EXTENDS") { # Record declaration with explicit c type + $types{$struct}{'ctype'} = $tokens[2]; + $extoffset = 3; + } else { # Record name is the same as the c type name + $types{$struct}{'ctype'} = $struct; + } + } elsif ($tokens_nr == 3) { + $types{$struct}{'ctype'} = $struct; + } + + if (($tokens_nr > $extoffset + 1) && ($extoffset + 1 <= $tokens_nr)) { + if ($tokens[$extoffset] ne "EXTENDS") { + die "Invalid or improper keyword \"$tokens[$extoffset]\" in input file (line $line)"; + } + if ($extoffset + 2 >= $tokens_nr) { + die "RECORD \"$struct\" extends on unspecified type in input file (line $line)"; + } + my $ext_type = $tokens[$extoffset + 1]; + + if (!($types{$ext_type}{type} eq $type_record)) { + print "$types{$ext_type}{type}"; + die "RECORD \"$struct\" attempts to extend non-existing or non-record type \"$ext_type\" in input file (line $line)"; + } + + (@{$records{$struct}}) = (@{$records{$ext_type}}); # Copy type information from super type + } + + } else { + die "Invalid declaration \"$tokens[0]\" in line $line"; + } + } + } + + + } else { + + ($subtoken) = split ";"; # Get rid of trailing ;s + $tokens_nr = @tokens = split " ", $subtoken; + + if ($tokens_nr == 1 && $tokens[0] eq "%CFSML") { + + $parsing = 1; + 
+ } elsif ($tokens[0] eq "%CFSMLWRITE" and $tokens[3] eq "INTO" and $tokens_nr >= 5) { + + insert_writer_code($type = $tokens[1], $datap = $tokens[2], $fh = $tokens[4]); + my $templine = $line + 1; + write_line_pp($templine, 1); # Yes, this sucks. + + } elsif (($tokens[0] eq "%CFSMLREAD") or ($tokens[0] eq "%CFSMLREAD-ATOMIC") and $tokens[3] eq "FROM" and $tokens_nr >= 5) { + + my $myeofvar = 0; + my $myfirsttoken = 0; + my $mylinecounter = 0; + + my $idcounter = 5; + + while ($idcounter < $tokens_nr) { + if ($tokens[$idcounter] eq "ERRVAR" and $tokens_nr >= $idcounter + 2) { + $myeofvar = $tokens[$idcounter + 1]; + $idcounter += 2; + } elsif ($tokens[$idcounter] eq "FIRSTTOKEN" and $tokens_nr >= $idcounter + 2) { + $myfirsttoken = $tokens[$idcounter + 1]; + $idcounter += 2; + } elsif ($tokens[$idcounter] eq "LINECOUNTER" and $tokens_nr >= $idcounter + 2) { + $mylinecounter = $tokens[$idcounter + 1]; + $idcounter += 2; + } else { + die "Unknown %CFSMLREAD operational token: $tokens[$idcounter]\n"; + } + } + insert_reader_code($type = $tokens[1], $datap = $tokens[2], + $fh = $tokens[4], $eofvar = $myeofvar, $firsttoken = $myfirsttoken, + $linecounter = $mylinecounter, $atomic = ($tokens[0] eq "%CFSMLREAD-ATOMIC")); + my $templine = $line + 1; + write_line_pp($templine, 1); # Yes, this sucks, too. + + } else { + print; + } + } + +} + +if ($parsing) { + print <STDERR>, "Warning: Missing %END CFSML\n"; +} diff --git a/engines/sci/engine/said.y b/engines/sci/engine/said.y new file mode 100644 index 0000000000..fdef2360ae --- /dev/null +++ b/engines/sci/engine/said.y @@ -0,0 +1,950 @@ +/*************************************************************************** + said.y Copyright (C) 1999 Christoph Reichenbach + + + This program may be modified and copied freely according to the terms of + the GNU general public license (GPL), as long as the above copyright + notice and the licensing information contained herein are preserved. 
Please refer to www.gnu.org for licensing details.

This work is provided AS IS, without warranty of any kind, expressed or
implied, including but not limited to the warranties of merchantibility,
noninfringement, and fitness for a specific purpose. The author will not
be held liable for any damage caused by this work or derivatives of it.

By using this source code, you agree to the licensing terms as stated
above.


Please contact the maintainer for bug reports or inquiries.

Current Maintainer:

    Christoph Reichenbach (CJR) [jameson@linuxgames.com]

***************************************************************************/

%{

#include <engine.h>

#define SAID_BRANCH_NULL 0

#define MAX_SAID_TOKENS 128

/* Maximum number of words to be expected in a parsed sentence */
#define AUGMENT_MAX_WORDS 64


#define ANYWORD 0xfff

#define WORD_TYPE_BASE 0x141
#define WORD_TYPE_REF 0x144
#define WORD_TYPE_SYNTACTIC_SUGAR 0x145

#define AUGMENT_SENTENCE_PART_BRACKETS 0x152

/* Minor numbers */
#define AUGMENT_SENTENCE_MINOR_MATCH_PHRASE 0x14c
#define AUGMENT_SENTENCE_MINOR_MATCH_WORD 0x153
#define AUGMENT_SENTENCE_MINOR_RECURSE 0x144
#define AUGMENT_SENTENCE_MINOR_PARENTHESES 0x14f


#undef YYDEBUG /*1*/
/*#define SAID_DEBUG*/
/*#define SCI_DEBUG_PARSE_TREE_AUGMENTATION*/ /* uncomment to debug parse tree augmentation*/


/* scidprintf() prints only when augmentation debugging is enabled.
** NOTE: the disabled form expands to a bare 'if (0)', so it must only be
** used as a complete statement inside a braced block.
*/
#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION
#define scidprintf sciprintf
#else
#define scidprintf if (0) sciprintf
#endif


static char *said_parse_error; /* Set by yyerror(); owned and freed by said_parse_spec() */

static int said_token;     /* Index of the next entry of said_tokens[] to hand to the parser */
static int said_tokens_nr; /* Number of valid entries in said_tokens[] */
static int said_tokens[MAX_SAID_TOKENS];

static int said_blessed;  /* incremented by said_top_branch */

static int said_tree_pos; /* Set to 0 if we're out of space */
#define SAID_TREE_START 4 /* Reserve space for the 4 top nodes */

#define VALUE_IGNORE (-424242)

static parse_tree_node_t said_tree[VOCAB_TREE_NODES];

typedef int wgroup_t;
typedef int tree_t;
typedef int said_spec_t;

static tree_t
said_aug_branch(int, int, tree_t, tree_t);

static tree_t
said_attach_branch(tree_t, tree_t);
/*
static tree_t
said_wgroup_branch(wgroup_t);
*/
static said_spec_t
said_top_branch(tree_t);

static tree_t
said_paren(tree_t, tree_t);

static tree_t
said_value(int, tree_t);

static tree_t
said_terminal(int);


static int
yylex(void);

/* Parser error callback: stores a copy of the message in said_parse_error
** so that said_parse_spec() can report it after yyparse() has returned.
*/
static int
yyerror(char *s)
{
	said_parse_error = sci_strdup(s);
	return 1; /* Abort */
}

%}

%token WGROUP /* Word group */
%token YY_COMMA     /* 0xf0 */
%token YY_AMP       /* 0xf1 */
%token YY_SLASH     /* 0xf2 */
%token YY_PARENO    /* 0xf3 */
%token YY_PARENC    /* 0xf4 */
%token YY_BRACKETSO /* 0xf5 */
%token YY_BRACKETSC /* 0xf6 */
%token YY_HASH      /* 0xf7 */
%token YY_LT        /* 0xf8 */
%token YY_GT        /* 0xf9 */
%token YY_BRACKETSO_LT /* special token used to imitate LR(2) behaviour */
%token YY_BRACKETSO_SLASH /* special token used to imitate LR(2) behaviour */
%token YY_LT_BRACKETSO /* special token used to imitate LR(2) behaviour */
%token YY_LT_PARENO /* special token used to imitate LR(2) behaviour */

%%

/* NOTE on the "imitate LR(2)" tokens: yylex() returns one of them in place
** of the plain '[' or '<' token, and then still returns the following token
** normally.  In a rule such as 'YY_LT_PARENO YY_PARENO expr YY_PARENC' the
** expression is therefore $3, not $2.  Only WGROUP tokens carry a semantic
** value; the value of any punctuation token is stale and must not be used.
*/

saidspec :	  leftspec optcont
			{ $$ = said_top_branch(said_attach_branch($1, $2)); }
		| leftspec midspec optcont
			{ $$ = said_top_branch(said_attach_branch($1, said_attach_branch($2, $3))); }
		| leftspec midspec rightspec optcont
			{ $$ = said_top_branch(said_attach_branch($1, said_attach_branch($2, said_attach_branch($3, $4)))); }
		;


optcont :	  /* empty */
			{ $$ = SAID_BRANCH_NULL; }
		| YY_GT
			{ $$ = said_paren(said_value(0x14b, said_value(0xf900, said_terminal(0xf900))), SAID_BRANCH_NULL); }
		;



leftspec :	  /* empty */
			{ $$ = SAID_BRANCH_NULL; }
		| expr
			{ $$ = said_paren(said_value(0x141, said_value(0x149, $1)), SAID_BRANCH_NULL); }
		;



midspec :	  YY_SLASH expr
			{ $$ = said_aug_branch(0x142, 0x14a, $2, SAID_BRANCH_NULL); }
		| YY_BRACKETSO_SLASH YY_SLASH expr YY_BRACKETSC
			{ $$ = said_aug_branch(0x152, 0x142, said_aug_branch(0x142, 0x14a, $3, SAID_BRANCH_NULL), SAID_BRANCH_NULL); }
		| YY_SLASH
			{ $$ = SAID_BRANCH_NULL; }
		;



rightspec :	  YY_SLASH expr
			{ $$ = said_aug_branch(0x143, 0x14a, $2, SAID_BRANCH_NULL); }
		| YY_BRACKETSO_SLASH YY_SLASH expr YY_BRACKETSC
			{ $$ = said_aug_branch(0x152, 0x143, said_aug_branch(0x143, 0x14a, $3, SAID_BRANCH_NULL), SAID_BRANCH_NULL); }
		| YY_SLASH
			{ $$ = SAID_BRANCH_NULL; }
		;



word :		  WGROUP
			{ $$ = said_paren(said_value(0x141, said_value(0x153, said_terminal($1))), SAID_BRANCH_NULL); }
		;


cwordset :	  wordset
			{ $$ = said_aug_branch(0x141, 0x14f, $1, SAID_BRANCH_NULL); }
		| YY_BRACKETSO wordset YY_BRACKETSC
			{ $$ = said_aug_branch(0x141, 0x14f, said_aug_branch(0x152, 0x14c, said_aug_branch(0x141, 0x14f, $2, SAID_BRANCH_NULL), SAID_BRANCH_NULL), SAID_BRANCH_NULL); }
		;


wordset :	  word
			{ $$ = $1; }
		| YY_PARENO expr YY_PARENC
			{ $$ = $2; /* the expression, not the '(' token */ }
		| wordset YY_COMMA wordset
			{ $$ = said_attach_branch($1, $3); }
		| wordset YY_BRACKETSO_LT wordrefset YY_BRACKETSC
			{ $$ = said_attach_branch($1, $3); }
		| wordset YY_COMMA YY_BRACKETSO wordset YY_BRACKETSC
			{ $$ = said_attach_branch($1, $4); /* $3 is the '[' token */ }
		;



expr :		  cwordset cwordrefset
			{ $$ = said_attach_branch($1, $2); }
		| cwordset
			{ $$ = $1; }
		| cwordrefset
			{ $$ = $1; }
		;



cwordrefset :	  wordrefset
			{ $$ = $1; }
		| YY_BRACKETSO_LT wordrefset YY_BRACKETSC
			{ $$ = said_aug_branch(0x152, 0x144, $2, SAID_BRANCH_NULL); }
		| wordrefset YY_BRACKETSO_LT wordrefset YY_BRACKETSC
			{ $$ = said_attach_branch($1, said_aug_branch(0x152, 0x144, $3, SAID_BRANCH_NULL)); }
		;



wordrefset :	  YY_LT word recref
			{ $$ = said_aug_branch(0x144, 0x14f, $2, $3); }
		| YY_LT_PARENO YY_PARENO expr YY_PARENC
			{ $$ = said_aug_branch(0x144, 0x14f, said_aug_branch(0x141, 0x144, $3, SAID_BRANCH_NULL), SAID_BRANCH_NULL); /* $2 is the '(' token */ }
		| YY_LT wordset
			{ $$ = said_aug_branch(0x144, 0x14f, $2, SAID_BRANCH_NULL); }
		| YY_LT_BRACKETSO YY_BRACKETSO wordset YY_BRACKETSC
			{ $$ = said_aug_branch(0x152, 0x144, said_aug_branch(0x144, 0x14f, $3, SAID_BRANCH_NULL), SAID_BRANCH_NULL); }
		;



recref :	  YY_LT wordset recref
			{ $$ = said_aug_branch(0x141, 0x144, said_aug_branch(0x144, 0x14f, $2, SAID_BRANCH_NULL), $3); }
		| YY_LT wordset
			{ $$ = said_aug_branch(0x141, 0x144, said_aug_branch(0x144, 0x14f, $2, SAID_BRANCH_NULL), SAID_BRANCH_NULL); }
		| YY_LT_PARENO YY_PARENO expr YY_PARENC
			{ $$ = said_aug_branch(0x141, 0x14c, $3, SAID_BRANCH_NULL); /* $2 is the '(' token */ }
		;



%%


/* Maps (operator byte - SAID_FIRST) to the corresponding parser token */
int
parse_yy_token_lookup[] = {YY_COMMA, YY_AMP, YY_SLASH, YY_PARENO, YY_PARENC, YY_BRACKETSO, YY_BRACKETSC,
			   YY_HASH, YY_LT, YY_GT};

/* Lexer: hands the pre-tokenized contents of said_tokens[] to the parser.
** Entries below SAID_LONG(SAID_FIRST) are returned as WGROUP with the entry
** itself in yylval.  For all other entries the value shifted right by 8 is
** the operator: SAID_TERM ends the input (0 is returned), anything else is
** translated through parse_yy_token_lookup[].  '[' and '<' are replaced by
** one of the combined look-ahead tokens if the following entry calls for it;
** the following entry itself is not consumed.
*/
static int
yylex(void)
{
	int retval;

	if (said_token >= said_tokens_nr)
		return 0; /* Never read past the tokens stored by said_parse_spec() */

	retval = said_tokens[said_token++];

	if (retval < SAID_LONG(SAID_FIRST)) {
		yylval = retval;
		retval = WGROUP;
	} else {
		retval >>= 8;

		if (retval == SAID_TERM)
			retval = 0;
		else {
			assert(retval >= SAID_FIRST);
			retval = parse_yy_token_lookup[retval - SAID_FIRST];
			if (retval == YY_BRACKETSO) {
				if ((said_tokens[said_token] >> 8) == SAID_LT)
					retval = YY_BRACKETSO_LT;
				else
					if ((said_tokens[said_token] >> 8) == SAID_SLASH)
						retval = YY_BRACKETSO_SLASH;
			} else if (retval == YY_LT && (said_tokens[said_token] >> 8) == SAID_BRACKO) {
				retval = YY_LT_BRACKETSO;
			} else if (retval == YY_LT && (said_tokens[said_token] >> 8) == SAID_PARENO) {
				retval = YY_LT_PARENO;
			}
		}
	}

	return retval;
}

/* Yields the index of the next free node in said_tree[] and advances
** said_tree_pos.  Once the tree is full (or has been full before),
** said_tree_pos is pinned to 0 and 0 is yielded.
** This expression modifies said_tree_pos: never use it more than once
** within a single expression or argument list.
*/
#define SAID_NEXT_NODE \
	(((said_tree_pos == 0) || (said_tree_pos >= VOCAB_TREE_NODES)) ? (said_tree_pos = 0) : said_tree_pos++)

/* Turns node 'pos' into a leaf; stores 'value' unless it is VALUE_IGNORE.
** Returns pos.
*/
static inline int
said_leaf_node(tree_t pos, int value)
{
	said_tree[pos].type = PARSE_TREE_NODE_LEAF;

	if (value != VALUE_IGNORE)
		said_tree[pos].content.value = value;

	return pos;
}

/* Turns node 'pos' into a branch; each of 'left' and 'right' is stored
** unless it is VALUE_IGNORE.  Returns pos.
*/
static inline int
said_branch_node(tree_t pos, int left, int right)
{
	said_tree[pos].type = PARSE_TREE_NODE_BRANCH;

	if (left != VALUE_IGNORE)
		said_tree[pos].content.branches[0] = left;

	if (right != VALUE_IGNORE)
		said_tree[pos].content.branches[1] = right;

	return pos;
}


/* Returns a new branch (t1 . t2), or just t2 if t1 is the null branch */
static tree_t
said_paren(tree_t t1, tree_t t2)
{
	if (t1)
		return said_branch_node(SAID_NEXT_NODE,
					t1,
					t2
					);
	else
		return t2;
}

/* Returns a new branch whose left side is a new leaf holding 'val' and
** whose right side is t.
*/
static tree_t
said_value(int val, tree_t t)
{
	/* Allocate one node per statement so the order is well-defined */
	tree_t branch = SAID_NEXT_NODE;
	tree_t leaf = SAID_NEXT_NODE;

	return said_branch_node(branch,
				said_leaf_node(leaf, val),
				t
				);

}

/* Returns a new leaf holding 'val' */
static tree_t
said_terminal(int val)
{
	return said_leaf_node(SAID_NEXT_NODE, val);
}


/* Builds the augmented list ((n1 . (n2 . t1)) . t2) from five new nodes
** and returns the index of its outermost branch.
*/
static tree_t
said_aug_branch(int n1, int n2, tree_t t1, tree_t t2)
{
	int retval;
	/* Allocate one node per statement so the order is well-defined */
	tree_t outer = SAID_NEXT_NODE;
	tree_t head = SAID_NEXT_NODE;
	tree_t major_leaf = SAID_NEXT_NODE;
	tree_t tail = SAID_NEXT_NODE;
	tree_t minor_leaf = SAID_NEXT_NODE;

	retval = said_branch_node(outer,
				  said_branch_node(head,
						   said_leaf_node(major_leaf, n1),
						   said_branch_node(tail,
								    said_leaf_node(minor_leaf, n2),
								    t1
								    )
						   ),
				  t2
				  );

#ifdef SAID_DEBUG
	fprintf(stderr,"AUG(0x%x, 0x%x, [%04x], [%04x]) = [%04x]\n", n1, n2, t1, t2, retval);
#endif

	return retval;
}

/* Stores 'attacheant' as the right branch of 'base' and returns base.
** If either argument is the null branch, the other one is returned
** unchanged.
*/
static tree_t
said_attach_branch(tree_t base, tree_t attacheant)
{
#ifdef SAID_DEBUG
	fprintf(stderr,"ATT2([%04x], [%04x]) = [%04x]\n", base, attacheant, base);
#endif

	if (!attacheant)
		return base;
	if (!base)
		return attacheant; /* Also happens if we're out of space */

	said_branch_node(base, VALUE_IGNORE, attacheant);

	return base;
}

/* Fills in the four reserved top nodes (0..3) so that 'first' hangs below
** the root, and counts the call in said_blessed.  Always returns 0.
*/
static said_spec_t
said_top_branch(tree_t first)
{
#ifdef SAID_DEBUG
	fprintf(stderr, "TOP([%04x])\n", first);
#endif
	said_branch_node(0, 1, 2);
	said_leaf_node(1, 0x141); /* Magic number #1 */
	said_branch_node(2, 3, first);
	said_leaf_node(3, 0x13f); /* Magic number #2 */

	++said_blessed;

	return 0;
}


/* Tokenizes the SAID block at 'spec' into said_tokens[] and parses it into
** said_tree[].
** Parameters: (state_t *) s: Not used by this function
**             (byte *) spec: The block to parse; read up to and including
**                            the first SAID_TERM byte
** Returns   : (int) 0 on success, 1 on failure (spec needs more than
**             MAX_SAID_TOKENS tokens, parse error, out of tree nodes, or a
**             number of top branches other than one); a message is printed
**             with sciprintf() on failure.
*/
int
said_parse_spec(state_t *s, byte *spec)
{
	int nextitem;

	said_parse_error = NULL;
	said_token = 0;
	said_tokens_nr = 0;
	said_blessed = 0;

	said_tree_pos = SAID_TREE_START;

	do {
		nextitem = *spec++;
		if (nextitem < SAID_FIRST) /* two-byte entry: combine with the following byte */
			said_tokens[said_tokens_nr++] = nextitem << 8 | *spec++;
		else
			said_tokens[said_tokens_nr++] = SAID_LONG(nextitem);

	} while ((nextitem != SAID_TERM) && (said_tokens_nr < MAX_SAID_TOKENS));

	if (nextitem == SAID_TERM)
		yyparse();
	else {
		sciprintf("Error: SAID spec is too long\n");
		return 1;
	}

	if (said_parse_error) {
		sciprintf("Error while parsing SAID spec: %s\n", said_parse_error);
		free(said_parse_error);
		said_parse_error = NULL;
		return 1;
	}

	if (said_tree_pos == 0) {
		sciprintf("Error: Out of tree space while parsing SAID spec\n");
		return 1;
	}

	if (said_blessed != 1) {
		sciprintf("Error: Found %d top branches\n", said_blessed);
		return 1;
	}

	return 0;
}

/**********************/
/**** Augmentation ****/
/**********************/


/** primitive functions **/

/* The three macros below operate on a variable named 'tree' that must be
** in scope, and make the *enclosing function* return 0 if their check
** fails.
*/

/* a = branch number 'br' of node p; returns 0 if p is not a branch node */
#define AUG_READ_BRANCH(a, br, p) \
	do { \
		if (tree[p].type != PARSE_TREE_NODE_BRANCH) \
			return 0; \
		(a) = tree[p].content.branches[br]; \
	} while (0)

/* a = value of node p; returns 0 if p is not a leaf node */
#define AUG_READ_VALUE(a, p) \
	do { \
		if (tree[p].type != PARSE_TREE_NODE_LEAF) \
			return 0; \
		(a) = tree[p].content.value; \
	} while (0)

/* returns 0 if i is zero */
#define AUG_ASSERT(i) \
	do { \
		if (!(i)) \
			return 0; \
	} while (0)

static int
aug_get_next_sibling(parse_tree_node_t *tree, int pos, int *first, int *second)
     /* Returns the next sibling relative to the specified position in 'tree',
     ** sets *first and *second to its augment node values, returns the new position
     ** or 0 if there was no next sibling.
     ** *first and *second are left untouched if 0 is returned before they
     ** have been read.
     */
{
	int seek, valpos;

	AUG_READ_BRANCH(pos, 1, pos);
	AUG_ASSERT(pos);
	AUG_READ_BRANCH(seek, 0, pos);
	AUG_ASSERT(seek);

	/* Now retrieve first value */
	AUG_READ_BRANCH(valpos, 0, seek);
	AUG_ASSERT(valpos);
	AUG_READ_VALUE(*first, valpos);

	/* Get second value */
	AUG_READ_BRANCH(seek, 1, seek);
	AUG_ASSERT(seek);
	AUG_READ_BRANCH(valpos, 0, seek);
	AUG_ASSERT(valpos);
	AUG_READ_VALUE(*second, valpos);

	return pos;
}


static int
aug_get_wgroup(parse_tree_node_t *tree, int pos)
     /* Returns 0 if pos in tree is not the root of a 3-element list, otherwise
     ** it returns the last element (which, in practice, is the word group)
     */
{
	int val;

	AUG_READ_BRANCH(pos, 0, pos);
	AUG_ASSERT(pos);
	AUG_READ_BRANCH(pos, 1, pos);
	AUG_ASSERT(pos);
	AUG_READ_BRANCH(pos, 1, pos);
	AUG_ASSERT(pos);
	AUG_READ_VALUE(val, pos);

	return val;
}


static int
aug_get_base_node(parse_tree_node_t *tree)
     /* Returns the right branch of the root node (node 0), or 0 if the root
     ** is not a branch node
     */
{
	int startpos = 0;
	AUG_READ_BRANCH(startpos, 1, startpos);
	return startpos;
}


/** semi-primitive functions **/


static int
aug_get_first_child(parse_tree_node_t *tree, int pos, int *first, int *second)
     /* like aug_get_next_sibling, except that it recurses into the tree and
     ** finds the first child (usually *not* Ayanami Rei) of the current branch
     ** rather than its next sibling.
     */
{
	AUG_READ_BRANCH(pos, 0, pos);
	AUG_ASSERT(pos);
	AUG_READ_BRANCH(pos, 1, pos);
	AUG_ASSERT(pos);

	return aug_get_next_sibling(tree, pos, first, second);
}

static void
aug_find_words_recursively(parse_tree_node_t *tree, int startpos,
			   int *base_words, int *base_words_nr,
			   int *ref_words, int *ref_words_nr,
			   int maxwords, int refbranch)
     /* Finds and lists all base (141) and reference (144) words below startpos.
     ** Words are appended to base_words[]/ref_words[], each of which holds at
     ** most maxwords entries; the search stops with a message once one of them
     ** is full.  If refbranch is non-zero, every word found is listed as a
     ** reference word.
     */
{
	int major, minor;
	int word;
	int pos = aug_get_first_child(tree, startpos, &major, &minor);

	/*  if (major == WORD_TYPE_REF)
	    refbranch = 1;*/

	while (pos) {
		if ((word = aug_get_wgroup(tree, pos))) { /* found a word */

			if (!refbranch && major == WORD_TYPE_BASE) {
				if ((*base_words_nr) == maxwords) {
					sciprintf("Out of regular words\n");
					return; /* return gracefully */
				}

				base_words[*base_words_nr] = word; /* register word */
				++(*base_words_nr);

			}
			if (major == WORD_TYPE_REF || refbranch) {
				if ((*ref_words_nr) == maxwords) {
					sciprintf("Out of reference words\n");
					return; /* return gracefully */
				}

				ref_words[*ref_words_nr] = word; /* register word */
				++(*ref_words_nr);

			}
			if (major != WORD_TYPE_SYNTACTIC_SUGAR && major != WORD_TYPE_BASE && major != WORD_TYPE_REF)
				sciprintf("aug_find_words_recursively(): Unknown word type %03x\n", major);

		} else /* Did NOT find a word group: Attempt to recurse */
			aug_find_words_recursively(tree, pos, base_words, base_words_nr,
						   ref_words, ref_words_nr, maxwords, refbranch || major == WORD_TYPE_REF);

		pos = aug_get_next_sibling(tree, pos, &major, &minor);
	}
}


static void
aug_find_words(parse_tree_node_t *tree, int startpos,
	       int *base_words, int *base_words_nr,
	       int *ref_words, int *ref_words_nr,
	       int maxwords)
     /* initializing wrapper for aug_find_words_recursively(): resets both
     ** word counters to 0 and starts outside of any reference branch
     */
{
	*base_words_nr = 0;
	*ref_words_nr = 0;

	aug_find_words_recursively(tree, startpos, base_words, base_words_nr, ref_words, ref_words_nr, maxwords, 0);
}


+static inline int +aug_contains_word(int *list, int length, int word) +{ + int i; + if (word == ANYWORD) + return (length); + + for (i = 0; i < length; i++) + if (list[i] == word) + return 1; + + return 0; +} + + +static int +augment_sentence_expression(parse_tree_node_t *saidt, int augment_pos, + parse_tree_node_t *parset, int parse_branch, + int major, int minor, + int *base_words, int base_words_nr, + int *ref_words, int ref_words_nr); + +static int +augment_match_expression_p(parse_tree_node_t *saidt, int augment_pos, + parse_tree_node_t *parset, int parse_basepos, + int major, int minor, + int *base_words, int base_words_nr, + int *ref_words, int ref_words_nr) +{ + int cmajor, cminor, cpos; + cpos = aug_get_first_child(saidt, augment_pos, &cmajor, &cminor); + if (!cpos) { + sciprintf("augment_match_expression_p(): Empty condition\n"); + return 1; + } + + scidprintf("Attempting to match (%03x %03x (%03x %03x\n", major, minor, cmajor, cminor); + + if ((major == WORD_TYPE_BASE) && (minor == AUGMENT_SENTENCE_MINOR_RECURSE)) + return augment_match_expression_p(saidt, cpos, + parset, parse_basepos, + cmajor, cminor, + base_words, base_words_nr, + ref_words, ref_words_nr); + + + switch (major) { + + case WORD_TYPE_BASE: + while (cpos) { + if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_WORD) { + int word = aug_get_wgroup(saidt, cpos); + scidprintf("Looking for word %03x\n", word); + + if (aug_contains_word(base_words, base_words_nr, word)) + return 1; + } else if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_PHRASE) { + if (augment_sentence_expression(saidt, cpos, + parset, parse_basepos, + cmajor, cminor, + base_words, base_words_nr, + ref_words, ref_words_nr)) + return 1; + } else if (cminor == AUGMENT_SENTENCE_MINOR_PARENTHESES) { + int gc_major, gc_minor; + int gchild = aug_get_first_child(saidt, cpos, &gc_major, &gc_minor); + + while (gchild) { + if (augment_match_expression_p(saidt, cpos, + parset, parse_basepos, + major, minor, + base_words, base_words_nr, + ref_words, 
ref_words_nr)) + return 1; + gchild = aug_get_next_sibling(saidt, gchild, &gc_major, &gc_minor); + } + } else + sciprintf("augment_match_expression_p(): Unknown type 141 minor number %3x\n", cminor); + + cpos = aug_get_next_sibling(saidt, cpos, &cmajor, &cminor); + + } + break; + + case WORD_TYPE_REF: + while (cpos) { + if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_WORD) { + int word = aug_get_wgroup(saidt, cpos); + scidprintf("Looking for refword %03x\n", word); + + if (aug_contains_word(ref_words, ref_words_nr, word)) + return 1; + } else if (cminor == AUGMENT_SENTENCE_MINOR_MATCH_PHRASE) { + if (augment_match_expression_p(saidt, cpos, + parset, parse_basepos, + cmajor, cminor, + base_words, base_words_nr, + ref_words, ref_words_nr)) + return 1; + } else if (cminor == AUGMENT_SENTENCE_MINOR_PARENTHESES) { + int gc_major, gc_minor; + int gchild = aug_get_first_child(saidt, cpos, &gc_major, &gc_minor); + + while (gchild) { + if (augment_match_expression_p(saidt, cpos, + parset, parse_basepos, + major, minor, + base_words, base_words_nr, + ref_words, ref_words_nr)) + return 1; + gchild = aug_get_next_sibling(saidt, gchild, &gc_major, &gc_minor); + } + } else + sciprintf("augment_match_expression_p(): Unknown type 144 minor number %3x\n", cminor); + + cpos = aug_get_next_sibling(saidt, cpos, &cmajor, &cminor); + + } + break; + + case AUGMENT_SENTENCE_PART_BRACKETS: + if (augment_match_expression_p(saidt, cpos, + parset, parse_basepos, + cmajor, cminor, + base_words, base_words_nr, + ref_words, ref_words_nr)) + return 1; + + scidprintf("Didn't match subexpression; checking sub-bracked predicate %03x\n", cmajor); + + switch (cmajor) { + case WORD_TYPE_BASE: + if (!base_words_nr) + return 1; + break; + + case WORD_TYPE_REF: + if (!ref_words_nr) + return 1; + break; + + default: + sciprintf("augment_match_expression_p(): (subp1) Unkonwn sub-bracket predicate %03x\n", cmajor); + } + + break; + + default: + sciprintf("augment_match_expression_p(): Unknown predicate %03x\n", 
major); + + } + + scidprintf("Generic failure\n"); + return 0; +} + +static int +augment_sentence_expression(parse_tree_node_t *saidt, int augment_pos, + parse_tree_node_t *parset, int parse_branch, + int major, int minor, + int *base_words, int base_words_nr, + int *ref_words, int ref_words_nr) +{ + int check_major, check_minor; + int check_pos = aug_get_first_child(saidt, augment_pos, &check_major, &check_minor); + do { + if (!(augment_match_expression_p(saidt, check_pos, parset, parse_branch, + check_major, check_minor, base_words, base_words_nr, + ref_words, ref_words_nr))) + return 0; + } while ((check_pos = aug_get_next_sibling(saidt, check_pos, &check_major, &check_minor))); + return 1; +} + + + +static int +augment_sentence_part(parse_tree_node_t *saidt, int augment_pos, + parse_tree_node_t *parset, int parse_basepos, + int major, int minor) +{ + int pmajor, pminor; + int parse_branch = parse_basepos; + int optional = 0; + int foundwords = 0; + + scidprintf("Augmenting (%03x %03x\n", major, minor); + + if (major == AUGMENT_SENTENCE_PART_BRACKETS) { /* '[/ foo]' is true if '/foo' or if there + ** exists no x for which '/x' is true + */ + if ((augment_pos = aug_get_first_child(saidt, augment_pos, &major, &minor))) { + scidprintf("Optional part: Now augmenting (%03x %03x\n", major, minor); + optional = 1; + } else { + scidprintf("Matched empty optional expression\n"); + return 1; + } + } + + if ((major < 0x141) + || (major > 0x143)) { + scidprintf("augment_sentence_part(): Unexpected sentence part major number %03x\n", major); + return 0; + } + + while ((parse_branch = aug_get_next_sibling(parset, parse_branch, &pmajor, &pminor))) + if (pmajor == major) { /* found matching sentence part */ + int success; + int base_words_nr; + int ref_words_nr; + int base_words[AUGMENT_MAX_WORDS]; + int ref_words[AUGMENT_MAX_WORDS]; +#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION + int i; +#endif + + scidprintf("Found match with pminor = %03x\n", pminor); + aug_find_words(parset, 
parse_branch, base_words, &base_words_nr, + ref_words, &ref_words_nr, AUGMENT_MAX_WORDS); + foundwords |= (ref_words_nr | base_words_nr); +#ifdef SCI_DEBUG_PARSE_TREE_AUGMENTATION + sciprintf("%d base words:", base_words_nr); + for (i = 0; i < base_words_nr; i++) + sciprintf(" %03x", base_words[i]); + sciprintf("\n%d reference words:", ref_words_nr); + for (i = 0; i < ref_words_nr; i++) + sciprintf(" %03x", ref_words[i]); + sciprintf("\n"); +#endif + + success = augment_sentence_expression(saidt, augment_pos, + parset, parse_basepos, major, minor, + base_words, base_words_nr, + ref_words, ref_words_nr); + + if (success) { + scidprintf("SUCCESS on augmenting (%03x %03x\n", major, minor); + return 1; + } + } + + if (optional && (foundwords == 0)) { + scidprintf("Found no words and optional branch => SUCCESS on augmenting (%03x %03x\n", major, minor); + return 1; + } + scidprintf("FAILURE on augmenting (%03x %03x\n", major, minor); + return 0; +} + +static int +augment_parse_nodes(parse_tree_node_t *parset, parse_tree_node_t *saidt) +{ + int augment_basepos = 0; + int parse_basepos; + int major, minor; + int dontclaim = 0; + + parse_basepos = aug_get_base_node(parset); + if (!parse_basepos) { + sciprintf("augment_parse_nodes(): Parse tree is corrupt\n"); + return 0; + } + + augment_basepos = aug_get_base_node(saidt); + if (!augment_basepos) { + sciprintf("augment_parse_nodes(): Said tree is corrupt\n"); + return 0; + } + while ((augment_basepos = aug_get_next_sibling(saidt, augment_basepos, &major, &minor))) { + + if ((major == 0x14b) + && (minor == SAID_LONG(SAID_GT))) + dontclaim = 1; /* special case */ + else /* normal sentence part */ + if (!(augment_sentence_part(saidt, augment_basepos, parset, parse_basepos, major, minor))) { + scidprintf("Returning failure\n"); + return 0; /* fail */ + } + } + + scidprintf("Returning success with dontclaim=%d\n", dontclaim); + + if (dontclaim) + return SAID_PARTIAL_MATCH; + else return 1; /* full match */ +} + + 
+/*******************/ +/**** Main code ****/ +/*******************/ + +int +said(state_t *s, byte *spec, int verbose) +{ + int retval; + + parse_tree_node_t *parse_tree_ptr = s->parser_nodes; + + if (s->parser_valid) { + + if (said_parse_spec(s, spec)) { + sciprintf("Offending spec was: "); + vocab_decypher_said_block(s, spec); + return SAID_NO_MATCH; + } + + if (verbose) + vocab_dump_parse_tree("Said-tree", said_tree); /* Nothing better to do yet */ + retval = augment_parse_nodes(parse_tree_ptr, &(said_tree[0])); + + if (!retval) + return SAID_NO_MATCH; + else if (retval != SAID_PARTIAL_MATCH) + return SAID_FULL_MATCH; + else return SAID_PARTIAL_MATCH; + } + + return SAID_NO_MATCH; +} + + + +#ifdef SAID_DEBUG_PROGRAM +int +main (int argc, char *argv) +{ + byte block[] = {0x01, 0x00, 0xf8, 0xf5, 0x02, 0x01, 0xf6, 0xf2, 0x02, 0x01, 0xf2, 0x01, 0x03, 0xff}; + state_t s; + con_passthrough = 1; + + s.parser_valid = 1; + said(&s, block); +} +#endif |