Diffstat (limited to 'engines/sci/engine/grammar.cpp')
-rw-r--r--  engines/sci/engine/grammar.cpp  720
1 file changed, 720 insertions, 0 deletions
diff --git a/engines/sci/engine/grammar.cpp b/engines/sci/engine/grammar.cpp
new file mode 100644
index 0000000000..7daf735b6d
--- /dev/null
+++ b/engines/sci/engine/grammar.cpp
@@ -0,0 +1,720 @@
+/**************************************************************************
+ grammar.c Copyright (C) 2000 Christoph Reichenbach
+
+
+ This program may be modified and copied freely according to the terms of
+ the GNU general public license (GPL), as long as the above copyright
+ notice and the licensing information contained herein are preserved.
+
+ Please refer to www.gnu.org for licensing details.
+
+ This work is provided AS IS, without warranty of any kind, expressed or
+ implied, including but not limited to the warranties of merchantability,
+ noninfringement, and fitness for a specific purpose. The author will not
+ be held liable for any damage caused by this work or derivatives of it.
+
+ By using this source code, you agree to the licensing terms as stated
+ above.
+
+***************************************************************************/
+
+/* Functionality to transform the context-free SCI grammar rules into
+** strict Greibach normal form (strict GNF), and to test SCI input against
+** that grammar, writing an appropriate node tree if successful.
+*/
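+
+/* A rule is in strict GNF if it derives a single terminal followed by
+** zero or more non-terminals. In the notation printed by
+** vocab_print_rule() below, [121] -> G(0008) [122] [123] would be such
+** a rule (an illustrative example, not an actual SCI rule), whereas
+** [121] -> [122] G(0008) would not be.
+*/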
+
+#include "sci/include/resource.h"
+#include "sci/include/vocabulary.h"
+#include "sci/include/console.h"
+#include <stdarg.h>
+#include <string.h>
+
+#define TOKEN_OPAREN 0xff000000
+#define TOKEN_CPAREN 0xfe000000
+#define TOKEN_TERMINAL_CLASS 0x10000
+#define TOKEN_TERMINAL_GROUP 0x20000
+#define TOKEN_STUFFING_WORD 0x40000
+#define TOKEN_NON_NT (TOKEN_OPAREN | TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP | TOKEN_STUFFING_WORD)
+#define TOKEN_TERMINAL (TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP)
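+
+/* Note on the encoding (summarizing the masks above): each token is an
+** int whose high bits select the token type and whose low 16 bits hold
+** the payload (a word class, a word group, or a group to stuff into the
+** output tree). A token with none of the TOKEN_NON_NT bits set is a
+** non-terminal, i.e. a rule id. For example, word group 0x0123 used as
+** a terminal would be encoded as (TOKEN_TERMINAL_GROUP | 0x0123).
+*/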
+
+int _allocd_rules = 0; /* Number of rules currently allocated; tracked for leak debugging */
+
+static void
+vocab_print_rule(parse_rule_t *rule)
+{
+ int i;
+ int wspace = 0;
+
+ if (!rule) {
+ sciprintf("NULL rule");
+ return;
+ }
+
+ sciprintf("[%03x] -> ", rule->id);
+
+ if (!rule->length)
+ sciprintf("e");
+
+ for(i = 0; i < rule->length; i++) {
+ int token = rule->data[i];
+
+ if (token == TOKEN_OPAREN) {
+
+ if (i == rule->first_special)
+ sciprintf("_");
+
+ sciprintf("(");
+ wspace = 0;
+ } else if (token == TOKEN_CPAREN) {
+
+ if (i == rule->first_special)
+ sciprintf("_");
+
+ sciprintf(")");
+ wspace = 0;
+ } else {
+ if (wspace)
+ sciprintf(" ");
+
+ if (i == rule->first_special)
+ sciprintf("_");
+ if (token & TOKEN_TERMINAL_CLASS)
+ sciprintf("C(%04x)", token & 0xffff);
+ else if (token & TOKEN_TERMINAL_GROUP)
+ sciprintf("G(%04x)", token & 0xffff);
+ else if (token & TOKEN_STUFFING_WORD)
+ sciprintf("%03x", token & 0xffff);
+ else
+ sciprintf("[%03x]", token); /* non-terminal */
+ wspace = 1;
+ }
+
+ if (i == rule->first_special)
+ sciprintf("_");
+ }
+ sciprintf(" [%d specials]", rule->specials_nr);
+}
+
+
+static void
+_vfree(parse_rule_t *rule)
+{
+ free(rule);
+ --_allocd_rules;
+}
+
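+/* Builds a rule with the specified id from 'argc' tokens passed as
+** varargs, counting the 'specials' (terminals and non-terminals) and
+** recording where the first of them sits.
+*/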
+static parse_rule_t *
+_vbuild(int id, int argc, ...)
+{
+ va_list args;
+ int i;
+ parse_rule_t *rule = (parse_rule_t*)sci_malloc(sizeof(int) * (argc + 4));
+
+ ++_allocd_rules;
+ rule->id = id;
+ rule->first_special = 0;
+ rule->specials_nr = 0;
+ rule->length = argc;
+ va_start(args, argc);
+ for (i = 0; i < argc; i++) {
+ int v;
+ rule->data[i] = v = va_arg(args, int);
+ if ((v & TOKEN_TERMINAL)
+ || !(v & TOKEN_NON_NT)) {
+
+ ++rule->specials_nr;
+
+ if (!rule->first_special)
+ rule->first_special = i;
+ }
+ }
+ va_end(args);
+ return rule;
+}
+
+static parse_rule_t *
+_vcat(int id, parse_rule_t *a, parse_rule_t *b)
+{
+ parse_rule_t *rule = (parse_rule_t*)sci_malloc(sizeof(int) * (a->length + b->length + 4));
+
+ rule->id = id;
+ rule->length = a->length + b->length;
+ rule->specials_nr = a->specials_nr + b->specials_nr;
+ rule->first_special = a->first_special;
+ ++_allocd_rules;
+
+ memcpy(rule->data, a->data, sizeof(int) * a->length);
+ memcpy(&(rule->data[a->length]), b->data, sizeof(int) * b->length);
+
+ return rule;
+}
+
+static parse_rule_t *
+_vdup(parse_rule_t *a)
+{
+ parse_rule_t *rule = (parse_rule_t*)sci_malloc(sizeof(int) * (a->length + 4));
+
+ rule->id = a->id;
+ rule->length = a->length;
+ rule->specials_nr = a->specials_nr;
+ rule->first_special = a->first_special;
+ ++_allocd_rules;
+
+ memcpy(rule->data, a->data, sizeof(int) * a->length);
+
+ return rule;
+}
+
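+/* Replaces the leftmost non-terminal of 'turkey' with the body of
+** 'stuffing', provided that non-terminal carries stuffing's rule id;
+** returns the combined rule, or NULL if no substitution is possible.
+** This is the basic derivation step of the GNF construction below.
+*/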
+static parse_rule_t *
+_vinsert(parse_rule_t *turkey, parse_rule_t *stuffing)
+{
+ int firstnt = turkey->first_special;
+ parse_rule_t *rule;
+
+ while ((firstnt < turkey->length)
+ && (turkey->data[firstnt] & TOKEN_NON_NT))
+ firstnt++;
+
+ if ((firstnt == turkey->length)
+ || (turkey->data[firstnt] != stuffing->id))
+ return NULL;
+
+ rule = (parse_rule_t*)sci_malloc(sizeof(int) * (turkey->length - 1 + stuffing->length + 4));
+ rule->id = turkey->id;
+ rule->specials_nr = turkey->specials_nr + stuffing->specials_nr - 1;
+ rule->first_special = firstnt + stuffing->first_special;
+ rule->length = turkey->length - 1 + stuffing->length;
+ ++_allocd_rules;
+
+ if (firstnt > 0)
+ memcpy(rule->data, turkey->data, sizeof(int) * firstnt);
+ memcpy(&(rule->data[firstnt]), stuffing->data, sizeof(int) * stuffing->length);
+ if (firstnt < turkey->length - 1)
+ memcpy(&(rule->data[firstnt + stuffing->length]), &(turkey->data[firstnt + 1]),
+ sizeof(int) * (turkey->length - firstnt - 1));
+
+ return rule;
+}
+
+
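+/* Predicate: nonzero iff the first special symbol of 'rule' is a
+** terminal, i.e. iff the rule is already in Greibach normal form.
+*/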
+static int
+_greibach_rule_p(parse_rule_t *rule)
+{
+ int pos = rule->first_special;
+ while (pos < rule->length
+ && (rule->data[pos] & TOKEN_NON_NT)
+ && !(rule->data[pos] & TOKEN_TERMINAL))
+ ++pos;
+
+ if (pos == rule->length)
+ return 0;
+
+ return (rule->data[pos] & TOKEN_TERMINAL);
+}
+
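+/* Translates one branch of the vocabulary's parse tree into a rule:
+** compare-type, compare-group and force-storage entries become
+** terminal or stuffing tokens, and every other (type, value) pair
+** becomes a parenthesized reference to the non-terminal 'value'.
+** Returns NULL if the branch contains an invalid node type.
+*/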
+static parse_rule_t *
+_vbuild_rule(parse_tree_branch_t *branch)
+{
+ parse_rule_t *rule;
+ int tokens = 0, tokenpos = 0, i;
+
+ while (tokenpos < 10 && branch->data[tokenpos]) {
+ int type = branch->data[tokenpos];
+ tokenpos += 2;
+
+ if ((type == VOCAB_TREE_NODE_COMPARE_TYPE)
+ || (type == VOCAB_TREE_NODE_COMPARE_GROUP)
+ || (type == VOCAB_TREE_NODE_FORCE_STORAGE))
+ ++tokens;
+ else if (type > VOCAB_TREE_NODE_LAST_WORD_STORAGE)
+ tokens += 5;
+ else return NULL; /* invalid */
+ }
+
+ rule = (parse_rule_t*)sci_malloc(sizeof(int) * (4 + tokens));
+
+ ++_allocd_rules;
+ rule->id = branch->id;
+ rule->specials_nr = tokenpos >> 1;
+ rule->length = tokens;
+ rule->first_special = 0;
+
+ tokens = 0;
+ for (i = 0; i < tokenpos; i += 2) {
+ int type = branch->data[i];
+ int value = branch->data[i + 1];
+
+ if (type == VOCAB_TREE_NODE_COMPARE_TYPE)
+ rule->data[tokens++] = value | TOKEN_TERMINAL_CLASS;
+ else if (type == VOCAB_TREE_NODE_COMPARE_GROUP)
+ rule->data[tokens++] = value | TOKEN_TERMINAL_GROUP;
+ else if (type == VOCAB_TREE_NODE_FORCE_STORAGE)
+ rule->data[tokens++] = value | TOKEN_STUFFING_WORD;
+ else { /* normal inductive rule */
+ rule->data[tokens++] = TOKEN_OPAREN;
+ rule->data[tokens++] = type | TOKEN_STUFFING_WORD;
+ rule->data[tokens++] = value | TOKEN_STUFFING_WORD;
+
+ if (i == 0)
+ rule->first_special = tokens;
+
+ rule->data[tokens++] = value; /* The non-terminal */
+ rule->data[tokens++] = TOKEN_CPAREN;
+ }
+ }
+
+ return rule;
+}
+
+
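+/* Tries to match the rule's first special symbol, which must be a
+** terminal, against the input word's class or group. On success,
+** returns a copy of the rule with that terminal replaced by a stuffing
+** word carrying the matched group (and the next special located);
+** returns NULL on mismatch.
+*/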
+static parse_rule_t *
+_vsatisfy_rule(parse_rule_t *rule, result_word_t *input)
+{
+ int dep;
+
+ if (!rule->specials_nr)
+ return NULL;
+
+ dep = rule->data[rule->first_special];
+
+ if (((dep & TOKEN_TERMINAL_CLASS)
+ && ((dep & 0xffff) & input->w_class))
+ ||
+ ((dep & TOKEN_TERMINAL_GROUP)
+ && ((dep & 0xffff) & input->group))) {
+ parse_rule_t *retval = (parse_rule_t*)sci_malloc(sizeof(int) * (4 + rule->length));
+ ++_allocd_rules;
+ retval->id = rule->id;
+ retval->specials_nr = rule->specials_nr - 1;
+ retval->length = rule->length;
+ memcpy(retval->data, rule->data, sizeof(int) * retval->length);
+ retval->data[rule->first_special] = TOKEN_STUFFING_WORD | input->group;
+ retval->first_special = 0;
+
+ if (retval->specials_nr) { /* find first special, if it exists */
+ int tmp, i = rule->first_special;
+
+ while ((i < rule->length)
+ && ((tmp = retval->data[i]) & TOKEN_NON_NT)
+ && !(tmp & TOKEN_TERMINAL))
+ ++i;
+
+ if (i < rule->length)
+ retval->first_special = i;
+ }
+
+ return retval;
+ }
+ else return NULL;
+}
+
+/************** Rule lists **************/
+
+void
+vocab_free_rule_list(parse_rule_list_t *list)
+{
+ if (list) {
+ _vfree(list->rule);
+ vocab_free_rule_list(list->next); /* Yep, this is slow and memory-intensive. */
+ free(list);
+ }
+}
+
+static inline int
+_rules_equal_p(parse_rule_t *r1, parse_rule_t *r2)
+{
+ if ((r1->id != r2->id)
+ || (r1->length != r2->length)
+ || (r1->first_special != r2->first_special))
+ return 0;
+
+ return !(memcmp(r1->data, r2->data, sizeof(int) * r1->length));
+}
+
+static parse_rule_list_t *
+_vocab_add_rule(parse_rule_list_t *list, parse_rule_t *rule)
+{
+ parse_rule_list_t *new_elem;
+ int term;
+
+ if (!rule)
+ return list;
+
+ new_elem = (parse_rule_list_t*)sci_malloc(sizeof(parse_rule_list_t));
+ term = rule->data[rule->first_special];
+
+ new_elem->rule = rule;
+ new_elem->next = NULL;
+ new_elem->terminal = term = ((term & TOKEN_TERMINAL)? term : 0);
+
+ if (!list)
+ return new_elem;
+ else {
+ parse_rule_list_t *seeker = list;
+
+ while (seeker->next) {
+ if (seeker->next->terminal == term)
+ if (_rules_equal_p(seeker->next->rule, rule)) {
+ _vfree(rule);
+ free(new_elem);
+ return list; /* No duplicate rules */
+ }
+ seeker = seeker->next;
+ }
+
+ new_elem->next = seeker->next;
+ seeker->next = new_elem;
+ return list;
+ }
+}
+
+static void
+_vprl(parse_rule_list_t *list, int pos)
+{
+ if (list) {
+ sciprintf("R%03d: ", pos);
+ vocab_print_rule(list->rule);
+ sciprintf("\n");
+ _vprl(list->next, pos+1);
+ } else {
+ sciprintf("%d rules total.\n", pos);
+ }
+}
+
+void
+vocab_print_rule_list(parse_rule_list_t *list)
+{
+ _vprl(list, 0);
+}
+
+static parse_rule_list_t *
+_vocab_split_rule_list(parse_rule_list_t *list)
+{
+ if (!list->next
+ || (list->next->terminal)) {
+ parse_rule_list_t *tmp = list->next;
+ list->next = NULL;
+ return tmp;
+ }
+ else return _vocab_split_rule_list(list->next);
+}
+
+static void
+_vocab_free_empty_rule_list(parse_rule_list_t *list)
+{
+ if (list->next)
+ _vocab_free_empty_rule_list(list->next);
+
+ free(list);
+}
+
+static parse_rule_list_t *
+_vocab_merge_rule_lists(parse_rule_list_t *l1, parse_rule_list_t *l2)
+{
+ parse_rule_list_t *retval = l1, *seeker = l2;
+ while (seeker) {
+ retval = _vocab_add_rule(retval, seeker->rule);
+ seeker = seeker->next;
+ }
+ _vocab_free_empty_rule_list(l2);
+
+ return retval;
+}
+
+static int
+_vocab_rule_list_length(parse_rule_list_t *list)
+{
+ return ((list)? _vocab_rule_list_length(list->next) + 1 : 0);
+}
+
+
+static parse_rule_list_t *
+_vocab_clone_rule_list_by_id(parse_rule_list_t *list, int id)
+{
+ parse_rule_list_t *result = NULL;
+ parse_rule_list_t *seeker = list;
+
+ while (seeker) {
+ if (seeker->rule->id == id) {
+ result = _vocab_add_rule(result, _vdup(seeker->rule));
+ }
+ seeker = seeker->next;
+ }
+
+ return result;
+}
+
+
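+/* Builds the strict GNF rule set: the initial rules are split into
+** those already starting with a terminal and the rest, and each pass
+** substitutes the terminal-initial rules into the leading
+** non-terminals of the remaining ones via _vinsert(). Iteration stops
+** once a pass yields no new terminal-initial rules, or after 30
+** passes, which bounds the substitution depth for recursive rule sets.
+*/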
+parse_rule_list_t *
+_vocab_build_gnf(parse_tree_branch_t *branches, int branches_nr, int verbose)
+{
+ int i;
+ int iterations = 0;
+ int last_termrules, termrules = 0;
+ int ntrules_nr;
+ parse_rule_list_t *ntlist = NULL;
+ parse_rule_list_t *tlist, *new_tlist;
+
+ for (i = 1; i < branches_nr; i++) { /* branch rule 0 is treated specially */
+ parse_rule_t *rule = _vbuild_rule(branches + i);
+
+ if (!rule) return NULL;
+ ntlist = _vocab_add_rule(ntlist, rule);
+ }
+
+ tlist = _vocab_split_rule_list(ntlist);
+ ntrules_nr = _vocab_rule_list_length(ntlist);
+
+ if (verbose)
+ sciprintf("Starting with %d rules\n", ntrules_nr);
+
+ new_tlist = tlist;
+ tlist = NULL;
+
+ do {
+ parse_rule_list_t *new_new_tlist = NULL;
+ parse_rule_list_t *ntseeker, *tseeker;
+ last_termrules = termrules;
+
+ ntseeker = ntlist;
+ while (ntseeker) {
+ tseeker = new_tlist;
+
+ while (tseeker) {
+ parse_rule_t *newrule = _vinsert(ntseeker->rule, tseeker->rule);
+ if (newrule)
+ new_new_tlist = _vocab_add_rule(new_new_tlist, newrule);
+ tseeker = tseeker->next;
+ }
+
+ ntseeker = ntseeker->next;
+ }
+
+ tlist = _vocab_merge_rule_lists(tlist, new_tlist);
+
+ new_tlist = new_new_tlist;
+
+ termrules = _vocab_rule_list_length(new_new_tlist);
+
+ if (verbose)
+ sciprintf("After iteration #%d: %d new term rules\n", ++iterations, termrules);
+ } while (termrules && (iterations < 30));
+
+ vocab_free_rule_list(ntlist);
+
+ if (verbose) {
+ sciprintf("\nGNF rules:\n");
+ vocab_print_rule_list(tlist);
+ }
+
+ return tlist;
+}
+
+parse_rule_list_t *
+vocab_build_gnf(parse_tree_branch_t *branches, int branches_nr)
+{
+ return _vocab_build_gnf(branches, branches_nr, 0);
+}
+
+
+void
+vocab_gnf_dump(parse_tree_branch_t *branches, int branches_nr)
+{
+ parse_rule_list_t *tlist = _vocab_build_gnf(branches, branches_nr, 1);
+
+ sciprintf("%d allocd rules\n", _allocd_rules);
+ vocab_free_rule_list(tlist);
+}
+
+
+int
+vocab_build_parse_tree(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
+ parse_tree_branch_t *branch0, parse_rule_list_t *rules)
+{
+ return vocab_gnf_parse(nodes, words, words_nr, branch0, rules, 0);
+}
+
+
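+/* The _vbpt_* helpers below write the output tree: a node is either a
+** branch (two child indices) or a leaf (a value), and *pos always
+** indexes the most recently written node, so appending means bumping
+** *pos and linking the fresh node into its parent.
+*/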
+static int
+_vbpt_pareno(parse_tree_node_t *nodes, int *pos, int base)
+/* Opens parentheses */
+{
+ nodes[base].content.branches[0] = (*pos)+1;
+ nodes[++(*pos)].type = PARSE_TREE_NODE_BRANCH;
+ nodes[*pos].content.branches[0] = 0;
+ nodes[*pos].content.branches[1] = 0;
+ return *pos;
+}
+
+
+static int
+_vbpt_parenc(parse_tree_node_t *nodes, int *pos, int paren)
+/* Closes parentheses for appending */
+{
+ nodes[paren].content.branches[1] = ++(*pos);
+ nodes[*pos].type = PARSE_TREE_NODE_BRANCH;
+ nodes[*pos].content.branches[0] = 0;
+ nodes[*pos].content.branches[1] = 0;
+ return *pos;
+}
+
+
+static int
+_vbpt_append(parse_tree_node_t *nodes, int *pos, int base, int value)
+/* Writes one value to an existing base node and creates a successor node for writing */
+{
+ nodes[base].content.branches[0] = ++(*pos);
+ nodes[*pos].type = PARSE_TREE_NODE_LEAF;
+ nodes[*pos].content.value = value;
+ nodes[base].content.branches[1] = ++(*pos);
+ nodes[*pos].type = PARSE_TREE_NODE_BRANCH;
+ nodes[*pos].content.branches[0] = 0;
+ nodes[*pos].content.branches[1] = 0;
+ return *pos;
+}
+
+
+static int
+_vbpt_terminate(parse_tree_node_t *nodes, int *pos, int base, int value)
+/* Terminates a branch, overwriting the next-write fork node with a leaf */
+{
+ nodes[base].type = PARSE_TREE_NODE_LEAF;
+ nodes[base].content.value = value;
+ return *pos;
+}
+
+static int
+_vbpt_write_subexpression(parse_tree_node_t *nodes, int *pos,
+ parse_rule_t *rule, int rulepos, int writepos)
+{
+ int token;
+ while ((token = ((rulepos < rule->length)? rule->data[rulepos++] : TOKEN_CPAREN)) != TOKEN_CPAREN) {
+ int nexttoken = (rulepos < rule->length)? rule->data[rulepos] : TOKEN_CPAREN;
+ if (token == TOKEN_OPAREN) {
+ int wpold;
+ int writepos2 = _vbpt_pareno(nodes, pos, wpold = writepos);
+ rulepos = _vbpt_write_subexpression(nodes, pos, rule, rulepos, writepos2);
+ nexttoken = (rulepos < rule->length)? rule->data[rulepos] : TOKEN_CPAREN;
+ if (nexttoken != TOKEN_CPAREN)
+ writepos = _vbpt_parenc(nodes, pos, wpold);
+ } else if (token & TOKEN_STUFFING_WORD) {
+ if (nexttoken == TOKEN_CPAREN)
+ writepos = _vbpt_terminate(nodes, pos, writepos, token & 0xffff);
+ else
+ writepos = _vbpt_append(nodes, pos, writepos, token & 0xffff);
+ } else {
+ sciprintf("\nError in parser (grammar.c, _vbpt_write_subexpression()): Rule data broken in rule ");
+ vocab_print_rule(rule);
+ sciprintf(", at token position %d\n", *pos);
+ return rulepos;
+ }
+ }
+
+ return rulepos;
+}
+
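+/* Parses 'words' against the GNF rule set: starting from the rules
+** for the start symbol in branch0->data[1], each input word reduces
+** the candidate rules via _vsatisfy_rule(), and surviving candidates
+** are expanded by substituting the rules for their next non-terminal.
+** Returns 0 on success, writing the parse tree into 'nodes', or 1 if
+** the input does not parse.
+*/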
+int
+vocab_gnf_parse(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
+ parse_tree_branch_t *branch0, parse_rule_list_t *tlist, int verbose)
+{
+ /* Get the start rules: */
+ parse_rule_list_t *work = _vocab_clone_rule_list_by_id(tlist, branch0->data[1]);
+ parse_rule_list_t *results = NULL;
+ int word;
+
+ for (word = 0; word < words_nr; word++) {
+ parse_rule_list_t *new_work = NULL;
+ parse_rule_list_t *reduced_rules = NULL;
+ parse_rule_list_t *seeker, *subseeker;
+
+ if (verbose)
+ sciprintf("Adding word %d...\n", word);
+
+ seeker = work;
+ while (seeker) {
+
+ if (seeker->rule->specials_nr <= (words_nr - word))
+ reduced_rules = _vocab_add_rule(reduced_rules, _vsatisfy_rule(seeker->rule, words + word));
+
+ seeker = seeker->next;
+ }
+
+ if (reduced_rules == NULL) {
+ vocab_free_rule_list(work);
+ if (verbose)
+ sciprintf("No results.\n");
+ return 1;
+ }
+
+ vocab_free_rule_list(work);
+
+ if (word + 1 < words_nr) {
+ seeker = reduced_rules;
+
+ while (seeker) {
+ if (seeker->rule->specials_nr) {
+ int my_id = seeker->rule->data[seeker->rule->first_special];
+
+ subseeker = tlist;
+ while (subseeker) {
+ if (subseeker->rule->id == my_id)
+ new_work = _vocab_add_rule(new_work, _vinsert(seeker->rule, subseeker->rule));
+
+ subseeker = subseeker->next;
+ }
+ }
+
+ seeker = seeker->next;
+ }
+ vocab_free_rule_list(reduced_rules);
+ } else /* last word */
+ new_work = reduced_rules;
+
+ work = new_work;
+ if (verbose)
+ sciprintf("Now at %d candidates\n", _vocab_rule_list_length(work));
+ if (work == NULL) {
+ if (verbose)
+ sciprintf("No results.\n");
+ return 1;
+ }
+ }
+
+ results = work;
+
+ if (verbose) {
+ sciprintf("All results (excluding the surrounding '(141 %03x' and ')'):\n",
+ branch0->id);
+ vocab_print_rule_list(results);
+ sciprintf("\n");
+ }
+
+ /* now use the first result */
+ {
+ int temp, pos;
+
+ nodes[0].type = PARSE_TREE_NODE_BRANCH;
+ nodes[0].content.branches[0] = 1;
+ nodes[0].content.branches[1] = 2;
+
+ nodes[1].type = PARSE_TREE_NODE_LEAF;
+ nodes[1].content.value = 0x141;
+
+ nodes[2].type = PARSE_TREE_NODE_BRANCH;
+ nodes[2].content.branches[0] = 0;
+ nodes[2].content.branches[1] = 0;
+
+ pos = 2;
+
+ temp = _vbpt_append(nodes, &pos, 2, branch0->id);
+ _vbpt_write_subexpression(nodes, &pos, results->rule, 0, temp);
+ }
+
+ vocab_free_rule_list(results);
+ return 0;
+}