1 files changed, 127 insertions, 251 deletions
diff --git a/engines/glk/tads/tads2/regex.h b/engines/glk/tads/tads2/regex.h
index cd975e066b..48c2dd0ad4 100644
--- a/engines/glk/tads/tads2/regex.h
+++ b/engines/glk/tads/tads2/regex.h
@@ -24,289 +24,165 @@
 #define GLK_TADS_TADS2_REGEX
 
 #include "common/array.h"
-#include "glk/tads/tads2/ler.h"
+#include "glk/tads/tads2/error_handling.h"
 
 namespace Glk {
 namespace TADS {
 namespace TADS2 {
 
-/**
- * state ID
- */
+
+/* state ID */
 typedef int re_state_id;
 
-/**
- * invalid state ID - used to mark null machines
- */
+/* invalid state ID - used to mark null machines */
 #define RE_STATE_INVALID   ((re_state_id)-1)
 
-/**
- * first valid state ID
- */
+/* first valid state ID */
 #define RE_STATE_FIRST_VALID  ((re_state_id)0)
 
 
-/**
+/* ------------------------------------------------------------------------ */
+/*
  *   Group register structure.  Each register keeps track of the starting
- *   and ending offset of the group's text.
+ *   and ending offset of the group's text.  
  */
-struct re_group_register {
-	const char *start_ofs;
-	const char *end_ofs;
-};
+typedef struct
+{
+    const char *start_ofs;
+    const char *end_ofs;
+} re_group_register;
 
-/**
- * number of group registers we keep
- */
+/* number of group registers we keep */
 #define RE_GROUP_REG_CNT  10
 
-/**
- * Denormalized state transition tuple.  Each tuple represents the
- * complete set of transitions out of a particular state.  A particular
- * state can have one character transition, or two epsilon transitions.
- * Note that we don't need to store the state ID in the tuple, because
- * the state ID is the index of the tuple in an array of state tuples.
+
+/* ------------------------------------------------------------------------ */
+/* 
+ *   Denormalized state transition tuple.  Each tuple represents the
+ *   complete set of transitions out of a particular state.  A particular
+ *   state can have one character transition, or two epsilon transitions.
+ *   Note that we don't need to store the state ID in the tuple, because
+ *   the state ID is the index of the tuple in an array of state tuples.  
  */
-struct re_tuple {
-	// the character we must match to transition to the target state
-	char ch;
+typedef struct
+{
+    /* the character we must match to transition to the target state */
+    char ch;
 
-	// the target states
-	re_state_id next_state_1;
-	re_state_id next_state_2;
+    /* the target states */
+    re_state_id next_state_1;
+    re_state_id next_state_2;
 
-	// character range match table, if used
-	unsigned char *char_range;
+    /* character range match table, if used */
+    unsigned char *char_range;
 
-	// flags
-	byte flags;
-};
+    /* flags */
+    unsigned char flags;
+} re_tuple;
 
 
-/**
- * Tuple flags
+/*
+ *   Tuple flags 
  */
-enum {
-	// this state is the start of a group - the 'ch' value is the group ID
-	RE_STATE_GROUP_BEGIN = 0x02,
 
-	// this state is the end of a group - 'ch' is the group ID */
-	RE_STATE_GROUP_END = 0x04
-};
+/* this state is the start of a group - the 'ch' value is the group ID */
+#define RE_STATE_GROUP_BEGIN  0x02
+
+/* this state is the end of a group - 'ch' is the group ID */
+#define RE_STATE_GROUP_END    0x04
+
+
+/* ------------------------------------------------------------------------ */
+/*
+ *   Regular expression compilation context structure.  This tracks the
+ *   state of the compilation and stores the resources associated with the
+ *   compiled expression.  
+ */
+typedef struct
+{
+    /* error context */
+    errcxdef *errctx;
+
+    /* next available state ID */
+    re_state_id next_state;
+
+    /*
+     *   The array of transition tuples.  We'll allocate this array and
+     *   expand it as necessary.  
+     */
+    re_tuple *tuple_arr;
 
-/**
- * Status codes
+    /* number of transition tuples allocated in the array */
+    int tuples_alloc;
+
+    /* current group ID */
+    int cur_group;
+
+    /* group registers */
+    re_group_register regs[RE_GROUP_REG_CNT];
+
+    /* 
+     *   Buffer for retaining a copy of the last string we scanned.  We
+     *   retain our own copy of each string, and point the group registers
+     *   into this copy rather than the caller's original string -- this
+     *   ensures that the group registers remain valid even after the
+     *   caller has deallocated the original string.  
+     */
+    char *strbuf;
+
+    /* length of the string currently in the buffer */
+    size_t curlen;
+
+    /* size of the buffer allocated to strbuf */
+    size_t strbufsiz;
+} re_context;
+
+
+/* ------------------------------------------------------------------------ */
+/*
+ *   Status codes 
  */
-typedef enum {
-	// success
-	RE_STATUS_SUCCESS = 0,
+typedef enum
+{
+    /* success */
+    RE_STATUS_SUCCESS = 0,
 
-	// compilation error - group nesting too deep
-	RE_STATUS_GROUP_NESTING_TOO_DEEP
+    /* compilation error - group nesting too deep */
+    RE_STATUS_GROUP_NESTING_TOO_DEEP
 } re_status_t;
 
 
-/**
- * Regular expression compilation.  This tracks the state of the compilation and
- * stores the resources associated with the compiled expression.
+/* ------------------------------------------------------------------------ */
+/*
+ *   Initialize the context.  The memory for the context structure itself
+ *   must be allocated and maintained by the caller. 
+ */
+void re_init(re_context *ctx, errcxdef *errctx);
+
+/*
+ *   Delete the context - frees structures associated with the context.
+ *   Does NOT free the memory used by the context structure itself.  
+ */
+void re_delete(re_context *ctx);
+
+/*
+ *   Compile an expression and search for a match within the given string.
+ *   Returns the offset of the match, or -1 if no match was found.  
+ */
+int re_compile_and_search(re_context *ctx,
+                          const char *pattern, size_t patlen,
+                          const char *searchstr, size_t searchlen,
+                          int *result_len);
+
+/*
+ *   Compile an expression and check for a match.  Returns the length of
+ *   the match if we found a match, -1 if we found no match.  This is not
+ *   a search function; we merely match the leading substring of the given
+ *   string to the given pattern.  
  */
-class re_context {
-	/**
-	 * A machine description.  Machines are fully described by their initial
-	 * and final state ID's.
-	 */
-	struct re_machine {
-		re_state_id init;		///< the machine's initial state
-		re_state_id final;		///< the machine's final state
-
-		re_machine() : init(0), final(0) {}
-
-		/**
-		 * Build a null machine
-		 */
-		void build_null_machine() {
-			init = final = RE_STATE_INVALID;
-		}
-
-		/**
-		 * Determine if a machine is null
-		 */
-		bool isNull() const {
-			return (init == RE_STATE_INVALID);
-		}
-	};
-private:
-	/**
-	 * Reset compiler - clears states and tuples
-	 */
-	void reset();
-
-	/**
-	 * Set a transition from a state to a given destination state
-	 */
-	void set_trans(re_state_id id, re_state_id dest_id, char ch);
-
-	/**
-	 * Initialize a new machine, giving it an initial and final state
-	 */
-	void init_machine(re_machine *machine);
-
-	/**
-	 * Build a character recognizer
-	 */
-	void build_char(re_machine *machine, char ch);
-
-	/**
-	 * Build a character range recognizer.  'range' is a 256-bit (32-byte) bit vector.
-	 */
-	void build_char_range(re_machine *machine, unsigned char *range, int exclusion);
-
-	/**
-	 * Build a group recognizer.  This is almost the same as a character
-	 * recognizer, but matches a previous group rather than a literal character.
-	 */
-	void build_group_matcher(re_machine *machine, int group_num);
-
-	/**
-	 *   Build a concatenation recognizer
-	 */
-	void build_concat(re_machine *new_machine, re_machine *lhs, re_machine *rhs);
-
-	/**
-	 * Build a group machine.  sub_machine contains the machine that
-	 * expresses the group's contents; we'll fill in new_machine with a
-	 * newly-created machine that encloses and marks the group.
-	 */
-	void build_group(re_machine *new_machine, re_machine *sub_machine, int group_id);
-
-	/**
-	 * Build an alternation recognizer
-	 */
-	void build_alter(re_machine *new_machine, re_machine *lhs, re_machine *rhs);
-
-	/**
-	 * Build a closure recognizer
-	 */
-	void build_closure(re_machine *new_machine, re_machine *sub, char specifier);
-
-	/**
-	 * Concatenate the second machine onto the first machine, replacing the
-	 * first machine with the resulting machine.  If the first machine is a
-	 * null machine (created with re_build_null_machine), we'll simply copy
-	 * the second machine into the first.
-	 */
-	void concat_onto(re_machine *dest, re_machine *rhs);
-
-	/**
-	 * Alternate the second machine onto the first machine, replacing the
-	 * first machine with the resulting machine.  If the first machine is a
-	 * null machine, this simply replaces the first machine with the second
-	 * machine.  If the second machine is null, this simply leaves the first
-	 * machine unchanged.
-	 */
-	void alternate_onto(re_machine *dest, re_machine *rhs);
-
-	/**
-	 * Compile an expression
-	 */
-	re_status_t compile(const char *expr, size_t exprlen, re_machine *result_machine);
-
-	/**
-	 * Note a group position if appropriate
-	 */
-	void note_group(re_group_register *regs, re_state_id id, const char *p);
-
-	/**
-	 * Determine if a character is part of a word.  We consider letters and
-	 * numbers to be word characters.
-	 */
-	bool is_word_char(char c) const;
-
-	/**
-	 * Match a string to a compiled expression.  Returns the length of the
-	 * match if successful, or -1 if no match was found.
-	 */
-	int match(const char *entire_str, const char *str, size_t origlen,
-		const re_machine *machine, re_group_register *regs);
-
-	/**
-	 * Search for a regular expression within a string.  Returns -1 if the string
-	 * cannot be found, otherwise returns the offset from the start of the string
-	 * to be searched of the start of the first match for the pattern.
-	 */
-	int search(const char *str, size_t len, const re_machine *machine,
-		re_group_register *regs, int *result_len);
-
-	/**
-	 * Make a copy of a search string in our private buffer.
-	 */
-	void save_search_str(const char *str, size_t len);
-public:
-	errcxdef *_errctx;			///< error context
-	re_state_id _next_state;	///< next available state ID
-
-	/**
-	 * The array of transition tuples.  We'll allocate this array and
-	 * expand it as necessary.
-	 */
-	Common::Array<re_tuple> _tuple_arr;
-
-	// current group ID
-	int _cur_group;
-
-	// group registers
-	re_group_register _regs[RE_GROUP_REG_CNT];
-
-	/**
-	 * Buffer for retaining a copy of the last string we scanned.  We
-	 * retain our own copy of each string, and point the group registers
-	 * into this copy rather than the caller's original string -- this
-	 * ensures that the group registers remain valid even after the
-	 * caller has deallocated the original string.
-	 */
-	char *_strbuf;
-
-	/**
-	 * length of the string currently in the buffer
-	 */
-	size_t _curlen;
-
-	/**
-	 * size of the buffer allocated to strbuf
-	 */
-	size_t _strbufsiz;
-public:
-	/**
-	 * Constructor.  The memory for the context structure itself
-	 * must be allocated and maintained by the caller.
-	 */
-	re_context(errcxdef *errctx);
-
-	/**
-	 * Destructor
-	 */
-	~re_context();
-
-	/**
-	 * Allocate a new state ID
-	 */
-	re_state_id alloc_state();
-
-	/**
-	 * Compile an expression and search for a match within the given string.
-	 * Returns the offset of the match, or -1 if no match was found.
-	 */
-	int compile_and_search(const char *pattern, size_t patlen,
-		const char *searchstr, size_t searchlen, int *result_len);
-
-	/**
-	 * Compile an expression and check for a match.  Returns the length of the match
-	 * if we found a match, -1 if we found no match.  This is not a search function;
-	 * we merely match the leading substring of the given string to the given pattern.
-	 */
-	int compile_and_match(const char *pattern, size_t patlen,
-		const char *searchstr, size_t searchlen);
-};
+int re_compile_and_match(re_context *ctx,
+                         const char *pattern, size_t patlen,
+                         const char *searchstr, size_t searchlen);
 
 } // End of namespace TADS2
 } // End of namespace TADS