aboutsummaryrefslogtreecommitdiff
path: root/engines/glk/tads/tads2/regex.h
diff options
context:
space:
mode:
Diffstat (limited to 'engines/glk/tads/tads2/regex.h')
-rw-r--r--engines/glk/tads/tads2/regex.h378
1 files changed, 127 insertions, 251 deletions
diff --git a/engines/glk/tads/tads2/regex.h b/engines/glk/tads/tads2/regex.h
index cd975e066b..48c2dd0ad4 100644
--- a/engines/glk/tads/tads2/regex.h
+++ b/engines/glk/tads/tads2/regex.h
@@ -24,289 +24,165 @@
#define GLK_TADS_TADS2_REGEX
#include "common/array.h"
-#include "glk/tads/tads2/ler.h"
+#include "glk/tads/tads2/error_handling.h"
namespace Glk {
namespace TADS {
namespace TADS2 {
-/**
- * state ID
- */
+
+/* state ID */
typedef int re_state_id;
-/**
- * invalid state ID - used to mark null machines
- */
+/* invalid state ID - used to mark null machines */
#define RE_STATE_INVALID ((re_state_id)-1)
-/**
- * first valid state ID
- */
+/* first valid state ID */
#define RE_STATE_FIRST_VALID ((re_state_id)0)
-/**
+/* ------------------------------------------------------------------------ */
+/*
* Group register structure. Each register keeps track of the starting
- * and ending offset of the group's text.
+ * and ending offset of the group's text.
*/
-struct re_group_register {
- const char *start_ofs;
- const char *end_ofs;
-};
+typedef struct
+{
+ const char *start_ofs;
+ const char *end_ofs;
+} re_group_register;
-/**
- * number of group registers we keep
- */
+/* number of group registers we keep */
#define RE_GROUP_REG_CNT 10
-/**
- * Denormalized state transition tuple. Each tuple represents the
- * complete set of transitions out of a particular state. A particular
- * state can have one character transition, or two epsilon transitions.
- * Note that we don't need to store the state ID in the tuple, because
- * the state ID is the index of the tuple in an array of state tuples.
+
+/* ------------------------------------------------------------------------ */
+/*
+ * Denormalized state transition tuple. Each tuple represents the
+ * complete set of transitions out of a particular state. A particular
+ * state can have one character transition, or two epsilon transitions.
+ * Note that we don't need to store the state ID in the tuple, because
+ * the state ID is the index of the tuple in an array of state tuples.
*/
-struct re_tuple {
- // the character we must match to transition to the target state
- char ch;
+typedef struct
+{
+ /* the character we must match to transition to the target state */
+ char ch;
- // the target states
- re_state_id next_state_1;
- re_state_id next_state_2;
+ /* the target states */
+ re_state_id next_state_1;
+ re_state_id next_state_2;
- // character range match table, if used
- unsigned char *char_range;
+ /* character range match table, if used */
+ unsigned char *char_range;
- // flags
- byte flags;
-};
+ /* flags */
+ unsigned char flags;
+} re_tuple;
-/**
- * Tuple flags
+/*
+ * Tuple flags
*/
-enum {
- // this state is the start of a group - the 'ch' value is the group ID
- RE_STATE_GROUP_BEGIN = 0x02,
- // this state is the end of a group - 'ch' is the group ID */
- RE_STATE_GROUP_END = 0x04
-};
+/* this state is the start of a group - the 'ch' value is the group ID */
+#define RE_STATE_GROUP_BEGIN 0x02
+
+/* this state is the end of a group - 'ch' is the group ID */
+#define RE_STATE_GROUP_END 0x04
+
+
+/* ------------------------------------------------------------------------ */
+/*
+ * Regular expression compilation context structure. This tracks the
+ * state of the compilation and stores the resources associated with the
+ * compiled expression.
+ */
+typedef struct
+{
+ /* error context */
+ errcxdef *errctx;
+
+ /* next available state ID */
+ re_state_id next_state;
+
+ /*
+ * The array of transition tuples. We'll allocate this array and
+ * expand it as necessary.
+ */
+ re_tuple *tuple_arr;
-/**
- * Status codes
+ /* number of transition tuples allocated in the array */
+ int tuples_alloc;
+
+ /* current group ID */
+ int cur_group;
+
+ /* group registers */
+ re_group_register regs[RE_GROUP_REG_CNT];
+
+ /*
+ * Buffer for retaining a copy of the last string we scanned. We
+ * retain our own copy of each string, and point the group registers
+ * into this copy rather than the caller's original string -- this
+ * ensures that the group registers remain valid even after the
+ * caller has deallocated the original string.
+ */
+ char *strbuf;
+
+ /* length of the string currently in the buffer */
+ size_t curlen;
+
+ /* size of the buffer allocated to strbuf */
+ size_t strbufsiz;
+} re_context;
+
+
+/* ------------------------------------------------------------------------ */
+/*
+ * Status codes
*/
-typedef enum {
- // success
- RE_STATUS_SUCCESS = 0,
+typedef enum
+{
+ /* success */
+ RE_STATUS_SUCCESS = 0,
- // compilation error - group nesting too deep
- RE_STATUS_GROUP_NESTING_TOO_DEEP
+ /* compilation error - group nesting too deep */
+ RE_STATUS_GROUP_NESTING_TOO_DEEP
} re_status_t;
-/**
- * Regular expression compilation. This tracks the state of the compilation and
- * stores the resources associated with the compiled expression.
+/* ------------------------------------------------------------------------ */
+/*
+ * Initialize the context. The memory for the context structure itself
+ * must be allocated and maintained by the caller.
+ */
+void re_init(re_context *ctx, errcxdef *errctx);
+
+/*
+ * Delete the context - frees structures associated with the context.
+ * Does NOT free the memory used by the context structure itself.
+ */
+void re_delete(re_context *ctx);
+
+/*
+ * Compile an expression and search for a match within the given string.
+ * Returns the offset of the match, or -1 if no match was found.
+ */
+int re_compile_and_search(re_context *ctx,
+ const char *pattern, size_t patlen,
+ const char *searchstr, size_t searchlen,
+ int *result_len);
+
+/*
+ * Compile an expression and check for a match. Returns the length of
+ * the match if we found a match, -1 if we found no match. This is not
+ * a search function; we merely match the leading substring of the given
+ * string to the given pattern.
*/
-class re_context {
- /**
- * A machine description. Machines are fully described by their initial
- * and final state ID's.
- */
- struct re_machine {
- re_state_id init; ///< the machine's initial state
- re_state_id final; ///< the machine's final state
-
- re_machine() : init(0), final(0) {}
-
- /**
- * Build a null machine
- */
- void build_null_machine() {
- init = final = RE_STATE_INVALID;
- }
-
- /**
- * Determine if a machine is null
- */
- bool isNull() const {
- return (init == RE_STATE_INVALID);
- }
- };
-private:
- /**
- * Reset compiler - clears states and tuples
- */
- void reset();
-
- /**
- * Set a transition from a state to a given destination state
- */
- void set_trans(re_state_id id, re_state_id dest_id, char ch);
-
- /**
- * Initialize a new machine, giving it an initial and final state
- */
- void init_machine(re_machine *machine);
-
- /**
- * Build a character recognizer
- */
- void build_char(re_machine *machine, char ch);
-
- /**
- * Build a character range recognizer. 'range' is a 256-bit (32-byte) bit vector.
- */
- void build_char_range(re_machine *machine, unsigned char *range, int exclusion);
-
- /**
- * Build a group recognizer. This is almost the same as a character
- * recognizer, but matches a previous group rather than a literal character.
- */
- void build_group_matcher(re_machine *machine, int group_num);
-
- /**
- * Build a concatenation recognizer
- */
- void build_concat(re_machine *new_machine, re_machine *lhs, re_machine *rhs);
-
- /**
- * Build a group machine. sub_machine contains the machine that
- * expresses the group's contents; we'll fill in new_machine with a
- * newly-created machine that encloses and marks the group.
- */
- void build_group(re_machine *new_machine, re_machine *sub_machine, int group_id);
-
- /**
- * Build an alternation recognizer
- */
- void build_alter(re_machine *new_machine, re_machine *lhs, re_machine *rhs);
-
- /**
- * Build a closure recognizer
- */
- void build_closure(re_machine *new_machine, re_machine *sub, char specifier);
-
- /**
- * Concatenate the second machine onto the first machine, replacing the
- * first machine with the resulting machine. If the first machine is a
- * null machine (created with re_build_null_machine), we'll simply copy
- * the second machine into the first.
- */
- void concat_onto(re_machine *dest, re_machine *rhs);
-
- /**
- * Alternate the second machine onto the first machine, replacing the
- * first machine with the resulting machine. If the first machine is a
- * null machine, this simply replaces the first machine with the second
- * machine. If the second machine is null, this simply leaves the first
- * machine unchanged.
- */
- void alternate_onto(re_machine *dest, re_machine *rhs);
-
- /**
- * Compile an expression
- */
- re_status_t compile(const char *expr, size_t exprlen, re_machine *result_machine);
-
- /**
- * Note a group position if appropriate
- */
- void note_group(re_group_register *regs, re_state_id id, const char *p);
-
- /**
- * Determine if a character is part of a word. We consider letters and
- * numbers to be word characters.
- */
- bool is_word_char(char c) const;
-
- /**
- * Match a string to a compiled expression. Returns the length of the
- * match if successful, or -1 if no match was found.
- */
- int match(const char *entire_str, const char *str, size_t origlen,
- const re_machine *machine, re_group_register *regs);
-
- /**
- * Search for a regular expression within a string. Returns -1 if the string
- * cannot be found, otherwise returns the offset from the start of the string
- * to be searched of the start of the first match for the pattern.
- */
- int search(const char *str, size_t len, const re_machine *machine,
- re_group_register *regs, int *result_len);
-
- /**
- * Make a copy of a search string in our private buffer.
- */
- void save_search_str(const char *str, size_t len);
-public:
- errcxdef *_errctx; ///< error context
- re_state_id _next_state; ///< next available state ID
-
- /**
- * The array of transition tuples. We'll allocate this array and
- * expand it as necessary.
- */
- Common::Array<re_tuple> _tuple_arr;
-
- // current group ID
- int _cur_group;
-
- // group registers
- re_group_register _regs[RE_GROUP_REG_CNT];
-
- /**
- * Buffer for retaining a copy of the last string we scanned. We
- * retain our own copy of each string, and point the group registers
- * into this copy rather than the caller's original string -- this
- * ensures that the group registers remain valid even after the
- * caller has deallocated the original string.
- */
- char *_strbuf;
-
- /**
- * length of the string currently in the buffer
- */
- size_t _curlen;
-
- /**
- * size of the buffer allocated to strbuf
- */
- size_t _strbufsiz;
-public:
- /**
- * Constructor. The memory for the context structure itself
- * must be allocated and maintained by the caller.
- */
- re_context(errcxdef *errctx);
-
- /**
- * Destructor
- */
- ~re_context();
-
- /**
- * Allocate a new state ID
- */
- re_state_id alloc_state();
-
- /**
- * Compile an expression and search for a match within the given string.
- * Returns the offset of the match, or -1 if no match was found.
- */
- int compile_and_search(const char *pattern, size_t patlen,
- const char *searchstr, size_t searchlen, int *result_len);
-
- /**
- * Compile an expression and check for a match. Returns the length of the match
- * if we found a match, -1 if we found no match. This is not a search function;
- * we merely match the leading substring of the given string to the given pattern.
- */
- int compile_and_match(const char *pattern, size_t patlen,
- const char *searchstr, size_t searchlen);
-};
+int re_compile_and_match(re_context *ctx,
+ const char *pattern, size_t patlen,
+ const char *searchstr, size_t searchlen);
} // End of namespace TADS2
} // End of namespace TADS