aboutsummaryrefslogtreecommitdiff
path: root/engines/glk/tads/tads2/regex.h
blob: 0f3acfc18bdf87d83ba2a3dd4ecc892bb1dd6724 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/* ScummVM - Graphic Adventure Engine
 *
 * ScummVM is the legal property of its developers, whose names
 * are too numerous to list here. Please refer to the COPYRIGHT
 * file distributed with this source distribution.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 */

#ifndef GLK_TADS_TADS2_REGEX
#define GLK_TADS_TADS2_REGEX

#include "common/array.h"
#include "glk/tads/tads2/error_handling.h"

namespace Glk {
namespace TADS {
namespace TADS2 {


/* state ID */
typedef int re_state_id;

/* invalid state ID - used to mark null machines */
#define RE_STATE_INVALID   ((re_state_id)-1)

/* first valid state ID */
#define RE_STATE_FIRST_VALID  ((re_state_id)0)


/* ------------------------------------------------------------------------ */
/*
 *   Group register structure.  Each register keeps track of the starting
 *   and ending offset of the group's text.  
 */
typedef struct _re_group_register
{
    const char *start_ofs;
    const char *end_ofs;

    _re_group_register() : start_ofs(nullptr), end_ofs(nullptr) {}
} re_group_register;

/* number of group registers we keep */
#define RE_GROUP_REG_CNT  10


/* ------------------------------------------------------------------------ */
/* 
 *   Denormalized state transition tuple.  Each tuple represents the
 *   complete set of transitions out of a particular state.  A particular
 *   state can have one character transition, or two epsilon transitions.
 *   Note that we don't need to store the state ID in the tuple, because
 *   the state ID is the index of the tuple in an array of state tuples.  
 */
typedef struct
{
    /* the character we must match to transition to the target state */
    char ch;

    /* the target states */
    re_state_id next_state_1;
    re_state_id next_state_2;

    /* character range match table, if used */
    unsigned char *char_range;

    /* flags */
    unsigned char flags;
} re_tuple;


/*
 *   Tuple flags 
 */

/* this state is the start of a group - the 'ch' value is the group ID */
#define RE_STATE_GROUP_BEGIN  0x02

/* this state is the end of a group - 'ch' is the group ID */
#define RE_STATE_GROUP_END    0x04


/* ------------------------------------------------------------------------ */
/*
 *   Regular expression compilation context structure.  This tracks the
 *   state of the compilation and stores the resources associated with the
 *   compiled expression.  
 */
typedef struct _re_context
{
    /* error context */
    errcxdef *errctx;

    /* next available state ID */
    re_state_id next_state;

    /*
     *   The array of transition tuples.  We'll allocate this array and
     *   expand it as necessary.  
     */
    re_tuple *tuple_arr;

    /* number of transition tuples allocated in the array */
    int tuples_alloc;

    /* current group ID */
    int cur_group;

    /* group registers */
    re_group_register regs[RE_GROUP_REG_CNT];

    /* 
     *   Buffer for retaining a copy of the last string we scanned.  We
     *   retain our own copy of each string, and point the group registers
     *   into this copy rather than the caller's original string -- this
     *   ensures that the group registers remain valid even after the
     *   caller has deallocated the original string.  
     */
    char *strbuf;

    /* length of the string currently in the buffer */
    size_t curlen;

    /* size of the buffer allocated to strbuf */
    size_t strbufsiz;

    _re_context() : errctx(nullptr), next_state(0), tuple_arr(nullptr), tuples_alloc(0), cur_group(0), strbuf(nullptr), curlen(0), strbufsiz(0) {}
} re_context;


/* ------------------------------------------------------------------------ */
/*
 *   Status codes 
 */
typedef enum
{
    /* success */
    RE_STATUS_SUCCESS = 0,

    /* compilation error - group nesting too deep */
    RE_STATUS_GROUP_NESTING_TOO_DEEP
} re_status_t;


/* ------------------------------------------------------------------------ */
/*
 *   Initialize the context.  The memory for the context structure itself
 *   must be allocated and maintained by the caller. 
 */
void re_init(re_context *ctx, errcxdef *errctx);

/*
 *   Delete the context - frees structures associated with the context.
 *   Does NOT free the memory used by the context structure itself.  
 */
void re_delete(re_context *ctx);

/*
 *   Compile an expression and search for a match within the given string.
 *   Returns the offset of the match, or -1 if no match was found.  
 */
int re_compile_and_search(re_context *ctx,
                          const char *pattern, size_t patlen,
                          const char *searchstr, size_t searchlen,
                          int *result_len);

/*
 *   Compile an expression and check for a match.  Returns the length of
 *   the match if we found a match, -1 if we found no match.  This is not
 *   a search function; we merely match the leading substring of the given
 *   string to the given pattern.  
 */
int re_compile_and_match(re_context *ctx,
                         const char *pattern, size_t patlen,
                         const char *searchstr, size_t searchlen);

} // End of namespace TADS2
} // End of namespace TADS
} // End of namespace Glk

#endif