aboutsummaryrefslogtreecommitdiff
path: root/engines/glk/tads/tads2/tokenizer.h
blob: 882ab9242c9f4bf329d71f754f8920063be31c1d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
/* ScummVM - Graphic Adventure Engine
 *
 * ScummVM is the legal property of its developers, whose names
 * are too numerous to list here. Please refer to the COPYRIGHT
 * file distributed with this source distribution.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 */

#ifndef GLK_TADS_TADS2_TOKENIZER
#define GLK_TADS_TADS2_TOKENIZER

#include "glk/tads/tads2/lib.h"
#include "glk/tads/tads2/error_handling.h"
#include "glk/tads/tads2/line_source.h"
#include "glk/tads/tads2/line_source_file.h"
#include "glk/tads/tads2/memory_cache.h"

namespace Glk {
namespace TADS {
namespace TADS2 {

/*
 *   Number of slots in a hashed symbol table's hash array (see
 *   tokthdef::tokthhsh).  The count must remain a power of 2.
 */
#define TOKHASHSIZE  256

/*
 *   Symbol types.  For several types the comment also notes what the
 *   symbol's value field holds.
 */
#define TOKSTUNK       0  /* unknown symbol, not yet defined */
#define TOKSTFUNC      1  /* function; value is object number */
#define TOKSTOBJ       2  /* object; value is object number */
#define TOKSTPROP      3  /* property; value is property number */
#define TOKSTLOCAL     4  /* a local variable or formal parameter */
#define TOKSTSELF      5  /* the pseudo-object "self" */
#define TOKSTBIFN      6  /* a built-in function */
#define TOKSTFWDOBJ    7  /* forward-referenced object */
#define TOKSTFWDFN     8  /* forward-referenced function */
#define TOKSTINHERIT   9  /* the pseudo-object "inherited" */
#define TOKSTEXTERN   10  /* an external function */
#define TOKSTKW       11  /* keyword; value is token number */
#define TOKSTLABEL    12  /* statement label */
#define TOKSTARGC     13  /* 'argcount' pseudo-variable */
#define TOKSTPROPSPEC 14  /* speculative evaluation property */

/* token types */
#define TOKTEOF    1

/*
 *   Binary operators.  These definitions must be kept together as a
 *   group (see prsbopl[] in prs.c).
 */
#define TOKTPLUS   2
#define TOKTMINUS  3
#define TOKTDIV    4
#define TOKTTIMES  5
#define TOKTNOT    6  /* ! or "not" */
#define TOKTEQ     7
#define TOKTNE     8
#define TOKTGT     9
#define TOKTGE     10
#define TOKTLT     11
#define TOKTLE     12
#define TOKTMOD    13
#define TOKTBAND   14
#define TOKTBOR    15
#define TOKTXOR    16
#define TOKTSHL    17
#define TOKTSHR    18
#define TOKTTILDE  30

/*
 *   Stand-in for TOKTDOT used in speculative evaluation mode.  It exists
 *   only to mark parse tree nodes: in a node it means the same thing as
 *   an ordinary TOKTDOT, except that the code generated for it can't
 *   call methods.
 */
#define TOKTDOTSPEC  31

/* parse tree node marker for explicit superclass inheritance nodes */
#define TOKTEXPINH   32

/* punctuation, literals and symbols */
#define TOKTLPAR      50  /* ( */
#define TOKTRPAR      51  /* ) */
#define TOKTCOLON     52
#define TOKTDSTRING   53  /* string in double quotes */
#define TOKTSSTRING   54  /* string in single quotes */
#define TOKTNUMBER    55
#define TOKTSYMBOL    56
#define TOKTINVALID   57  /* invalid lexical token */
#define TOKTLBRACK    58  /* [ */
#define TOKTRBRACK    59  /* ] */
#define TOKTLBRACE    60  /* { */
#define TOKTRBRACE    61  /* } */
#define TOKTSEM       62  /* ; */
#define TOKTCOMMA     63
#define TOKTDOT       64  /* . */
#define TOKTOR        65  /* | or "or" */
#define TOKTAND       66  /* & or "and" */

/* keywords */
#define TOKTIF        67
#define TOKTELSE      68
#define TOKTWHILE     69
#define TOKTFUNCTION  70
#define TOKTRETURN    71
#define TOKTLOCAL     72
#define TOKTOBJECT    73
#define TOKTBREAK     74
#define TOKTCONTINUE  75
#define TOKTLIST      76  /* a list */
#define TOKTNIL       77
#define TOKTTRUE      78
#define TOKTPASS      79
#define TOKTCLASS     80
#define TOKTEXIT      81
#define TOKTABORT     82
#define TOKTASKDO     83
#define TOKTASKIO     84
#define TOKTPOUND     85  /* # */
#define TOKTQUESTION  86  /* ? */
#define TOKTCOMPOUND  87
#define TOKTIOSYN     88
#define TOKTDOSYN     89
#define TOKTEXTERN    90
#define TOKTFORMAT    91
#define TOKTDO        92
#define TOKTFOR       93
#define TOKTNEW       94
#define TOKTDELETE    95

/*
 *   Assignment operators.  These definitions must be kept together as a
 *   group.  Each post-increment/post-decrement token is required to be
 *   exactly one greater than its pre- counterpart, so it is written
 *   relative to that token.
 */
#define TOKTINC      150            /* ++ */
#define TOKTPOSTINC  (TOKTINC + 1)  /* 151 - MUST BE TOKTINC + 1 */
#define TOKTDEC      152            /* -- */
#define TOKTPOSTDEC  (TOKTDEC + 1)  /* 153 - MUST BE TOKTDEC + 1 */
#define TOKTPLEQ     154            /* += */
#define TOKTMINEQ    155            /* -= */
#define TOKTDIVEQ    156            /* /= */
#define TOKTTIMEQ    157            /* *= */
#define TOKTASSIGN   158            /* simple assignment */
#define TOKTMODEQ    159            /* %= (mod and assign) operator */
#define TOKTBANDEQ   160            /* &= */
#define TOKTBOREQ    161            /* |= */
#define TOKTXOREQ    162            /* ^= (xor and assign) */
#define TOKTSHLEQ    163            /* <<= (shift left and assign) */
#define TOKTSHREQ    164            /* >>= (shift right and assign) */

/* further keywords and punctuation */
#define TOKTSWITCH    200
#define TOKTCASE      201
#define TOKTDEFAULT   202
#define TOKTGOTO      203
#define TOKTELLIPSIS  204  /* ... */
#define TOKTSPECIAL   205  /* "specialWords" */
#define TOKTREPLACE   206  /* replace */
#define TOKTMODIFY    207  /* modify */

/* further operators */
#define TOKTEQEQ      208  /* the '==' operator */
#define TOKTPOINTER   209  /* the -> operator */

/*
 *   Maximum length of a symbol name.  This sizes the name buffers in
 *   toksdef (TOKNAMMAX bytes) and tokdef (TOKNAMMAX+1 bytes).
 */
#define TOKNAMMAX  39

/*
 *   Symbol table entry.
 */
struct toksdef {
    /* type of the symbol */
    uchar    tokstyp;

    /* hash value of the symbol */
    uchar    tokshsh;

    /* value of the symbol; the interpretation depends on the type */
    ushort   toksval;

    /* frame offset of the symbol (for the debugger) */
    ushort   toksfr;

    /* length of the symbol's name */
    uchar    tokslen;

    /* name of the symbol */
    char     toksnam[TOKNAMMAX];
};

/*
 *   Symbol table entry without the 'name' portion, for allocation
 *   purposes.  The fields are the same as those of toksdef, except that
 *   the name array is declared with a single element.
 */
struct toks1def {
    uchar    tokstyp;
    uchar    tokshsh;
    ushort   toksval;
    ushort   toksfr;
    uchar    tokslen;

    /* one-element placeholder for the name */
    char     toksnam[1];
};

/*
 *   Generic symbol table object.  The other symbol table types are
 *   "subclasses" of this one: they embed a toktdef as their first member
 *   and supply the function pointers below.
 */
struct toktdef {
    /* add a symbol */
    void     (*toktfadd)(toktdef *tab, char *name, int namel, int typ,
                         int val, int hash);

    /* search the symbol table */
    int      (*toktfsea)(toktdef *tab, char *name, int namel, int hash,
                         toksdef *ret);

    /* update val & typ of a symbol to those in *sym */
    void     (*toktfset)(toktdef *tab, toksdef *sym);

    /* call fn for each symbol */
    void     (*toktfeach)(toktdef *tab,
                          void (*fn)(void *ctx, toksdef *sym),
                          void *fnctx);

    /* next symbol table to be searched */
    toktdef   *toktnxt;

    /* error handling context */
    errcxdef  *tokterr;
};

/*
 *   Maximum number of symbol pools a hashed symbol table may hold; each
 *   pool is TOKTHSIZE bytes.
 */
#define TOKPOOLMAX  128

/*
 *   Pointer to a symbol in a hashed symbol table, expressed as a cache
 *   page plus an offset within that page.
 */
struct tokthpdef {
    /* cache manager object number of the page */
    mcmon  tokthpobj;

    /* offset of this symbol within the page */
    uint   tokthpofs;
};

/*
 *   Extended symbol entry used in a hashed symbol table: a link to the
 *   next symbol followed by the ordinary symbol entry.
 */
struct tokshdef {
    /* pointer to the next symbol in the table */
    tokthpdef tokshnxt;

    /* superclass - the normal symbol entry */
    toksdef   tokshsc;
};

/*
 *   Hashing symbol table (a subclass of the generic symbol table).
 */
struct tokthdef {
    /* generic symbol table superclass data */
    toktdef   tokthsc;

    /* memory manager context */
    mcmcxdef *tokthmem;

    /* the hash table */
    tokthpdef tokthhsh[TOKHASHSIZE];

    /* number of memory pools for toksdef's */
    uint      tokthpcnt;

    /* the memory pools for toksdef's */
    mcmon     tokthpool[TOKPOOLMAX];

    /* actual sizes of these pools */
    uint      tokthfinal[TOKPOOLMAX];

    /* current pool pointer */
    uchar    *tokthcpool;

    /* remaining size of the top memory pool */
    ushort    tokthsize;

    /* allocation offset in the top memory pool */
    ushort    tokthofs;
};

/* allocation size of each toksdef pool in a hashed symbol table */
#define TOKTHSIZE  4096

/*
 *   Linear symbol table embedded in a cache object.  Frame
 *   parameter/local variable lists use this kind of table.  It has no
 *   hash buckets and is therefore searched linearly, which makes it
 *   suitable mainly for small tables; in exchange it is compact enough
 *   to be embedded in code.
 */
struct toktldef {
    /* generic symbol table superclass data */
    toktdef   toktlsc;

    /* base of the linear symbol table */
    uchar    *toktlptr;

    /* next free byte in the table */
    uchar    *toktlnxt;

    /* number of objects in the table */
    uint      toktlcnt;

    /* bytes remaining in the table */
    uint      toktlsiz;
};

/*
 *   A single token, as held in the lexical analysis context.
 */
struct tokdef {
    /* type of the token */
    int      toktyp;

    /* length of the token text, if a symbolic token */
    int      toklen;

    /* numeric value, if applicable */
    long     tokval;

    /* NOTE(review): undocumented in the original; meaning not visible here */
    ushort   tokofs;

    /* token hash value, if a symbolic token */
    uint     tokhash;

    /* text of the token, if a symbolic token */
    char     toknam[TOKNAMMAX+1];

    /* symbol from the table matching the token */
    toksdef  toksym;
};

/* maximum length of a special character sequence */
#define TOKSCMAX  3

/*
 *   Special character sequence.
 */
struct tokscdef {
    /* next sequence with the same first character */
    tokscdef *tokscnxt;

    /* token type corresponding to the sequence */
    int       toksctyp;

    /* length of the sequence */
    int       toksclen;

    /* the sequence itself */
    char      tokscstr[TOKSCMAX+1];
};

/*
 *   Compare a special character sequence against input text.
 *
 *   str1/len1 are the sequence to match and its length; str2/len2 are
 *   the text being tested and the number of characters available in it.
 *   The result is non-zero when str2 has at least len1 characters and
 *   they match str1.
 *
 *   For efficiency, a hand-unrolled version is defined for the maximum
 *   length available (TOKSCMAX).  It never examines the first character:
 *   that character is always equal, or the string wouldn't even get to
 *   the point of being tested by this macro.
 *
 *   The generic fallback (used for any other TOKSCMAX) takes the same
 *   four arguments and applies the same length check, so call sites
 *   compile and behave identically whichever version is selected.  The
 *   arguments may be evaluated more than once.
 */
#if TOKSCMAX == 3
# define toksceq(str1, str2, len1, len2) \
    ((len2) >= (len1) \
     && ((len1) == 1 \
         || ((str1)[1] == (str2)[1] \
             && ((len1) == 2 \
                 || (str1)[2] == (str2)[2]))))
#endif /* TOKSCMAX == 3 */
#ifndef toksceq
# define toksceq(str1, str2, len1, len2) \
    ((len2) >= (len1) && !memcmp(str1, str2, (size_t)(len1)))
#endif /* toksceq */

/*
 *   Entry in a special character sequence list table.
 */
struct tokldef {
    /* token type corresponding to the sequence */
    int  tokltyp;

    /* the text of the sequence */
    char toklstr[TOKSCMAX+1];
};

/*
 *   Include path structure: one directory in a linked list of include
 *   paths.
 */
struct tokpdef {
    /* next path in the list */
    tokpdef *tokpnxt;

    /* length of the directory name */
    int      tokplen;

    /* directory to search (declared with a single element) */
    char     tokpdir[1];
};

/*
 *   #define symbol structure: one preprocessor symbol and its expansion.
 */
struct tokdfdef {
    /* next symbol in the same hash chain */
    tokdfdef *nxt;

    /* name of the symbol */
    char     *nm;

    /* length of the symbol */
    int       len;

    /* length of the expansion */
    int       explen;

    /* expansion buffer (declared with a single element) */
    char      expan[1];
};

/*
 *   #define hash table information: the number of hash chains, and the
 *   mask derived from it (one less than the size).
 */
#define TOKDFHSHSIZ   64
#define TOKDFHSHMASK  (TOKDFHSHSIZ - 1)

/* deepest permitted nesting of #if blocks */
#define TOKIFNEST     64

/* #if states */
#define TOKIF_IF_YES    1  /* processing a true #if/#ifdef block */
#define TOKIF_IF_NO     2  /* processing a false #if/#ifdef block */
#define TOKIF_ELSE_YES  3  /* processing a true #else part */
#define TOKIF_ELSE_NO   4  /* processing a false #else part */

/* deepest permitted nesting of macro expansions */
#define TOKMACNEST  20

/*
 *   Lexical analysis context.
 */
struct tokcxdef {
    /* error handling context */
    errcxdef *tokcxerr;

    /* cache manager context */
    mcmcxdef *tokcxmem;

    /* debugger context */
    struct    dbgcxdef *tokcxdbg;

    /* line source */
    lindef   *tokcxlin;

    /* head of the include path list */
    tokpdef  *tokcxinc;

    /* current head of the symbol table chain */
    toktdef  *tokcxstab;

    /* context passed to the string storage callback functions below */
    void     *tokcxscx;

    /* start storing a string; returns the offset of the string's storage */
    ushort  (*tokcxsst)(void *ctx);

    /* add characters to a string */
    void    (*tokcxsad)(void *ctx, const char *str, ushort len);

    /* finish storing a string */
    void    (*tokcxsend)(void *ctx);

    /* saved positions for macro expansion */
    const char *tokcxmsav[TOKMACNEST];

    /* saved lengths for macro expansion */
    ushort    tokcxmsvl[TOKMACNEST];

    /* macro nesting level */
    int       tokcxmlvl;

    /* flags: a combination of the TOKCXF* bits defined below */
    int       tokcxflg;
#   define    TOKCXFINMAC    0x01  /* doing <<expr>> macro expansion */
#   define    TOKCXCASEFOLD  0x02  /* fold upper and lower case */
#   define    TOKCXFCMODE    0x04  /* parse using C operators */
#   define    TOKCXF_EMBED_PAREN_PRE 0x08  /* embedded expr - did '(' */
#   define    TOKCXF_EMBED_PAREN_AFT 0x10  /* embedded expr - must do ')' */
#   define    TOKCXFLIN2     0x20  /* new-style line records */

    /* current token */
    tokdef    tokcxcur;

    /* buffer for long lines */
    char     *tokcxbuf;

    /* size of the long line buffer */
    ushort    tokcxbsz;

    /* pointer into the line source */
    const char *tokcxptr;

    /* length of the text in the buffer */
    ushort    tokcxlen;

    /* special character indices */
    uchar     tokcxinx[256];

    /* hash table for #define symbols */
    tokdfdef *tokcxdf[TOKDFHSHSIZ];

    /* number of #endif's we expect to find */
    int       tokcxifcnt;

    /* #if state for each nesting level */
    char      tokcxif[TOKIFNEST];

    /* current #if state, obeying nesting */
    int       tokcxifcur;

    /* list of previously included headers */
    linfdef  *tokcxhdr;

    /* special character table (declared with a single element) */
    tokscdef *tokcxsc[1];
};


/* create a lexical analysis context: allocate it and initialize it */
tokcxdef *tokcxini(errcxdef *errctx, mcmcxdef *mctx, tokldef *sctab);

/* add an include path to a token handling context */
void tokaddinc(tokcxdef *ctx, char *path, int pathlen);

/* compute the hash value of a string */
uint tokhsh(char *nam);

/*
 *   Fold the case of a token if we're in case-insensitive mode.  Call
 *   this any time a token is constructed artificially.  It need not be
 *   used if the token is read through the tokenizer, because the
 *   tokenizer will always adjust a token as needed before returning it.
 */
void tok_case_fold(tokcxdef *ctx, tokdef *tok);

/*
 *   Hashed symbol table operations.
 */

/* initialize a hashed symbol table */
void tokthini(errcxdef *errctx, mcmcxdef *memctx, toktdef *toktab1);

/* add a symbol to a hashed symbol table */
void tokthadd(toktdef *toktab, char *name, int namel, int typ, int val, int hash);

/* update a symbol in a hashed symbol table */
void tokthset(toktdef *toktab, toksdef *sym);

/* search a hashed symbol table for a symbol */
int tokthsea(toktdef *tab, char *name, int namel, int hash, toksdef *ret);

/* call a function for each symbol in a hashed symbol table */
void toktheach(toktdef *tab, void (*cb)(void *ctx, toksdef *sym), void *ctx);

/* find a symbol given its type and value */
int tokthfind(toktdef *tab, int typ, uint val, toksdef *sym);

/*
 *   Linear symbol table operations.
 */

/* initialize a linear symbol table */
void toktlini(errcxdef *errctx, toktldef *toktab, uchar *mem, uint siz);

/* add a symbol to a linear symbol table */
void toktladd(toktdef *toktab, char *name, int namel, int typ, int val, int hash);

/* search a linear symbol table */
int toktlsea(toktdef *tab, char *name, int namel, int hash, toksdef *ret);

/* update a symbol in a linear symbol table */
void toktlset(toktdef *toktab, toksdef *sym);

/* call a function for each symbol in a local symbol table */
void toktleach(toktdef *tab, void (*cb)(void *ctx, toksdef *sym), void *ctx);

/* delete all symbols from a linear table */
void toktldel(toktldef *tab);

/* fetch the next token, removing it from the input stream */
int toknext(tokcxdef *ctx);

/* general-purpose routine to get or peek at the next token */
int tokget1(tokcxdef *ctx, tokdef *tok, int consume);

/*
 *   #define symbol table operations.
 */

/* add a symbol to the #define symbol table */
void tok_add_define(tokcxdef *ctx, const char *sym, int len, const char *expan, int explen);

/*
 *   add a symbol to the #define symbol table, folding case if we're
 *   operating in case-insensitive mode
 */
void tok_add_define_cvtcase(tokcxdef *ctx, char *sym, int len, char *expan, int explen);

/* add a symbol to the #define symbol table as a number */
void tok_add_define_num_cvtcase(tokcxdef *ctx, char *sym, int len, int num);

/* undefine a #define symbol */
void tok_del_define(tokcxdef *ctx, char *sym, int len);

/* read preprocessor symbols from a file */
void tok_read_defines(tokcxdef *ctx, osfildef *fp, errcxdef *ec);

/* write preprocessor state to a file */
void tok_write_defines(tokcxdef *ctx, osfildef *fp, errcxdef *ec);


/*
 *   Character classification and digit conversion macros.  Each macro
 *   may evaluate its argument more than once, so don't pass expressions
 *   with side effects.
 */

/* determine if a char is a valid non-initial character in a symbol name */
#define TOKISSYM(c) \
    (Common::isAlpha((uchar)(c)) || Common::isDigit((uchar)(c)) || (c)=='_' || (c)=='$')

/* numeric conversion and checking macros */

/* true if c is a hexadecimal digit: a decimal digit, a-f, or A-F */
#define TOKISHEX(c) \
    (Common::isDigit((uchar)(c))||((c)>='a'&&(c)<='f')||((c)>='A'&&(c)<='F'))

/* true if c is an octal digit: a decimal digit other than '8' or '9' */
#define TOKISOCT(c) \
    (Common::isDigit((uchar)(c))&&!((c)=='8'||(c)=='9'))

/*
 *   Convert a hex digit character to its numeric value.  No validation
 *   is done; the result is meaningful only if c is a valid hex digit.
 *   The argument is parenthesized inside the cast so that an expression
 *   argument is cast as a whole rather than just its first operand.
 */
#define TOKHEX2INT(c) \
    (Common::isDigit((uchar)(c))?(c)-'0':((c)>='a'?(c)-'a'+10:(c)-'A'+10))

/* convert an octal or decimal digit character to its numeric value */
#define TOKOCT2INT(c) ((c)-'0')
#define TOKDEC2INT(c) ((c)-'0')

} // End of namespace TADS2
} // End of namespace TADS
} // End of namespace Glk

#endif