From f20d07381cfdbb625550167e259a2ea58f81fcd9 Mon Sep 17 00:00:00 2001 From: Paul Gilbert Date: Sat, 1 Dec 2018 16:34:48 -0800 Subject: GLK: TADS: Added cmap file --- engines/glk/module.mk | 1 + engines/glk/tads/tads2/ler.h | 319 +++++++++ engines/glk/tads/tads2/os.cpp | 36 + engines/glk/tads/tads2/os.h | 156 +--- engines/glk/tads/tads2/regex.cpp | 1276 +++++++++++++++++++++++++++++++++ engines/glk/tads/tads2/regex.h | 315 ++++++++ engines/glk/tads/tads2/tads2.cpp | 1 + engines/glk/tads/tads2/tads2.h | 128 +++- engines/glk/tads/tads2/tads2_cmap.cpp | 268 +++++++ engines/glk/tads/tads2/types.h | 188 +++++ 10 files changed, 2547 insertions(+), 141 deletions(-) create mode 100644 engines/glk/tads/tads2/regex.cpp create mode 100644 engines/glk/tads/tads2/regex.h create mode 100644 engines/glk/tads/tads2/tads2_cmap.cpp create mode 100644 engines/glk/tads/tads2/types.h diff --git a/engines/glk/module.mk b/engines/glk/module.mk index a8128506c9..5c17362a0d 100644 --- a/engines/glk/module.mk +++ b/engines/glk/module.mk @@ -49,6 +49,7 @@ MODULE_OBJS := \ tads/tads2/os.o \ tads/tads2/regex.o \ tads/tads2/tads2.o \ + tads/tads2/tads2_cmap.o \ tads/tads3/tads3.o # This module can be built as a plugin diff --git a/engines/glk/tads/tads2/ler.h b/engines/glk/tads/tads2/ler.h index 000aa093f4..8edb945115 100644 --- a/engines/glk/tads/tads2/ler.h +++ b/engines/glk/tads/tads2/ler.h @@ -34,6 +34,314 @@ namespace TADS2 { // maximum length of a facility identifier #define ERRFACMAX 6 +enum ErrorCode { + /* memory/cache manager errors */ + ERR_NOMEM = 1, /* out of memory */ + ERR_FSEEK = 2, /* error seeking in file */ + ERR_FREAD = 3, /* error reading from file */ + ERR_NOPAGE = 4, /* no more page slots */ + ERR_REALCK = 5, /* attempting to reallocate a locked object */ + ERR_SWAPBIG = 6, /* swapfile limit reached - out of virtual memory */ + ERR_FWRITE = 7, /* error writing file */ + ERR_SWAPPG = 8, /* exceeded swap page table limit */ + ERR_CLIUSE = 9, /* requested client object 
number already in use */ + ERR_CLIFULL = 10, /* client mapping table is full */ + ERR_NOMEM1 = 11, /* swapping/garbage collection failed to find enuf */ + ERR_NOMEM2 = 12, /* no memory to resize (expand) an object */ + ERR_OPSWAP = 13, /* unable to open swap file */ + ERR_NOHDR = 14, /* can't get a new object header */ + ERR_NOLOAD = 15, /* mcm cannot find object to load (internal error) */ + ERR_LCKFRE = 16, /* attempting to free a locked object (internal) */ + ERR_INVOBJ = 17, /* invalid object */ + ERR_BIGOBJ = 18, /* object too big - exceeds memory allocation limit */ + + /* lexical analysis errors */ + ERR_INVTOK = 100, /* invalid token */ + ERR_STREOF = 101, /* end of file while scanning string */ + ERR_TRUNC = 102, /* symbol too long - truncated */ + ERR_NOLCLSY = 103, /* no space in local symbol table */ + ERR_PRPDIR = 104, /* invalid preprocessor (#) directive */ + ERR_INCNOFN = 105, /* no filename in #include directive */ + ERR_INCSYN = 106, /* invalid #include syntax */ + ERR_INCSEAR = 107, /* can't find included file */ + ERR_INCMTCH = 108, /* no matching delimiter in #include filename */ + ERR_MANYSYM = 109, /* out of space for symbol table */ + ERR_LONGLIN = 110, /* line too long */ + ERR_INCRPT = 111, /* header file already included - ignored */ + ERR_PRAGMA = 112, /* unknown pragma (ignored) */ + ERR_BADPELSE = 113, /* unexpected #else */ + ERR_BADENDIF = 114, /* unexpected #endif */ + ERR_BADELIF = 115, /* unexpected #elif */ + ERR_MANYPIF = 116, /* #if nesting too deep */ + ERR_DEFREDEF = 117, /* symbol already defined */ + ERR_PUNDEF = 118, /* #undef symbol not defined */ + ERR_NOENDIF = 119, /* missing #endif */ + ERR_MACNEST = 120, /* macros nested too deeply */ + ERR_BADISDEF = 121, /* invalid argument for defined() operator */ + ERR_PIF_NA = 122, /* #if is not implemented */ + ERR_PELIF_NA = 123, /* #elif is not implemented */ + ERR_P_ERROR = 124, /* error directive: %s */ + ERR_LONG_FILE_MACRO = 125, /* __FILE__ expansion too long */ + 
ERR_LONG_LINE_MACRO = 126, /* __LINE__ expansion too long */ + + /* undo errors */ + ERR_UNDOVF = 200, /* operation is too big for undo log */ + ERR_NOUNDO = 201, /* no more undo information */ + ERR_ICUNDO = 202, /* incomplete undo (no previous savepoint) */ + + /* parser errors */ + ERR_REQTOK = 300, /* expected token (arg 1) - found something else */ + ERR_REQSYM = 301, /* expected symbol */ + ERR_REQPRP = 302, /* expected a property name */ + ERR_REQOPN = 303, /* expected operand */ + ERR_REQARG = 304, /* expected comma or closing paren (arg list) */ + ERR_NONODE = 305, /* no space for new parse node */ + ERR_REQOBJ = 306, /* expected object name */ + ERR_REQEXT = 307, /* redefining symbol as external function */ + ERR_REQFCN = 308, /* redefining symbol as function */ + ERR_NOCLASS = 309, /* can't use CLASS with function/external function */ + ERR_REQUNO = 310, /* required unary operator */ + ERR_REQBIN = 311, /* required binary operator */ + ERR_INVBIN = 312, /* invalid binary operator */ + ERR_INVASI = 313, /* invalid assignment */ + ERR_REQVAR = 314, /* required variable name */ + ERR_LCLSYN = 315, /* required comma or semicolon in local list */ + ERR_REQRBR = 316, /* required right brace (eof before end of group) */ + ERR_BADBRK = 317, /* 'break' without 'while' */ + ERR_BADCNT = 318, /* 'continue' without 'while' */ + ERR_BADELS = 319, /* 'else' without 'if' */ + ERR_WEQASI = 320, /* warning: possible use of '=' where ':=' intended */ + ERR_EOF = 321, /* unexpected end of file */ + ERR_SYNTAX = 322, /* general syntax error */ + ERR_INVOP = 323, /* invalid operand type */ + ERR_NOMEMLC = 324, /* no memory for new local symbol table */ + ERR_NOMEMAR = 325, /* no memory for argument symbol table */ + ERR_FREDEF = 326, /* redefining a function which is already defined */ + ERR_NOSW = 327, /* 'case' or 'default' and not in switch block */ + ERR_SWRQCN = 328, /* constant required in switch case value */ + ERR_REQLBL = 329, /* label required for 'goto' */ + 
ERR_NOGOTO = 330, /* 'goto' label never defined */ + ERR_MANYSC = 331, /* too many superclasses for object */ + ERR_OREDEF = 332, /* redefining symbol as object */ + ERR_PREDEF = 333, /* property being redefined in object */ + ERR_BADPVL = 334, /* invalid property value */ + ERR_BADVOC = 335, /* bad vocabulary property value */ + ERR_BADTPL = 336, /* bad template property value (need sstring) */ + ERR_LONGTPL = 337, /* template base property name too long */ + ERR_MANYTPL = 338, /* too many templates (internal compiler limit) */ + ERR_BADCMPD = 339, /* bad value for compound word (sstring required) */ + ERR_BADFMT = 340, /* bad value for format string (sstring needed) */ + ERR_BADSYN = 341, /* invalid value for synonym (sstring required) */ + ERR_UNDFSYM = 342, /* undefined symbol */ + ERR_BADSPEC = 343, /* bad special word */ + ERR_NOSELF = 344, /* "self" not valid in this context */ + ERR_STREND = 345, /* warning: possible unterminated string */ + ERR_MODRPLX = 346, /* modify/replace not allowed with external func */ + ERR_MODFCN = 347, /* modify not allowed with function */ + ERR_MODFWD = 348, /* modify/replace not allowed with forward func */ + ERR_MODOBJ = 349, /* modify can only be used with a defined object */ + ERR_RPLSPEC = 350, /* warning - replacing specialWords */ + ERR_SPECNIL = 351, /* nil only allowed with modify specialWords */ + ERR_BADLCL = 353, /* 'local' statement must precede executable code */ + ERR_IMPPROP = 354, /* implied verifier '%s' is not a property */ + ERR_BADTPLF = 355, /* invalid command template flag */ + ERR_NOTPLFLG = 356, /* flags are not allowed with old file format */ + ERR_AMBIGBIN = 357, /* warning: operator '%s' could be binary */ + ERR_PIA = 358, /* warning: possibly incorrect assignment */ + ERR_BADSPECEXPR = 359, /* invalid speculation evaluation */ + + /* code generation errors */ + ERR_OBJOVF = 400, /* object cannot grow any bigger - code too big */ + ERR_NOLBL = 401, /* no more temporary labels/fixups */ + ERR_LBNOSET 
= 402, /* (internal error) label never set */ + ERR_INVLSTE = 403, /* invalid datatype for list element */ + ERR_MANYDBG = 404, /* too many debugger line records (internal limit) */ + + /* vocabulary setup errors */ + ERR_VOCINUS = 450, /* vocabulary being redefined for object */ + ERR_VOCMNPG = 451, /* too many vocwdef pages (internal limit) */ + ERR_VOCREVB = 452, /* redefining same verb */ + ERR_VOCREVB2 = 453, /* redefining same verb - two arguments */ + + /* set-up errors */ + ERR_LOCNOBJ = 500, /* location of object %s is not an object */ + ERR_CNTNLST = 501, /* contents of object %s is not list */ + ERR_SUPOVF = 502, /* overflow trying to build contents list */ + ERR_RQOBJNF = 503, /* required object %s not found */ + ERR_WRNONF = 504, /* warning - object %s not found */ + ERR_MANYBIF = 505, /* too many built-in functions (internal error) */ + + /* fio errors */ + ERR_OPWGAM = 600, /* unable to open game for writing */ + ERR_WRTGAM = 601, /* error writing to game file */ + ERR_FIOMSC = 602, /* too many sc's for writing in fiowrt */ + ERR_UNDEFF = 603, /* undefined function */ + ERR_UNDEFO = 604, /* undefined object */ + ERR_UNDEF = 605, /* undefined symbols found */ + ERR_OPRGAM = 606, /* unable to open game for reading */ + ERR_RDGAM = 607, /* error reading game file */ + ERR_BADHDR = 608, /* file has invalid header - not TADS game file */ + ERR_UNKRSC = 609, /* unknown resource type in .gam file */ + ERR_UNKOTYP = 610, /* unknown object type in OBJ resource */ + ERR_BADVSN = 611, /* file saved by different incompatible version */ + ERR_LDGAM = 612, /* error loading object on demand */ + ERR_LDBIG = 613, /* object too big for load region (prob. 
internal) */ + ERR_UNXEXT = 614, /* did not expect external function */ + ERR_WRTVSN = 615, /* compiler cannot write the requested version */ + ERR_VNOCTAB = 616, /* format version cannot be used with -ctab */ + ERR_BADHDRRSC = 617, /* invalid resource file header in file %s */ + ERR_RDRSC = 618, /* error reading resource file "xxx" */ + + /* character mapping errors */ + ERR_CHRNOFILE = 700, /* unable to load character mapping file */ + + /* user interrupt */ + ERR_USRINT = 990, /* user requested cancel of current operation */ + + /* run-time errors */ + ERR_STKOVF = 1001, /* stack overflow */ + ERR_HPOVF = 1002, /* heap overflow */ + ERR_REQNUM = 1003, /* numeric value required */ + ERR_STKUND = 1004, /* stack underflow */ + ERR_REQLOG = 1005, /* logical value required */ + ERR_INVCMP = 1006, /* invalid datatypes for magnitude comparison */ + ERR_REQSTR = 1007, /* string value required */ + ERR_INVADD = 1008, /* invalid datatypes for '+' operator */ + ERR_INVSUB = 1009, /* invalid datatypes for binary '-' operator */ + ERR_REQVOB = 1010, /* require object value */ + ERR_REQVFN = 1011, /* required function pointer */ + ERR_REQVPR = 1012, /* required property number value */ + + /* non-error conditions: run-time EXIT, ABORT, ASKIO, ASKDO */ + ERR_RUNEXIT = 1013, /* 'exit' statement executed */ + ERR_RUNABRT = 1014, /* 'abort' statement executed */ + ERR_RUNASKD = 1015, /* 'askdo' statement executed */ + ERR_RUNASKI = 1016, /* 'askio' executed; int arg 1 is prep */ + ERR_RUNQUIT = 1017, /* 'quit' executed */ + ERR_RUNRESTART = 1018, /* 'reset' executed */ + ERR_RUNEXITOBJ = 1019, /* 'exitobj' executed */ + + ERR_REQVLS = 1020, /* list value required */ + ERR_LOWINX = 1021, /* index value too low (must be >= 1) */ + ERR_HIGHINX = 1022, /* index value too high (must be <= length(list)) */ + ERR_INVTBIF = 1023, /* invalid type for built-in function */ + ERR_INVVBIF = 1024, /* invalid value for built-in function */ + ERR_BIFARGC = 1025, /* wrong number of arguments to 
built-in */ + ERR_ARGC = 1026, /* wrong number of arguments to user function */ + ERR_FUSEVAL = 1027, /* string/list not allowed for fuse/daemon arg */ + ERR_BADSETF = 1028, /* internal error in setfuse/setdaemon/notify */ + ERR_MANYFUS = 1029, /* too many fuses */ + ERR_MANYDMN = 1030, /* too many daemons */ + ERR_MANYNFY = 1031, /* too many notifiers */ + ERR_NOFUSE = 1032, /* fuse not found in remfuse */ + ERR_NODMN = 1033, /* daemon not found in remdaemon */ + ERR_NONFY = 1034, /* notifier not found in unnotify */ + ERR_BADREMF = 1035, /* internal error in remfuse/remdaemon/unnotify */ + ERR_DMDLOOP = 1036, /* load-on-demand loop: property not being set */ + ERR_UNDFOBJ = 1037, /* undefined object in vocabulary tree */ + ERR_BIFCSTR = 1038, /* c-string conversion overflows buffer */ + ERR_INVOPC = 1039, /* invalid opcode */ + ERR_RUNNOBJ = 1040, /* runtime error: property taken of non-object */ + ERR_EXTLOAD = 1041, /* unable to load external function "%s" */ + ERR_EXTRUN = 1042, /* error executing external function "%s" */ + ERR_CIRCSYN = 1043, /* circular synonym */ + ERR_DIVZERO = 1044, /* divide by zero */ + ERR_BADDEL = 1045, /* can only delete objects created with "new" */ + ERR_BADNEWSC = 1046, /* superclass for "new" cannot be a new object */ + ERR_VOCSTK = 1047, /* insufficient space in parser stack */ + ERR_BADFILE = 1048, /* invalid file handle */ + + ERR_RUNEXITPRECMD = 1049, /* exited from preCommand */ + + /* run-time parser errors */ + ERR_PRS_SENT_UNK = 1200, /* sentence structure not recognized */ + ERR_PRS_VERDO_FAIL = 1201, /* verDoVerb failed */ + ERR_PRS_VERIO_FAIL = 1202, /* verIoVerb failed */ + ERR_PRS_NO_VERDO = 1203, /* no verDoVerb for direct object */ + ERR_PRS_NO_VERIO = 1204, /* no verIoVerb for indirect object */ + ERR_PRS_VAL_DO_FAIL = 1205, /* direct object validation failed */ + ERR_PRS_VAL_IO_FAIL = 1206, /* indirect object validation failed */ + + /* compiler/runtime/debugger driver errors */ + ERR_USAGE = 1500, /* invalid 
usage */ + ERR_OPNINP = 1501, /* error opening input file */ + ERR_NODBG = 1502, /* game not compiled for debugging */ + ERR_ERRFIL = 1503, /* unable to open error capture file */ + ERR_PRSCXSIZ = 1504, /* parse pool + local size too large */ + ERR_STKSIZE = 1505, /* stack size too large */ + ERR_OPNSTRFIL = 1506, /* error opening string capture file */ + ERR_INVCMAP = 1507, /* invalid character map file */ + + /* debugger errors */ + ERR_BPSYM = 2000, /* symbol not found for breakpoint */ + ERR_BPPROP = 2002, /* breakpoint symbol is not a property */ + ERR_BPFUNC = 2003, /* breakpoint symbol is not a function */ + ERR_BPNOPRP = 2004, /* property is not defined for object */ + ERR_BPPRPNC = 2005, /* property is not code */ + ERR_BPSET = 2006, /* breakpoint already set at this location */ + ERR_BPNOTLN = 2007, /* breakpoint is not at a line (OPCLINE instr) */ + ERR_MANYBP = 2008, /* too many breakpoints */ + ERR_BPNSET = 2009, /* breakpoint to be deleted was not set */ + ERR_DBGMNSY = 2010, /* too many symbols in debug expression (int lim) */ + ERR_NOSOURC = 2011, /* unable to find source file %s */ + ERR_WTCHLCL = 2012, /* illegal to assign to local in watch expr */ + ERR_INACTFR = 2013, /* inactive frame (expression value not available) */ + ERR_MANYWX = 2014, /* too many watch expressions */ + ERR_WXNSET = 2015, /* watchpoint not set */ + ERR_EXTRTXT = 2016, /* extraneous text at end of command */ + ERR_BPOBJ = 2017, /* breakpoint symbol is not an object */ + ERR_DBGINACT = 2018, /* debugger is not active */ + ERR_BPINUSE = 2019, /* breakpoint is already used */ + ERR_RTBADSPECEXPR = 2020, /* invalid speculative expression */ + ERR_NEEDLIN2 = 2021, /* -ds2 information not found - must recompile */ + + /* usage error messages */ + ERR_TCUS1 = 3000, /* first tc usage message */ + ERR_TCUSL = 3024, /* last tc usage message */ + ERR_TCTGUS1 = 3030, /* first tc toggle message */ + ERR_TCTGUSL = 3032, + ERR_TCZUS1 = 3040, /* first tc -Z suboptions usage message */ + 
ERR_TCZUSL = 3041, + ERR_TC1US1 = 3050, /* first tc -1 suboptions usage message */ + ERR_TC1USL = 3058, + ERR_TCMUS1 = 3070, /* first tc -m suboptions usage message */ + ERR_TCMUSL = 3076, + ERR_TCVUS1 = 3080, /* first -v suboption usage message */ + ERR_TCVUSL = 3082, + ERR_TRUSPARM = 3099, + ERR_TRUS1 = 3100, /* first tr usage message */ + ERR_TRUSL = 3117, + ERR_TRUSFT1 = 3118, /* first tr "footer" message */ + ERR_TRUSFTL = 3119, /* last tr "footer" message */ + ERR_TRSUS1 = 3150, /* first tr -s suboptions usage message */ + ERR_TRSUSL = 3157, + ERR_TDBUSPARM = 3199, + ERR_TDBUS1 = 3200, /* first tdb usage message */ + ERR_TDBUSL = 3214, /* last tdb usage message */ + + /* TR 16-bit MSDOS-specific usage messages */ + ERR_TRUS_DOS_1 = 3300, + ERR_TRUS_DOS_L = 3300, + + /* TR 32-bit MSDOS console mode usage messages */ + ERR_TRUS_DOS32_1 = 3310, + ERR_TRUS_DOS32_L = 3312, + + /* TADS/Graphic errors */ + ERR_GNOFIL = 4001, /* can't find graphics file %s */ + ERR_GNORM = 4002, /* can't find room %s */ + ERR_GNOOBJ = 4003, /* can't find hot spot object %s */ + ERR_GNOICN = 4004 /* can't find icon object %s */ +}; + +/* + * Special error flag - this is returned from execmd() when preparseCmd + * returns a command list. This indicates to voc1cmd that it should try + * the command over again, using the words in the new list. 
+ */ +#define ERR_PREPRSCMDREDO 30000 /* preparseCmd returned a list */ +#define ERR_PREPRSCMDCAN 30001 /* preparseCmd returned 'nil' to cancel */ + union erradef { int erraint; // integer argument char *errastr; // text string argument @@ -274,6 +582,17 @@ void errlogf(errcxdef *ctx, char *facility, int err); (errargv(ctx,0,typ1,arg1),errargv(ctx,1,typ2,arg2),\ errargc(ctx,2),errlogn(ctx,e,fac)) +/** + * For compatibility with old facility-free mechanism, signal with facility "TADS" + */ +#define errsig(ctx, err) errsigf(ctx, "TADS", err) +#define errsig1(c, e, t, a) errsigf1(c,"TADS",e,t,a) +//#define errsig2(c, e, t1, a1, t2, a2) errsigf2(c,"TADS",e,t1,a1,t2,a2) +#define errlog(c, e) errlogf(c, "TADS", e) +#define errlog1(c, e, t, a) errlogf1(c,"TADS",e,t,a) +#define errlog2(c, e, t1, a1, t2, a2) errlogf2(c,"TADS",e,t1,a1,t2,a2) + +#define errsig2(c, e, t1, a1, t2, a2) error("Error occurred") // get the text of an error void errmsg(errcxdef *ctx, char *outbuf, uint outbufl, uint err); diff --git a/engines/glk/tads/tads2/os.cpp b/engines/glk/tads/tads2/os.cpp index 248611d5ad..514a912bd5 100644 --- a/engines/glk/tads/tads2/os.cpp +++ b/engines/glk/tads/tads2/os.cpp @@ -178,6 +178,42 @@ osfildef *OS::oss_open_stream(char *buffer, glui32 tadsusage, glui32 tbusage, return *osf; } +osfildef *OS::osfoprb(const char *fname, uint typ) { + Common::File *f = new Common::File(); + if (f->open(fname)) + return f; + + f->close(); + delete f; + return nullptr; +} + +void OS::os_gen_charmap_filename(char *filename, const char *internal_id, + const char *argv0) { + const char *p; + const char *rootname; + size_t pathlen; + + // find the path prefix of the original executable filename + for (p = rootname = argv0; *p != '\0'; ++p) { + if (*p == '/' || *p == '\\' || *p == ':') + rootname = p + 1; + } + + // copy the path prefix + pathlen = rootname - argv0; + memcpy(filename, argv0, pathlen); + + // if there's no trailing backslash, add one + if (pathlen == 0 || filename[pathlen - 
1] != '\\') + filename[pathlen++] = '\\'; + + // add "win_", plus the character set ID, plus the extension + strcpy(filename + pathlen, "win_"); + strcat(filename + pathlen, internal_id); + strcat(filename + pathlen, ".tcp"); +} + void OS::oss_put_string_with_hilite(winid_t win, const char *str, size_t len) { glk_set_window(win); glk_put_buffer(str, len); diff --git a/engines/glk/tads/tads2/os.h b/engines/glk/tads/tads2/os.h index 3c50cb5e64..ecd046858e 100644 --- a/engines/glk/tads/tads2/os.h +++ b/engines/glk/tads/tads2/os.h @@ -24,145 +24,12 @@ #define GLK_TADS_TADS2_OS #include "glk/tads/tads.h" +#include "glk/tads/tads2/types.h" namespace Glk { namespace TADS { namespace TADS2 { -/** - * Allocate a memory block - */ -#define mchalo(CTX, SIZE, COMMENT) ((byte *)new byte[SIZE]) - -/** - * Free a memory block - */ -#define mchfre(PTR) delete[] (byte *)PTR - - -/** - * The character (or characters) which mark the beginning of a special fileref string. - * The important thing is that the string be one that is either not allowed in - * filenames on your platform or is unlikely to be the first part of a filename. - */ -#define OSS_FILEREF_STRING_PREFIX ":" - -/** - * The character (or characters) which mark the end of a special fileref string. - * Using this and OSS_FILEREF_STRING_PREFIX, you should be able to come up with - * something which forms an invalid filename - */ -#define OSS_FILEREF_STRING_SUFFIX "" - -/** - * Maximum length of status line text - */ -#define OSS_STATUS_STRING_LEN 80 - -/** - * Important note: do not change these values when porting TADS. These - * values can be used by games, so they must be the same on all platforms. 
- */ -enum { - OS_AFP_OPEN = 1, ///< choose an existing file to open for reading - OS_AFP_SAVE = 2 ///< choose a filename for saving to a file -}; - -/** - * File types.These type codes are used when opening or creating a file, - * so that the OS routine can set appropriate file system metadata - * to describe or find the file type. - * - * The type os_filetype_t is defined for documentary purposes; it's - * always just an int. - */ -enum os_filetype_t { - OSFTGAME = 0, ///< a game data file (.gam) - OSFTSAVE = 1, ///< a saved game (.sav) - OSFTLOG = 2, ///< a transcript (log) file - OSFTSWAP = 3, ///< swap file - OSFTDATA = 4, ///< user data file (used with the TADS fopen() call) - OSFTCMD = 5, ///< QA command/log file - OSFTERRS = 6, ///< error message file - OSFTTEXT = 7, ///< text file - used for source files - OSFTBIN = 8, ///< binary file of unknown type - resources, etc - OSFTCMAP = 9, ///< character mapping file - OSFTPREF = 10, ///< preferences file - OSFTUNK = 11, ///< unknown - as a filter, matches any file type - OSFTT3IMG = 12, ///< T3 image file (.t3 - formerly .t3x) - OSFTT3OBJ = 13, ///< T3 object file (.t3o) - OSFTT3SYM = 14, ///< T3 symbol export file (.t3s) - OSFTT3SAV = 15 ///< T3 saved state file (.t3v) -}; - -/** - * Constants for os_getc() when returning commands. When used for command line - * editing, special keys (arrows, END, etc.) should cause os_getc() to return 0, - * and return the appropriate CMD_ value on the NEXT call. Hence, os_getc() must - * keep the appropriate information around statically for the next call when a - * command key is issued. - * - * The comments indicate which CMD_xxx codes are "translated" codes and which are - * "raw"; the difference is that, when a particular keystroke could be interpreted - * as two different CMD_xxx codes, one translated and the other raw, os_getc() - * should always return the translated version of the key, and os_getc_raw() - * should return the raw version. 
- */ -enum KeyCmd { - CMD_UP = 1, ///< move up/up arrow (translated) - CMD_DOWN = 2, ///< move down/down arrow (translated) - CMD_RIGHT = 3, ///< move right/right arrow (translated) - CMD_LEFT = 4, ///< move left/left arrow (translated) - CMD_END = 5, ///< move cursor to end of line (translated) - CMD_HOME = 6, ///< move cursor to start of line (translated) - CMD_DEOL = 7, ///< delete to end of line (translated) - CMD_KILL = 8, ///< delete entire line (translated) - CMD_DEL = 9, ///< delete current character (translated) - CMD_SCR = 10, ///< toggle scrollback mode (translated) - CMD_PGUP = 11, ///< page up (translated) - CMD_PGDN = 12, ///< page down (translated) - CMD_TOP = 13, ///< top of file (translated) - CMD_BOT = 14, ///< bottom of file (translated) - CMD_F1 = 15, ///< function key F1 (raw) - CMD_F2 = 16, ///< function key F2 (raw) - CMD_F3 = 17, ///< function key F3 (raw) - CMD_F4 = 18, ///< function key F4 (raw) - CMD_F5 = 19, ///< function key F5 (raw) - CMD_F6 = 20, ///< function key F6 (raw) - CMD_F7 = 21, ///< function key F7 (raw) - CMD_F8 = 22, ///< function key F8 (raw) - CMD_F9 = 23, ///< function key F9 (raw) - CMD_F10 = 24, ///< function key F10 (raw) - CMD_CHOME = 25, ///< control-home (raw) - CMD_TAB = 26, ///< tab (translated) - CMD_SF2 = 27, ///< shift-F2 (raw) - ///< not used (obsolete) - 28 - CMD_WORD_LEFT = 29, ///< word left (ctrl-left on dos) (translated) - CMD_WORD_RIGHT = 30, ///< word right (ctrl-right on dos) (translated) - CMD_WORDKILL = 31, ///< delete word right (translated) - CMD_EOF = 32, ///< end-of-file (raw) - CMD_BREAK = 33, ///< break (Ctrl-C or local equivalent) (translated) - CMD_INS = 34, ///< insert key (raw) - - /** - * ALT-keys - add alphabetical code to CMD_ALT: ALT-A == CMD_ALT + 0, - * ALT-B == CMD_ALT + 1, ALT-C == CMD_ALT + 2, etc - * - * These keys are all raw (untranslated). 
- */ - CMD_ALT = 128 ///< start of ALT keys -}; - -/** - * Status mode codes - */ -enum StatusMode { - OSS_STATUS_MODE_STORY = 0, - OSS_STATUS_MODE_STATUS = 1 -}; - -typedef Common::SeekableReadStream osfildef; - /** * Operating system compatibility layer */ @@ -262,6 +129,27 @@ protected: */ const char *os_get_root_name(const char *buf) const { return buf; } + /** + * Open a file for access. Returns nullptr if the file could not be opened + */ + osfildef *osfoprb(const char *fname, uint typ = 0); + + /** + * Receive notification that a character mapping file has been loaded. We + * don't need to do anything with this information, since we're relying + * on the Glk layer and ScummVM backend to handle all that + */ + void os_advise_load_charmap(const char *id, const char *ldesc, const char *sysinfo) { + // No implementation needed + } + + /** + * Generate a filename for a character mapping table. On Windows, the + * filename is always simply "win_" plus the internal ID plus ".tcp". + */ + void os_gen_charmap_filename(char *filename, const char *internal_id, + const char *argv0); + /**@}*/ /** diff --git a/engines/glk/tads/tads2/regex.cpp b/engines/glk/tads/tads2/regex.cpp new file mode 100644 index 0000000000..f6a009a0a9 --- /dev/null +++ b/engines/glk/tads/tads2/regex.cpp @@ -0,0 +1,1276 @@ +/* ScummVM - Graphic Adventure Engine + * + * ScummVM is the legal property of its developers, whose names + * are too numerous to list here. Please refer to the COPYRIGHT + * file distributed with this source distribution. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +/* +Regular Expression Parser and Recognizer for TADS +Function + Parses and recognizes regular expressions +Notes + Regular expression syntax: + + abc|def either abc or def + (abc) abc + abc+ abc, abcc, abccc, ... + abc* ab, abc, abcc, ... + abc? ab or abc + . any single character + abc$ abc at the end of the line + ^abc abc at the beginning of the line + %^abc literally ^abc + [abcx-z] matches a, b, c, x, y, or z + [^abcx-z] matches any character except a, b, c, x, y, or z + [^]-q] matches any character except ], -, or q + + Note that using ']' or '-' in a character range expression requires + special ordering. If ']' is to be used, it must be the first character + after the '^', if present, within the brackets. If '-' is to be used, + it must be the first character after the '^' and/or ']', if present. + + '%' is used to escape the special characters: | . ( ) * ? + ^ $ % [ + (We use '%' rather than a backslash because it's less trouble to + enter in a TADS string -- a backslash needs to be quoted with another + backslash, which is error-prone and hard to read. '%' doesn't need + any special quoting in a TADS string, which makes it a lot more + readable.) + + In addition, '%' is used to introduce additional special sequences: + + %1 text matching first parenthesized expression + %9 text matching ninth parenthesized expression + %< matches at the beginning of a word only + %> matches at the end of a word only + %w matches any word character + %W matches any non-word character + %b matches at any word boundary (beginning or end of word) + %B matches except at a word boundary + + For the word matching sequences, a word is any sequence of letters and + numbers. 
+*/ + +#include "engines/glk/tads/tads2/regex.h" +#include "engines/glk/tads/tads2/ler.h" +#include "engines/glk/tads/tads2/os.h" +//#include "engines/glk/tads/tads2/std.h" +//#include "engines/glk/tads/tads2/ler.h" + +namespace Glk { +namespace TADS { +namespace TADS2 { + +/** + * A "machine" (i.e., a finite state automaton) is a set of state + * transition tuples. A tuple has three elements: the state ID, the ID + * of the state that we transition to, and the condition for the + * transition. The condition is simply the character that we must match + * to make the transition, or a special distinguished symbol "epsilon," + * which refers to a transition with no input character consumed. + * + * The primitive elements of our machines guarantee that we never have + * more than two transitions out of a particular state, so we can + * denormalize the representation of a state by storing the two possible + * tuples for that state in a single combined tuple. This has the + * performance advantage that we can use the state ID as an index into + * an array of state tuples. + * + * A particular machine always has a single initial and single final + * (successful) state, so we can define a machine by its initial and + * final state ID's. 
+ */ +enum { + // the special symbol value for "epsilon" + RE_EPSILON = '\001', + + // the special symbol value for a wildcard character + RE_WILDCARD = '\002', + + // special symbol values for beginning and end of text + RE_TEXT_BEGIN = '\003', + RE_TEXT_END = '\004', + + // special symbol values for start and end of a word + RE_WORD_BEGIN = '\005', + RE_WORD_END = '\006', + + // special symbols for word-char and non-word-char + RE_WORD_CHAR = '\007', + RE_NON_WORD_CHAR = '\010', + + // special symbols for word-boundary and non-word-boundary + RE_WORD_BOUNDARY = '\011', + RE_NON_WORD_BOUNDARY = '\012', + + // special symbol for a character range/exclusion range + RE_RANGE = '\013', + RE_RANGE_EXCL = '\014', + + // a range of special symbol values for group matchers + RE_GROUP_MATCH_0 = '\015', + RE_GROUP_MATCH_9 = (RE_GROUP_MATCH_0 + 9) +}; + +re_context::re_context(errcxdef *errctx) { + // save the error context + _errctx = errctx; + + // clear states + _next_state = RE_STATE_FIRST_VALID; + + // clear groups + _cur_group = 0; + + // no string buffer yet + _strbuf = 0; +} + +re_context::~re_context() { + // reset state + reset(); + + // if we allocated a string buffer, delete it + if (_strbuf != 0) { + mchfre(_strbuf); + _strbuf = nullptr; + } +} + +void re_context::reset() { + int i; + + // delete any range tables we've allocated + for (i = 0 ; i < _next_state ; ++i) { + if (_tuple_arr[i].char_range != 0) { + mchfre(_tuple_arr[i].char_range); + _tuple_arr[i].char_range = 0; + } + } + + // clear states + _next_state = RE_STATE_FIRST_VALID; + + // clear groups + _cur_group = 0; +} + +re_state_id re_context::alloc_state() { + // If we don't have enough room for another state, expand the array + if (_next_state >= (int)_tuple_arr.size()) { + // bump the size by a bit + _tuple_arr.resize(_tuple_arr.size() + 100); + } + + // initialize the next state + _tuple_arr[_next_state].next_state_1 = RE_STATE_INVALID; + _tuple_arr[_next_state].next_state_2 = RE_STATE_INVALID; + 
_tuple_arr[_next_state].ch = RE_EPSILON; + _tuple_arr[_next_state].flags = 0; + _tuple_arr[_next_state].char_range = 0; + + // return the new state's ID + return _next_state++; +} + +void re_context::set_trans(re_state_id id, re_state_id dest_id, char ch) { + re_tuple *tuple; + + /* + * get the tuple containing the transitions for this state ID - the + * state ID is the index of the state's transition tuple in the array + */ + tuple = &_tuple_arr[id]; + + /* + * If the first state pointer hasn't been set yet, set it to the new + * destination. Otherwise, set the second state pointer. + * + * Only set the character on setting the first state. When setting + * the second state, we must assume that the character for the state + * has already been set, since any given state can have only one + * character setting. + */ + if (tuple->next_state_1 == RE_STATE_INVALID) { + /* + * set the character ID, unless the state has been marked with a + * special flag which indicates that the character value has + * another meaning (in particular, a group marker) + */ + if (!(tuple->flags & (RE_STATE_GROUP_BEGIN | RE_STATE_GROUP_END))) + tuple->ch = ch; + + // set the first transition + tuple->next_state_1 = dest_id; + } else { + // set only the second transition state - don't set the character + tuple->next_state_2 = dest_id; + } +} + +void re_context::init_machine(re_machine *machine) { + machine->init = alloc_state(); + machine->final = alloc_state(); +} + +void re_context::build_char(re_machine *machine, char ch) { + // initialize our new machine + init_machine(machine); + + // allocate a transition tuple for the new state + set_trans(machine->init, machine->final, ch); +} + +void re_context::build_char_range(re_machine *machine, unsigned char *range, int exclusion) { + unsigned char *range_copy; + + // initialize our new machine + init_machine(machine); + + // allocate a transition table for the new state + set_trans(machine->init, machine->final, (char)(exclusion ? 
RE_RANGE_EXCL : RE_RANGE)); + + // allocate a copy of the range bit vector + range_copy = (unsigned char *)mchalo(_errctx, 32, "regex range"); + + // copy the caller's range + memcpy(range_copy, range, 32); + + // store it in the tuple + _tuple_arr[machine->init].char_range = range_copy; +} + +void re_context::build_group_matcher(re_machine *machine, int group_num) { + // initialize our new machine + init_machine(machine); + + /* + * Allocate a transition tuple for the new state, using the group ID + * as the character code. Store the special code for a group + * recognizer rather than the normal literal character code. + */ + set_trans(machine->init, machine->final, (char)(group_num + RE_GROUP_MATCH_0)); +} + +void re_context::build_concat(re_machine *new_machine, re_machine *lhs, re_machine *rhs) { + // initialize the new machine + init_machine(new_machine); + + /* + * Set up an epsilon transition from the new machine's initial state + * to the first submachine's initial state + */ + set_trans(new_machine->init, lhs->init, RE_EPSILON); + + /* + * Set up an epsilon transition from the first submachine's final + * state to the second submachine's initial state + */ + set_trans(lhs->final, rhs->init, RE_EPSILON); + + /* + * Set up an epsilon transition from the second submachine's final + * state to our new machine's final state + */ + set_trans(rhs->final, new_machine->final, RE_EPSILON); +} + +void re_context::build_group(re_machine *new_machine, re_machine *sub_machine, int group_id) { + // initialize the container machine + init_machine(new_machine); + + /* + * set up an epsilon transition from the new machine's initial state + * into the initial state of the group, and another transition from + * the group's final state into the container's final state + */ + set_trans(new_machine->init, sub_machine->init, RE_EPSILON); + set_trans(sub_machine->final, new_machine->final, RE_EPSILON); + + // Mark the initial and final states of the group machine as being group 
markers + _tuple_arr[new_machine->init].flags |= RE_STATE_GROUP_BEGIN; + _tuple_arr[new_machine->final].flags |= RE_STATE_GROUP_END; + + // store the group ID in the 'ch' member of the start and end states + _tuple_arr[new_machine->init].ch = group_id; + _tuple_arr[new_machine->final].ch = group_id; +} + +void re_context::build_alter(re_machine *new_machine, re_machine *lhs, re_machine *rhs) { + // initialize the new machine + init_machine(new_machine); + + /* + * Set up an epsilon transition from our new machine's initial state + * to the initial state of each submachine + */ + set_trans(new_machine->init, lhs->init, RE_EPSILON); + set_trans(new_machine->init, rhs->init, RE_EPSILON); + + /* + * Set up an epsilon transition from the final state of each + * submachine to our final state + */ + set_trans(lhs->final, new_machine->final, RE_EPSILON); + set_trans(rhs->final, new_machine->final, RE_EPSILON); +} + +void re_context::build_closure(re_machine *new_machine, re_machine *sub, char specifier) { + // initialize the new machine + init_machine(new_machine); + + /* + * Set up an epsilon transition from our initial state to the submachine's initial + * state, and from the submachine's final state to our final state + */ + set_trans(new_machine->init, sub->init, RE_EPSILON); + set_trans(sub->final, new_machine->final, RE_EPSILON); + + /* + * If this is an unbounded closure ('*' or '+', but not '?'), set up + * the loop transition that takes us from the new machine's final + * state back to its initial state. We don't do this on the + * zero-or-one closure, because we can only match the expression + * once. + */ + if (specifier != '?') + set_trans(sub->final, sub->init, RE_EPSILON); + + /* + * If this is a zero-or-one closure or a zero-or-more closure, set + * up an epsilon transition from our initial state to our final + * state, since we can skip the entire subexpression. 
We don't do + * this on the one-or-more closure, because we can't skip the + * subexpression in this case. + */ + if (specifier != '+') + set_trans(new_machine->init, new_machine->final, RE_EPSILON); +} + +void re_context::concat_onto(re_machine *dest, re_machine *rhs) { + // check for a null destination machine + if (dest->isNull()) { + /* + * the first machine is null - simply copy the second machine + * onto the first unchanged + */ + *dest = *rhs; + } else { + re_machine new_machine; + + // build the concatenated machine + build_concat(&new_machine, dest, rhs); + + // copy the concatenated machine onto the first machine + *dest = new_machine; + } +} + +void re_context::alternate_onto(re_machine *dest, re_machine *rhs) { + // check to see if the first machine is null + if (dest->isNull()) { + /* + * the first machine is null - simply copy the second machine + * onto the first + */ + *dest = *rhs; + } else { + /* + * if the second machine is null, don't do anything; otherwise, + * build the alternation + */ + if (!rhs->isNull()) { + re_machine new_machine; + + // build the alternation + build_alter(&new_machine, dest, rhs); + + // replace the first machine with the alternation + *dest = new_machine; + } + } +} + +/** + * Set a bit in a bit vector. 
+ */ +#define re_set_bit(set, bit) \ + (((unsigned char *)(set))[(bit) >> 3] |= (1 << ((bit) & 7))) + +/** + * Test a bit in a bit vector + */ +#define re_is_bit_set(set, bit) \ + ((((unsigned char *)(set))[(bit) >> 3] & (1 << ((bit) & 7))) != 0) + +re_status_t re_context::compile(const char *expr, size_t exprlen, re_machine *result_machine) { + re_machine cur_machine; + re_machine alter_machine; + re_machine new_machine; + size_t group_stack_level; + struct { + re_machine old_cur; + re_machine old_alter; + int group_id; + } group_stack[50]; + + // reset everything + reset(); + + // start out with no current machine and no alternate machine + cur_machine.build_null_machine(); + alter_machine.build_null_machine(); + + // nothing on the stack yet + group_stack_level = 0; + + // loop until we run out of expression to parse + for (; exprlen != 0 ; ++expr, --exprlen) { + switch(*expr) { + case '^': + /* + * beginning of line - if we're not at the beginning of the + * current expression (i.e., we already have some + * concatentations accumulated), treat it as an ordinary + * character + */ + if (!cur_machine.isNull()) + goto normal_char; + + // build a new start-of-text recognizer + build_char(&new_machine, RE_TEXT_BEGIN); + + /* + * concatenate it onto the string - note that this can't + * have any postfix operators + */ + concat_onto(&cur_machine, &new_machine); + break; + + case '$': + /* + * End of line specifier - if there's anything left after + * the '$' other than a close parens or alternation + * specifier, great it as a normal character + */ + if (exprlen > 1 + && (*(expr+1) != ')' && *(expr+1) != '|')) + goto normal_char; + + // build a new end-of-text recognizer + build_char(&new_machine, RE_TEXT_END); + + /* + * concatenate it onto the string - note that this can't + * have any postfix operators + */ + concat_onto(&cur_machine, &new_machine); + break; + + case '(': + /* + * Add a nesting level. 
Push the current machine and
+			 * alternate machines onto the group stack, and clear
+			 * everything out for the new group.
+			 */
+			if (group_stack_level >= sizeof(group_stack)/sizeof(group_stack[0])) {
+				/* the stack is full (valid slots are 0..size-1) - return an error */
+				return RE_STATUS_GROUP_NESTING_TOO_DEEP;
+			}
+
+			// save the current state on the stack
+			group_stack[group_stack_level].old_cur = cur_machine;
+			group_stack[group_stack_level].old_alter = alter_machine;
+
+			/*
+			 * Assign the group a group ID - groups are numbered in
+			 * order of their opening (left) parentheses, so we want to
+			 * assign a group number now.  We won't actually need to
+			 * know the group number until we get to the matching close
+			 * paren, but we need to assign it now, so store it in the
+			 * group stack.
+			 */
+			group_stack[group_stack_level].group_id = _cur_group;
+
+			// consume the group number
+			_cur_group++;
+
+			// push the level
+			++group_stack_level;
+
+			// start the new group with empty machines
+			cur_machine.build_null_machine();
+			alter_machine.build_null_machine();
+			break;
+
+		case ')':
+			// if there's nothing on the stack, ignore this
+			if (group_stack_level == 0)
+				break;
+
+			// take a level off the stack
+			--group_stack_level;
+
+			/*
+			 * Remove a nesting level.  If we have a pending alternate
+			 * expression, build the alternation expression.  This will
+			 * leave the entire group expression in alter_machine,
+			 * regardless of whether an alternation was in progress or
+			 * not.
+			 */
+			alternate_onto(&alter_machine, &cur_machine);
+
+			/*
+			 * Create a group machine that encloses the group and marks
+			 * it with a group number.  We assigned the group number
+			 * when we parsed the open paren, so read that group number
+			 * from the stack.
+			 *
+			 * Note that this will leave 'new_machine' with the entire
+			 * group machine.
+ */ + build_group(&new_machine, &alter_machine, + group_stack[group_stack_level].group_id); + + /* + * Pop the stack - restore the alternation and current + * machines that were in progress before the group started. + */ + cur_machine = group_stack[group_stack_level].old_cur; + alter_machine = group_stack[group_stack_level].old_alter; + + /* + * Check the group expression (in new_machine) for postfix + * expressions + */ + goto apply_postfix; + + case '|': + /* + * Start a new alternation. This ends the current + * alternation; if we have a previous pending alternate, + * build an alternation machine out of the previous + * alternate and the current machine and move that to the + * alternate; otherwise, simply move the current machine to + * the pending alternate. + */ + alternate_onto(&alter_machine, &cur_machine); + + /* + * the alternation starts out with a blank slate, so null + * out the current machine + */ + cur_machine.build_null_machine(); + break; + + case '%': + // quoted character - skip the quote mark and see what we have + ++expr; + --exprlen; + + // check to see if we're at the end of the expression + if (exprlen == 0) { + /* + * end of the string - ignore it, but undo the extra + * increment of the expression index so that we exit the + * enclosing loop properly + */ + --expr; + ++exprlen; + break; + } + + // see what we have + switch(*expr) { + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + // group match - build a new literal group recognizer + build_group_matcher(&new_machine, (int)(*expr - '1')); + + // apply any postfix expression to the group recognizer + goto apply_postfix; + + case '<': + // build a beginning-of-word recognizer + build_char(&new_machine, RE_WORD_BEGIN); + + // it can't be postfixed - just concatenate it + concat_onto(&cur_machine, &new_machine); + break; + + case '>': + // build an end-of-word recognizer */ + build_char(&new_machine, RE_WORD_END); + + // it can't 
be postfixed - just concatenate it + concat_onto(&cur_machine, &new_machine); + break; + + case 'w': + // word character + build_char(&new_machine, RE_WORD_CHAR); + goto apply_postfix; + + case 'W': + // non-word character + build_char(&new_machine, RE_NON_WORD_CHAR); + goto apply_postfix; + + case 'b': + // word boundary + build_char(&new_machine, RE_WORD_BOUNDARY); + + // it can't be postfixed + concat_onto(&cur_machine, &new_machine); + break; + + case 'B': + // not a word boundary + build_char(&new_machine, RE_NON_WORD_BOUNDARY); + + // it can't be postfixed + concat_onto(&cur_machine, &new_machine); + break; + + default: + // build a new literal character recognizer + build_char(&new_machine, *expr); + + // apply any postfix expression to the character + goto apply_postfix; + } + break; + + case '.': + /* + * wildcard character - build a single character recognizer + * for the special wildcard symbol, then go check it for a + * postfix operator + */ + build_char(&new_machine, RE_WILDCARD); + goto apply_postfix; + break; + + case '[': { + // range expression + int is_exclusive = false; + unsigned char set[32]; + + // clear out the set of characters in the range + memset(set, 0, sizeof(set)); + + // first, skip the open bracket + ++expr; + --exprlen; + + // check to see if starts with the exclusion character + if (exprlen != 0 && *expr == '^') { + // skip the exclusion specifier + ++expr; + --exprlen; + + // note it + is_exclusive = true; + } + + // if the first character is a ']', include it in the range + if (exprlen != 0 && *expr == ']') { + re_set_bit(set, (int)']'); + ++expr; + --exprlen; + } + + // if the next character is a '-', include it in the range + if (exprlen != 0 && *expr == '-') { + re_set_bit(set, (int)'-'); + ++expr; + --exprlen; + } + + // scan the character set + while (exprlen != 0 && *expr != ']') { + int ch; + + // note this character + ch = (int)(unsigned char)*expr; + + // set it + re_set_bit(set, ch); + + // skip this character of the 
expression
+				++expr;
+				--exprlen;
+
+				// check for a range
+				if (exprlen != 0 && *expr == '-') {
+					int ch2;
+
+					// skip the '-'
+					++expr;
+					--exprlen;
+					if (exprlen != 0) {
+						// get the other end of the range
+						ch2 = (int)(unsigned char)*expr;
+
+						// skip the second character
+						++expr;
+						--exprlen;
+
+						// if the range is reversed, swap it
+						if (ch > ch2)
+							SWAP(ch, ch2);
+
+						// fill in the range
+						for ( ; ch <= ch2 ; ++ch)
+							re_set_bit(set, ch);
+					}
+				}
+			}
+
+			/*
+			 * If we ran off the end of the expression without finding the
+			 * closing ']', back up one character so that the enclosing
+			 * loop's "++expr, --exprlen" step leaves exprlen at zero
+			 * instead of wrapping the unsigned count around and reading
+			 * past the end of the pattern (the same remedy the '%' case
+			 * uses for a trailing quote mark).
+			 */
+			if (exprlen == 0) {
+				--expr;
+				++exprlen;
+			}
+
+			// create a character range machine
+			build_char_range(&new_machine, set, is_exclusive);
+
+			// apply any postfix operator
+			goto apply_postfix;
+			break;
+		}
+
+		default:
+		normal_char:
+			/*
+			 * it's an ordinary character - build a single character
+			 * recognizer machine, and then concatenate it onto any
+			 * existing machine
+			 */
+			build_char(&new_machine, *expr);
+
+		apply_postfix:
+			/*
+			 * Check for a postfix operator, and apply it to the machine
+			 * in 'new_machine' if present.  In any case, concatenate
+			 * the 'new_machine' (modified by a postfix operator or not)
+			 * to the current machine.
+			 */
+			if (exprlen > 1) {
+				switch(*(expr+1)) {
+				case '*':
+				case '+':
+				case '?':
+					/*
+					 * We have a postfix closure operator.  Build a new
+					 * closure machine out of 'new_machine'.
+					 */
+					{
+						re_machine closure_machine;
+
+						// move onto the closure operator
+						++expr;
+						--exprlen;
+
+						// build the closure machine
+						build_closure(&closure_machine, &new_machine, *expr);
+
+						// replace the original machine with the closure
+						new_machine = closure_machine;
+
+						/*
+						 * skip any redundant closure symbols, keeping
+						 * only the first one we saw
+						 */
+						while (exprlen > 1 && (*(expr+1) == '?'
+											   || *(expr+1) == '+'
+											   || *(expr+1) == '*')) {
+							++expr;
+							--exprlen;
+						}
+					}
+					break;
+
+				default:
+					/* no postfix operator */
+					break;
+				}
+			}
+
+			/*
+			 * Concatenate the new machine onto the current machine
+			 * under construction.
+			 */
+			concat_onto(&cur_machine, &new_machine);
+			break;
+		}
+	}
+
+	// complete any pending alternation
+	alternate_onto(&alter_machine, &cur_machine);
+
+	// store the resulting machine in the caller's machine descriptor
+	*result_machine = alter_machine;
+
+	// no errors encountered
+	return RE_STATUS_SUCCESS;
+}
+
+void re_context::note_group(re_group_register *regs, re_state_id id, const char *p) {
+	int group_index;
+
+	/*
+	 * Check to see if this is a valid state and it's a group marker -
+	 * if not, there's nothing to do.  The group ID lives in the tuple's
+	 * plain 'char' member, so read it back as an unsigned value: where
+	 * 'char' is signed, IDs of 128 and up would otherwise come back
+	 * negative, slip past the upper-bound test below, and index before
+	 * the start of 'regs'.
+	 */
+	if (id == RE_STATE_INVALID
+		|| !(_tuple_arr[id].flags
+			 & (RE_STATE_GROUP_BEGIN | RE_STATE_GROUP_END))
+		|| (group_index = (int)(unsigned char)_tuple_arr[id].ch) >= RE_GROUP_REG_CNT)
+		return;
+
+	// It's a valid group marker - note the appropriate register value
+	if ((_tuple_arr[id].flags & RE_STATE_GROUP_BEGIN) != 0)
+		regs[group_index].start_ofs = p;
+	else
+		regs[group_index].end_ofs = p;
+}
+
+bool re_context::is_word_char(char c) const {
+	return Common::isAlnum(c);
+}
+
+int re_context::match(const char *entire_str, const char *str, size_t origlen,
+		const re_machine *machine, re_group_register *regs) {
+	re_state_id cur_state;
+	const char *p;
+	size_t curlen;
+
+	// start at the machine's initial state
+	cur_state = machine->init;
+
+	// start at the beginning of the string
+	p = str;
+	curlen = origlen;
+
+	// note any group involved in the initial state
+	note_group(regs, cur_state, p);
+
+	/*
+	 * if we're starting in the final state, immediately return success
+	 * with a zero-length match
+	 */
+	if (cur_state == machine->final) {
+		// return success with a zero-length match
+		return 0;
+	}
+
+	// run the machine
+	for (;;) {
+		re_tuple *tuple;
+
+		// get the tuple for this state
+		tuple = &_tuple_arr[cur_state];
+
+		// if this is a group state, adjust the group registers
+		note_group(regs, cur_state, p);
+
+		// see what kind of state we're in
+		if (!(tuple->flags & (RE_STATE_GROUP_BEGIN | RE_STATE_GROUP_END))
+			&& tuple->ch !=
RE_EPSILON) {
+			/*
+			 * This is a character or group recognizer state.  If we
+			 * match the character or group, continue on to the next
+			 * state; otherwise, return failure.
+			 */
+			switch(tuple->ch) {
+			case RE_GROUP_MATCH_0:
+			case RE_GROUP_MATCH_0 + 1:
+			case RE_GROUP_MATCH_0 + 2:
+			case RE_GROUP_MATCH_0 + 3:
+			case RE_GROUP_MATCH_0 + 4:
+			case RE_GROUP_MATCH_0 + 5:
+			case RE_GROUP_MATCH_0 + 6:
+			case RE_GROUP_MATCH_0 + 7:
+			case RE_GROUP_MATCH_0 + 8:
+			case RE_GROUP_MATCH_0 + 9: {
+				int group_num;
+				re_group_register *group_reg;
+				size_t reg_len;
+
+				// it's a group - get the group number and its register
+				group_num = tuple->ch - RE_GROUP_MATCH_0;
+				group_reg = &regs[group_num];
+
+				/*
+				 * if this register isn't defined, there's nothing
+				 * to match, so fail
+				 */
+				if (group_reg->start_ofs == 0 || group_reg->end_ofs == 0)
+					return -1;
+
+				// calculate the length of the register value
+				reg_len = group_reg->end_ofs - group_reg->start_ofs;
+
+				// if we don't have enough left to match, it fails
+				if (curlen < reg_len)
+					return -1;
+
+				// if the string doesn't match exactly, we fail
+				if (memcmp(p, group_reg->start_ofs, reg_len) != 0)
+					return -1;
+
+				/*
+				 * It matches exactly - skip the entire length of
+				 * the register in the source string
+				 */
+				p += reg_len;
+				curlen -= reg_len;
+				break;
+			}
+
+			case RE_TEXT_BEGIN:
+				/*
+				 * Match only the exact beginning of the string - if
+				 * we're anywhere else, this isn't a match.  If this
+				 * succeeds, we don't skip any characters.
+				 */
+				if (p != entire_str)
+					return -1;
+				break;
+
+			case RE_TEXT_END:
+				/*
+				 * Match only the exact end of the string - if we're
+				 * anywhere else, this isn't a match.  Don't skip any
+				 * characters on success.
+ */ + if (curlen != 0) + return -1; + break; + + case RE_WORD_BEGIN: + /* + * if the previous character is a word character, we're + * not at the beginning of a word + */ + if (p != entire_str && is_word_char(*(p - 1))) + return -1; + + /* + * if we're at the end of the string, or the current + * character isn't the start of a word, we're not at the + * beginning of a word + */ + if (curlen == 0 || !is_word_char(*p)) + return -1; + break; + + case RE_WORD_END: + /* + * if the current character is a word character, we're not + * at the end of a word + */ + if (curlen != 0 && is_word_char(*p)) + return -1; + + /* + * if we're at the beginning of the string, or the + * previous character is not a word character, we're not + * at the end of a word + */ + if (p == entire_str || !is_word_char(*(p - 1))) + return -1; + break; + + case RE_WORD_CHAR: + /* if it's not a word character, it's a failure */ + if (curlen == 0 || !is_word_char(*p)) + return -1; + + /* skip this character of input */ + ++p; + --curlen; + break; + + case RE_NON_WORD_CHAR: + /* if it's a word character, it's a failure */ + if (curlen == 0 || is_word_char(*p)) + return -1; + + /* skip the input */ + ++p; + --curlen; + break; + + case RE_WORD_BOUNDARY: + case RE_NON_WORD_BOUNDARY: + { + int prev_is_word; + int next_is_word; + int boundary; + + /* + * Determine if the previous character is a word + * character -- if we're at the beginning of the + * string, it's obviously not, otherwise check its + * classification + */ + prev_is_word = (p != entire_str + && is_word_char(*(p - 1))); + + /* make the same check for the current character */ + next_is_word = (curlen != 0 + && is_word_char(*p)); + + /* + * Determine if this is a boundary - it is if the + * two states are different + */ + boundary = ((prev_is_word != 0) ^ (next_is_word != 0)); + + /* + * make sure it matches what was desired, and return + * failure if not + */ + if ((tuple->ch == RE_WORD_BOUNDARY && !boundary) + || (tuple->ch == 
RE_NON_WORD_BOUNDARY && boundary)) + return -1; + } + break; + + case RE_WILDCARD: + // make sure we have a character to match + if (curlen == 0) + return -1; + + // skip this character + ++p; + --curlen; + break; + + case RE_RANGE: + case RE_RANGE_EXCL: { + int match; + + // make sure we have a character to match + if (curlen == 0) + return -1; + + // see if we match + match = re_is_bit_set(tuple->char_range, + (int)(unsigned char)*p); + + // make sure we got what we wanted + if ((tuple->ch == RE_RANGE && !match) + || (tuple->ch == RE_RANGE_EXCL && match)) + return -1; + + // skip this character of the input + ++p; + --curlen; + break; + } + + default: + // make sure we have an exact match + if (curlen == 0 || tuple->ch != *p) + return -1; + + // skip this character of the input + ++p; + --curlen; + break; + } + + /* + * if we got this far, we were successful - move on to the + * next state + */ + cur_state = tuple->next_state_1; + } else if (tuple->next_state_2 == RE_STATE_INVALID) { + /* + * We have only one transition, so this state is entirely + * deterministic. Simply move on to the next state. + */ + cur_state = tuple->next_state_1; + } else { + re_machine sub_machine; + re_group_register regs1[RE_GROUP_REG_CNT]; + re_group_register regs2[RE_GROUP_REG_CNT]; + int ret1; + int ret2; + + /* + * This state has two possible transitions, and we don't + * know which one to take. So, try both, see which one + * works better, and return the result. Try the first + * transition first. Note that each separate attempt must + * use a separate copy of the registers. 
+ */ + memcpy(regs1, regs, sizeof(regs1)); + sub_machine.init = tuple->next_state_1; + sub_machine.final = machine->final; + ret1 = match(entire_str, p, curlen, &sub_machine, regs1); + + /* + * Now try the second transition + */ + memcpy(regs2, regs, sizeof(regs2)); + sub_machine.init = tuple->next_state_2; + sub_machine.final = machine->final; + ret2 = match(entire_str, p, curlen, &sub_machine, regs2); + + /* + * If they both failed, the whole thing failed. Otherwise, + * return the longer of the two, plus the length we + * ourselves matched previously. Note that we return the + * register set from the winning match. + */ + if (ret1 < 0 && ret2 < 0) { + // they both failed + return -1; + } else if (ret1 > ret2) { + // use the first register set and result length + memcpy(regs, regs1, sizeof(regs1)); + return ret1 + (p - str); + } else { + // use the second register set and result length + memcpy(regs, regs2, sizeof(regs2)); + return ret2 + (p - str); + } + } + + // If we're in the final state, return success + if (cur_state == machine->final) { + // finish off any group involved in the final state + note_group(regs, cur_state, p); + + // return the length we matched + return p - str; + } + } +} + +int re_context::search(const char *str, size_t len, const re_machine *machine, + re_group_register *regs, int *result_len) { + int ofs; + + /* + * Starting at the first character in the string, search for the + * pattern at each subsequent character until we either find the + * pattern or run out of string to test. 
+ */ + for (ofs = 0 ; ofs < (int)len ; ++ofs) { + int matchlen; + + // check for a match + matchlen = match(str, str + ofs, len - ofs, machine, regs); + if (matchlen >= 0) { + // we found a match here - return the length and offset + *result_len = matchlen; + return ofs; + } + } + + // we didn't find a match + return -1; +} + +void re_context::save_search_str(const char *str, size_t len) { + // if the string is empty, this is easy + if (len == 0) { + // nothing to store - just save the length and return + _curlen = 0; + return; + } + + // if the current buffer isn't big enough, allocate a new one + if (_strbuf == 0 || _strbufsiz < len) { + /* + * free any previous buffer - its contents are no longer + * important, since we're about to overwrite it with a new + * string + */ + if (_strbuf != 0) + mchfre(_strbuf); + + /* + * allocate a new buffer; round up to the next 256-byte + * increment to make sure we're not constantly reallocating to + * random sizes + */ + _strbufsiz = ((len + 255) & ~255); + + // allocate it + _strbuf = (char *)mchalo(_errctx, _strbufsiz, "regex str"); + } + + // copy the string + memcpy(_strbuf, str, len); + + // save the length + _curlen = len; +} + +int re_context::compile_and_search(const char *pattern, size_t patlen, + const char *searchstr, size_t searchlen, int *result_len) { + re_machine machine; + + // compile the expression - return failure if we get an error + if (compile(pattern, patlen, &machine) != RE_STATUS_SUCCESS) + return -1; + + // save the search string in our internal buffer + save_search_str(searchstr, searchlen); + + // clear the group registers + memset(_regs, 0, sizeof(_regs)); + + /* + * search for the pattern in our copy of the string - use the copy + * so that the group registers stay valid even if the caller + * deallocates the original string after we return + */ + return search(_strbuf, _curlen, &machine, _regs, result_len); +} + +int re_context::compile_and_match(const char *pattern, size_t patlen, + const char 
*searchstr, size_t searchlen) {
+	re_machine machine;
+
+	// compile the expression - report "no match" (-1) if we get an error;
+	// 0 would be indistinguishable from a successful zero-length match
+	if (compile(pattern, patlen, &machine) != RE_STATUS_SUCCESS)
+		return -1;
+
+	// save the search string in our internal buffer
+	save_search_str(searchstr, searchlen);
+
+	// clear the group registers
+	memset(_regs, 0, sizeof(_regs));
+
+	// match the string
+	return match(_strbuf, _strbuf, _curlen, &machine, _regs);
+}
+
+} // End of namespace TADS2
+} // End of namespace TADS
+} // End of namespace Glk
diff --git a/engines/glk/tads/tads2/regex.h b/engines/glk/tads/tads2/regex.h
new file mode 100644
index 0000000000..9040a029f3
--- /dev/null
+++ b/engines/glk/tads/tads2/regex.h
@@ -0,0 +1,315 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * + */ + +#ifndef GLK_TADS_TADS2_REGEX +#define GLK_TADS_TADS2_REGEX + +#include "common/array.h" +#include "engines/glk/tads/tads2/ler.h" + +namespace Glk { +namespace TADS { +namespace TADS2 { + +/** + * state ID + */ +typedef int re_state_id; + +/** + * invalid state ID - used to mark null machines + */ +#define RE_STATE_INVALID ((re_state_id)-1) + +/** + * first valid state ID + */ +#define RE_STATE_FIRST_VALID ((re_state_id)0) + + +/** + * Group register structure. Each register keeps track of the starting + * and ending offset of the group's text. + */ +struct re_group_register { + const char *start_ofs; + const char *end_ofs; +}; + +/** + * number of group registers we keep + */ +#define RE_GROUP_REG_CNT 10 + +/** + * Denormalized state transition tuple. Each tuple represents the + * complete set of transitions out of a particular state. A particular + * state can have one character transition, or two epsilon transitions. + * Note that we don't need to store the state ID in the tuple, because + * the state ID is the index of the tuple in an array of state tuples. + */ +struct re_tuple { + // the character we must match to transition to the target state + char ch; + + // the target states + re_state_id next_state_1; + re_state_id next_state_2; + + // character range match table, if used + unsigned char *char_range; + + // flags + byte flags; +}; + + +/** + * Tuple flags + */ +enum { + // this state is the start of a group - the 'ch' value is the group ID + RE_STATE_GROUP_BEGIN = 0x02, + + // this state is the end of a group - 'ch' is the group ID */ + RE_STATE_GROUP_END = 0x04 +}; + +/** + * Status codes + */ +typedef enum { + // success + RE_STATUS_SUCCESS = 0, + + // compilation error - group nesting too deep + RE_STATUS_GROUP_NESTING_TOO_DEEP +} re_status_t; + + +/** + * Regular expression compilation. This tracks the state of the compilation and + * stores the resources associated with the compiled expression. 
+ */ +class re_context { + /** + * A machine description. Machines are fully described by their initial + * and final state ID's. + */ + struct re_machine { + re_state_id init; ///< the machine's initial state + re_state_id final; ///< the machine's final state + + re_machine() : init(0), final(0) {} + + /** + * Build a null machine + */ + void build_null_machine() { + init = final = RE_STATE_INVALID; + } + + /** + * Determine if a machine is null + */ + bool isNull() const { + return (init == RE_STATE_INVALID); + } + }; +private: + /** + * Reset compiler - clears states and tuples + */ + void reset(); + + /** + * Set a transition from a state to a given destination state + */ + void set_trans(re_state_id id, re_state_id dest_id, char ch); + + /** + * Initialize a new machine, giving it an initial and final state + */ + void init_machine(re_machine *machine); + + /** + * Build a character recognizer + */ + void build_char(re_machine *machine, char ch); + + /** + * Build a character range recognizer. 'range' is a 256-bit (32-byte) bit vector. + */ + void build_char_range(re_machine *machine, unsigned char *range, int exclusion); + + /** + * Build a group recognizer. This is almost the same as a character + * recognizer, but matches a previous group rather than a literal character. + */ + void build_group_matcher(re_machine *machine, int group_num); + + /** + * Build a concatenation recognizer + */ + void build_concat(re_machine *new_machine, re_machine *lhs, re_machine *rhs); + + /** + * Build a group machine. sub_machine contains the machine that + * expresses the group's contents; we'll fill in new_machine with a + * newly-created machine that encloses and marks the group. 
+ */ + void build_group(re_machine *new_machine, re_machine *sub_machine, int group_id); + + /** + * Build an alternation recognizer + */ + void build_alter(re_machine *new_machine, re_machine *lhs, re_machine *rhs); + + /** + * Build a closure recognizer + */ + void build_closure(re_machine *new_machine, re_machine *sub, char specifier); + + /** + * Concatenate the second machine onto the first machine, replacing the + * first machine with the resulting machine. If the first machine is a + * null machine (created with re_build_null_machine), we'll simply copy + * the second machine into the first. + */ + void concat_onto(re_machine *dest, re_machine *rhs); + + /** + * Alternate the second machine onto the first machine, replacing the + * first machine with the resulting machine. If the first machine is a + * null machine, this simply replaces the first machine with the second + * machine. If the second machine is null, this simply leaves the first + * machine unchanged. + */ + void alternate_onto(re_machine *dest, re_machine *rhs); + + /** + * Compile an expression + */ + re_status_t compile(const char *expr, size_t exprlen, re_machine *result_machine); + + /** + * Note a group position if appropriate + */ + void note_group(re_group_register *regs, re_state_id id, const char *p); + + /** + * Determine if a character is part of a word. We consider letters and + * numbers to be word characters. + */ + bool is_word_char(char c) const; + + /** + * Match a string to a compiled expression. Returns the length of the + * match if successful, or -1 if no match was found. + */ + int match(const char *entire_str, const char *str, size_t origlen, + const re_machine *machine, re_group_register *regs); + + /** + * Search for a regular expression within a string. Returns -1 if the string + * cannot be found, otherwise returns the offset from the start of the string + * to be searched of the start of the first match for the pattern. 
+ */ + int search(const char *str, size_t len, const re_machine *machine, + re_group_register *regs, int *result_len); + + /** + * Make a copy of a search string in our private buffer. + */ + void save_search_str(const char *str, size_t len); +public: + errcxdef *_errctx; ///< error context + re_state_id _next_state; ///< next available state ID + + /** + * The array of transition tuples. We'll allocate this array and + * expand it as necessary. + */ + Common::Array _tuple_arr; + + // current group ID + int _cur_group; + + // group registers + re_group_register _regs[RE_GROUP_REG_CNT]; + + /** + * Buffer for retaining a copy of the last string we scanned. We + * retain our own copy of each string, and point the group registers + * into this copy rather than the caller's original string -- this + * ensures that the group registers remain valid even after the + * caller has deallocated the original string. + */ + char *_strbuf; + + /** + * length of the string currently in the buffer + */ + size_t _curlen; + + /** + * size of the buffer allocated to strbuf + */ + size_t _strbufsiz; +public: + /** + * Constructor. The memory for the context structure itself + * must be allocated and maintained by the caller. + */ + re_context(errcxdef *errctx); + + /** + * Destructor + */ + ~re_context(); + + /** + * Allocate a new state ID + */ + re_state_id alloc_state(); + + /** + * Compile an expression and search for a match within the given string. + * Returns the offset of the match, or -1 if no match was found. + */ + int compile_and_search(const char *pattern, size_t patlen, + const char *searchstr, size_t searchlen, int *result_len); + + /** + * Compile an expression and check for a match. Returns the length of the match + * if we found a match, -1 if we found no match. This is not a search function; + * we merely match the leading substring of the given string to the given pattern. 
+ */ + int compile_and_match(const char *pattern, size_t patlen, + const char *searchstr, size_t searchlen); +}; + +} // End of namespace TADS2 +} // End of namespace TADS +} // Engine of namespace GLK + +#endif diff --git a/engines/glk/tads/tads2/tads2.cpp b/engines/glk/tads/tads2/tads2.cpp index 9c50c20d24..79a36fa0ff 100644 --- a/engines/glk/tads/tads2/tads2.cpp +++ b/engines/glk/tads/tads2/tads2.cpp @@ -27,6 +27,7 @@ namespace TADS { namespace TADS2 { TADS2::TADS2(OSystem *syst, const GlkGameDescription &gameDesc) : OS(syst, gameDesc) { + cmap_init_default(); } void TADS2::runGame(Common::SeekableReadStream *gameFile) { diff --git a/engines/glk/tads/tads2/tads2.h b/engines/glk/tads/tads2/tads2.h index 2c799a36e7..f0c42b3574 100644 --- a/engines/glk/tads/tads2/tads2.h +++ b/engines/glk/tads/tads2/tads2.h @@ -30,6 +30,27 @@ namespace Glk { namespace TADS { namespace TADS2 { +/** + * map a native character (read externally) into an internal character + */ +#define cmap_n2i(c) (G_cmap_input[(unsigned char)(c)]) + +/** + * map an internal character into a native character (for display) + */ +#define cmap_i2n(c) (G_cmap_output[(unsigned char)(c)]) + +/** + * the full name (for display purposes) of the loaded character set + */ +#define CMAP_LDESC_MAX_LEN 40 + +/** + * Maximum expansion for an HTML entity mapping + */ +#define CMAP_MAX_ENTITY_EXPANSION 50 + + /** * TADS 2 game interpreter */ @@ -37,21 +58,48 @@ class TADS2 : public OS { private: // STUBS void os_printz(const Common::String &s) {} -public: + void tio_set_html_expansion(unsigned int html_char_val, + const char *expansion, size_t expansion_len) {} +private: /** - * Constructor + * \defgroup cmap + * @{ */ - TADS2(OSystem *syst, const GlkGameDescription &gameDesc); /** - * Execute the game + * flag: true -> a character set has been explicitly loaded, so we + * should ignore any game character set setting */ - virtual void runGame(Common::SeekableReadStream *gameFile) override; + bool S_cmap_loaded; /** - 
	 * Returns the running interpreter type
+	 * input-mapping table - for native character 'n', cmap_input[n] yields
+	 * the internal character code
+	 */
+	unsigned char G_cmap_input[256];
+
+	/**
+	 * output-mapping table - for internal character 'n', cmap_output[n]
+	 * yields the output character code
+	 */
+	unsigned char G_cmap_output[256];
+
+	/**
+	 * the ID of the loaded character set
+	 */
+	char G_cmap_id[5];
+
+	/**
+	 * the full name (for display purposes) of the loaded character set
+	 */
+	char G_cmap_ldesc[CMAP_LDESC_MAX_LEN + 1];
+
+	/**@}*/
+private:
+	/**
+	 * \defgroup trd
+	 * @{
 	 */
-	virtual InterpreterType getInterpreterType() const override { return INTERPRETER_TADS2; }
 
 	void trdmain1(errcxdef *errctx);
 
@@ -59,6 +107,72 @@ public:
 	 * printf-style formatting
 	 */
 	void trdptf(const char *fmt, ...);
+
+	/**@}*/
+
+	/**
+	 * \addtogroup cmap
+	 * @{
+	 */
+
+	/**
+	 * Initialize the default character mappings. If no mapping file is to
+	 * be read, this function will establish identity mappings that leave
+	 * characters untranslated.
+	 */
+	void cmap_init_default();
+
+	/**
+	 * Load a character map file. Returns zero on success, non-zero on
+	 * failure. If filename is null, we'll use the default mapping.
+	 */
+	int cmap_load(const char *filename);
+
+	/**
+	 * Turn off character translation. This overrides any game character
+	 * set that we find and simply uses the default translation.
+	 */
+	void cmap_override(void);
+
+	/**
+	 * Set the game's internal character set. This should be called when a
+	 * game is loaded, and the game specifies an internal character set. If
+	 * there is no character map file explicitly loaded, we will attempt to
+	 * load a character mapping file that maps this character set to the
+	 * current native character set. Signals an error on failure. This
+	 * routine will succeed (without doing anything) if a character set has
+	 * already been explicitly loaded, since an explicitly-loaded character
+	 * set overrides the automatic character set selection that we attempt
+	 * when loading a game.
+	 *
+	 * argv0 must be provided so that we know where to look for our mapping
+	 * file on systems where mapping files are stored in the same directory
+	 * as the TADS executables.
+	 */
+	void cmap_set_game_charset(errcxdef *errctx, const char *internal_id,
+		const char *internal_ldesc, const char *argv0);
+
+	/**
+	 * Internal routine to load a character map from a file
+	 */
+	int cmap_load_internal(const char *filename);
+
+	/**@}*/
+public:
+	/**
+	 * Constructor
+	 */
+	TADS2(OSystem *syst, const GlkGameDescription &gameDesc);
+
+	/**
+	 * Execute the game
+	 */
+	virtual void runGame(Common::SeekableReadStream *gameFile) override;
+
+	/**
+	 * Returns the running interpreter type
+	 */
+	virtual InterpreterType getInterpreterType() const override { return INTERPRETER_TADS2; }
 };
 
 typedef TADS2 appctxdef;
diff --git a/engines/glk/tads/tads2/tads2_cmap.cpp b/engines/glk/tads/tads2/tads2_cmap.cpp
new file mode 100644
index 0000000000..cf63c9a763
--- /dev/null
+++ b/engines/glk/tads/tads2/tads2_cmap.cpp
@@ -0,0 +1,268 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#include "glk/tads/tads2/tads2.h" +#include "glk/tads/tads2/types.h" + +namespace Glk { +namespace TADS { +namespace TADS2 { + +/** + * Signatures for character map files. The signature is stored at the + * beginning of the file. + */ +// single-byte character map version 1.0.0 +#define CMAP_SIG_S100 "TADS2 charmap S100\n\r\01a" + +void TADS2::cmap_init_default() { + size_t i; + + // initialize the input table + for (i = 0 ; i < sizeof(G_cmap_input)/sizeof(G_cmap_input[0]) ; ++i) + G_cmap_input[i] = (unsigned char)i; + + // initialize the output table + for (i = 0 ; i < sizeof(G_cmap_output)/sizeof(G_cmap_output[0]) ; ++i) + G_cmap_output[i] = (unsigned char)i; + + // we have a null ID + memset(G_cmap_id, 0, sizeof(G_cmap_id)); + + // indicate that it's the default + strcpy(G_cmap_ldesc, "(native/no mapping)"); + + // note that we have no character set loaded + S_cmap_loaded = false; +} + +int TADS2::cmap_load_internal(const char *filename) { + osfildef *fp; + static char sig1[] = CMAP_SIG_S100; + char buf[256]; + uchar lenbuf[2]; + size_t len; + int sysblk; + + // if there's no mapping file, use the default mapping + if (filename == 0) { + // initialize with the default mapping + cmap_init_default(); + + // return success + return 0; + } + + // open the file + fp = osfoprb(filename, OSFTCMAP); + if (fp == 0) + return 1; + + // check the signature + if (osfrb(fp, buf, sizeof(sig1)) + || memcmp(buf, sig1, sizeof(sig1)) != 0) { + osfcls(fp); + return 2; + } + + // load the ID + G_cmap_id[4] = '\0'; + if (osfrb(fp, G_cmap_id, 4)) { + osfcls(fp); + return 3; + } + + // load the long description + if (osfrb(fp, lenbuf, 2) + || (len = osrp2(lenbuf)) > sizeof(G_cmap_ldesc) + || osfrb(fp, 
G_cmap_ldesc, len)) { + osfcls(fp); + return 4; + } + + // load the two tables - input, then output + if (osfrb(fp, G_cmap_input, sizeof(G_cmap_input)) + || osfrb(fp, G_cmap_output, sizeof(G_cmap_output))) { + osfcls(fp); + return 5; + } + + // read the next section header + if (osfrb(fp, buf, 4)) { + osfcls(fp); + return 6; + } + + // if it's "SYSI", read the system information string + if (!memcmp(buf, "SYSI", 4)) { + // read the length prefix, then the string + if (osfrb(fp, lenbuf, 2) + || (len = osrp2(lenbuf)) > sizeof(buf) + || osfrb(fp, buf, len)) { + osfcls(fp); + return 7; + } + + // we have a system information block + sysblk = true; + } else { + // there's no system information block + sysblk = false; + } + + /* + * call the OS code, so that it can do any system-dependent + * initialization for the new character mapping + */ + os_advise_load_charmap(G_cmap_id, G_cmap_ldesc, sysblk ? buf : ""); + + // read the next section header + if (sysblk && osfrb(fp, buf, 4)) { + osfcls(fp); + return 8; + } + + // see if we have an entity list + if (!memcmp(buf, "ENTY", 4)) { + // read the entities + for (;;) { + unsigned int cval; + char expansion[CMAP_MAX_ENTITY_EXPANSION]; + + // read the next item's length and character value + if (osfrb(fp, buf, 4)) { + osfcls(fp); + return 9; + } + + // decode the values + len = osrp2(buf); + cval = osrp2(buf+2); + + // if we've reached the zero marker, we're done + if (len == 0 && cval == 0) + break; + + // read the string + if (len > CMAP_MAX_ENTITY_EXPANSION + || osfrb(fp, expansion, len)) { + osfcls(fp); + return 10; + } + + // tell the output code about the expansion + tio_set_html_expansion(cval, expansion, len); + } + } + + /* + * ignore anything else we find - if the file format is updated to + * include extra information in the future, and this old code tries + * to load an updated file, we'll just ignore the new information, + * which should always be placed after the "SYSI" block (if present) + * to ensure 
compatibility with past versions (such as this code) + */ + // no problems - close the file and return success + osfcls(fp); + return 0; +} + +int TADS2::cmap_load(const char *filename) { + int err; + + // try loading the file + if ((err = cmap_load_internal(filename)) != 0) + return err; + + /* + * note that we've explicitly loaded a character set, if they named + * a character set (if not, this simply establishes the default + * setting, so we haven't explicitly loaded anything) + */ + if (filename != nullptr) + S_cmap_loaded = true; + + // success + return 0; +} + +void TADS2::cmap_override() { + // apply the default mapping + cmap_init_default(); + + /* + * pretend we have a character map loaded, so that we don't try to + * load another one if the game specifies a character set + */ + S_cmap_loaded = true; +} + +void TADS2::cmap_set_game_charset(errcxdef *ec, const char *internal_id, + const char *internal_ldesc, const char *argv0) { + char filename[OSFNMAX]; + + /* + * If a character set is already explicitly loaded, ignore the + * game's character set - the player asked us to use a particular + * mapping, so ignore what the game wants. (This will probably + * result in incorrect display of non-ASCII character values, but + * the player is most likely to use this to avoid errors when an + * appropriate mapping file for the game is not available. In this + * case, the player informs us by setting the option that he or she + * knows and accepts that the game will not look exactly right.) 
+ */ + if (S_cmap_loaded) + return; + + /* + * ask the operating system to name the mapping file -- this routine + * will determine, if possible, the current native character set, + * and apply a system-specific naming convention to tell us what + * mapping file we should open + */ + os_gen_charmap_filename(filename, internal_id, argv0); + + // try loading the mapping file + if (cmap_load_internal(filename)) + errsig2(ec, ERR_CHRNOFILE, + ERRTSTR, errstr(ec, filename, strlen(filename)), + ERRTSTR, errstr(ec, internal_ldesc, strlen(internal_ldesc))); + + /** + * We were successful - the game's internal character set is now + * mapped to the current native character set. Even though we + * loaded an ldesc from the mapping file, forget that and store the + * internal ldesc that the game specified. The reason we do this is + * that it's possible that the player will dynamically switch native + * character sets in the future, at which point we'll need to + * re-load the mapping table, which could raise an error if a + * mapping file for the new character set isn't available. So, we + * may need to provide the same explanation later that we needed to + * provide here. Save the game's character set ldesc for that + * eventuality, since it describes exactly what the *game* wanted. + */ + strcpy(G_cmap_ldesc, internal_ldesc); +} + +} // End of namespace TADS2 +} // End of namespace TADS +} // End of namespace Glk diff --git a/engines/glk/tads/tads2/types.h b/engines/glk/tads/tads2/types.h new file mode 100644 index 0000000000..52a530939b --- /dev/null +++ b/engines/glk/tads/tads2/types.h @@ -0,0 +1,188 @@ +/* ScummVM - Graphic Adventure Engine + * + * ScummVM is the legal property of its developers, whose names + * are too numerous to list here. Please refer to the COPYRIGHT + * file distributed with this source distribution. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#ifndef GLK_TADS_TADS2_TYPES +#define GLK_TADS_TADS2_TYPES + +namespace Glk { +namespace TADS { +namespace TADS2 { + +typedef unsigned char uchar; +typedef Common::SeekableReadStream osfildef; + +/** + * Allocate a memory block + */ +#define mchalo(CTX, SIZE, COMMENT) ((byte *)new byte[SIZE]) + +/** + * Free a memory block + */ +#define mchfre(PTR) delete[] (byte *)PTR + +/** + * Close a file + */ +#define osfcls(FP) delete FP + +/** + * Read from a file + */ +#define osfrb(FP, BUF, SIZE) FP->read(BUF, SIZE) + +/** + * Read a 16-bit integer from memory + */ +#define osrp2(MEM) READ_LE_UINT16(MEM) + +/** + * The character (or characters) which mark the beginning of a special fileref string. + * The important thing is that the string be one that is either not allowed in + * filenames on your platform or is unlikely to be the first part of a filename. + */ +#define OSS_FILEREF_STRING_PREFIX ":" + +/** + * The character (or characters) which mark the end of a special fileref string. 
+ * Using this and OSS_FILEREF_STRING_PREFIX, you should be able to come up with + * something which forms an invalid filename + */ +#define OSS_FILEREF_STRING_SUFFIX "" + +/** + * Maximum length of status line text + */ +#define OSS_STATUS_STRING_LEN 80 + +/** + * Maximum size for filenames + */ +#define OSFNMAX 1024 + +/** + * Important note: do not change these values when porting TADS. These + * values can be used by games, so they must be the same on all platforms. + */ +enum { + OS_AFP_OPEN = 1, ///< choose an existing file to open for reading + OS_AFP_SAVE = 2 ///< choose a filename for saving to a file +}; + +/** + * File types.These type codes are used when opening or creating a file, + * so that the OS routine can set appropriate file system metadata + * to describe or find the file type. + * + * The type os_filetype_t is defined for documentary purposes; it's + * always just an int. + */ +enum os_filetype_t { + OSFTGAME = 0, ///< a game data file (.gam) + OSFTSAVE = 1, ///< a saved game (.sav) + OSFTLOG = 2, ///< a transcript (log) file + OSFTSWAP = 3, ///< swap file + OSFTDATA = 4, ///< user data file (used with the TADS fopen() call) + OSFTCMD = 5, ///< QA command/log file + OSFTERRS = 6, ///< error message file + OSFTTEXT = 7, ///< text file - used for source files + OSFTBIN = 8, ///< binary file of unknown type - resources, etc + OSFTCMAP = 9, ///< character mapping file + OSFTPREF = 10, ///< preferences file + OSFTUNK = 11, ///< unknown - as a filter, matches any file type + OSFTT3IMG = 12, ///< T3 image file (.t3 - formerly .t3x) + OSFTT3OBJ = 13, ///< T3 object file (.t3o) + OSFTT3SYM = 14, ///< T3 symbol export file (.t3s) + OSFTT3SAV = 15 ///< T3 saved state file (.t3v) +}; + +/** + * Constants for os_getc() when returning commands. When used for command line + * editing, special keys (arrows, END, etc.) should cause os_getc() to return 0, + * and return the appropriate CMD_ value on the NEXT call. 
Hence, os_getc() must + * keep the appropriate information around statically for the next call when a + * command key is issued. + * + * The comments indicate which CMD_xxx codes are "translated" codes and which are + * "raw"; the difference is that, when a particular keystroke could be interpreted + * as two different CMD_xxx codes, one translated and the other raw, os_getc() + * should always return the translated version of the key, and os_getc_raw() + * should return the raw version. + */ +enum KeyCmd { + CMD_UP = 1, ///< move up/up arrow (translated) + CMD_DOWN = 2, ///< move down/down arrow (translated) + CMD_RIGHT = 3, ///< move right/right arrow (translated) + CMD_LEFT = 4, ///< move left/left arrow (translated) + CMD_END = 5, ///< move cursor to end of line (translated) + CMD_HOME = 6, ///< move cursor to start of line (translated) + CMD_DEOL = 7, ///< delete to end of line (translated) + CMD_KILL = 8, ///< delete entire line (translated) + CMD_DEL = 9, ///< delete current character (translated) + CMD_SCR = 10, ///< toggle scrollback mode (translated) + CMD_PGUP = 11, ///< page up (translated) + CMD_PGDN = 12, ///< page down (translated) + CMD_TOP = 13, ///< top of file (translated) + CMD_BOT = 14, ///< bottom of file (translated) + CMD_F1 = 15, ///< function key F1 (raw) + CMD_F2 = 16, ///< function key F2 (raw) + CMD_F3 = 17, ///< function key F3 (raw) + CMD_F4 = 18, ///< function key F4 (raw) + CMD_F5 = 19, ///< function key F5 (raw) + CMD_F6 = 20, ///< function key F6 (raw) + CMD_F7 = 21, ///< function key F7 (raw) + CMD_F8 = 22, ///< function key F8 (raw) + CMD_F9 = 23, ///< function key F9 (raw) + CMD_F10 = 24, ///< function key F10 (raw) + CMD_CHOME = 25, ///< control-home (raw) + CMD_TAB = 26, ///< tab (translated) + CMD_SF2 = 27, ///< shift-F2 (raw) + ///< not used (obsolete) - 28 + CMD_WORD_LEFT = 29, ///< word left (ctrl-left on dos) (translated) + CMD_WORD_RIGHT = 30,///< word right (ctrl-right on dos) (translated) + CMD_WORDKILL = 31, ///< 
delete word right (translated) + CMD_EOF = 32, ///< end-of-file (raw) + CMD_BREAK = 33, ///< break (Ctrl-C or local equivalent) (translated) + CMD_INS = 34, ///< insert key (raw) + + /** + * ALT-keys - add alphabetical code to CMD_ALT: ALT-A == CMD_ALT + 0, + * ALT-B == CMD_ALT + 1, ALT-C == CMD_ALT + 2, etc + * + * These keys are all raw (untranslated). + */ + CMD_ALT = 128 ///< start of ALT keys +}; + +/** + * Status mode codes + */ +enum StatusMode { + OSS_STATUS_MODE_STORY = 0, + OSS_STATUS_MODE_STATUS = 1 +}; + +} // End of namespace TADS2 +} // End of namespace TADS +} // End of namespace Glk + +#endif -- cgit v1.2.3