diff options
Diffstat (limited to 'engines/glk/archetype/token.cpp')
-rw-r--r-- | engines/glk/archetype/token.cpp | 441 |
1 files changed, 441 insertions, 0 deletions
diff --git a/engines/glk/archetype/token.cpp b/engines/glk/archetype/token.cpp new file mode 100644 index 0000000000..3be0e2365b --- /dev/null +++ b/engines/glk/archetype/token.cpp @@ -0,0 +1,441 @@ +/* ScummVM - Graphic Adventure Engine + * + * ScummVM is the legal property of its developers, whose names + * are too numerous to list here. Please refer to the COPYRIGHT + * file distributed with this source distribution. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + */ + +#include "glk/archetype/token.h" +#include "glk/archetype/archetype.h" +#include "glk/archetype/id_table.h" +#include "glk/archetype/misc.h" +#include "glk/archetype/keywords.h" + +namespace Glk { +namespace Archetype { + +enum StateType { START, STOP, DECIDE, WHITE, COMMENT, QUOTE, LITERAL, IDENTIFIER, NUMBER, OPERATOR }; + +bool isWhitespace(char c) { + return c == ' ' || c == '\t' || c == NEWLINE_CH; +} + +bool isLiteralType(char c) { + return c == '"' || c == '\''; +} + +bool isLetter(char c) { + return Common::isAlpha(c); +} + +bool isDigit(char c) { + return Common::isDigit(c); +} + +bool isStartChar(char c) { + return Common::isAlpha(c) || c == '_'; +} + +bool isIdChar(char c) { + return isStartChar(c) || isDigit(c); +} + +bool isLongOper(char c) { + return c == '<' || c == '>' || c == ':' || c == '+' || c == '-' || c == '*' + || c == '/' || c == '&' || c == '~'; +} + +bool isOperChar(char c) { + return isLongOper(c) || c == '=' || c == '.' || c == '^' || c == '?'; +} + +/** + * Performs a binary search on the given ordered array, passing back the + * index of the given string if it's in the array. + * Used for quickly finding an operator or reserved word. 
+ * @param the_array ordered array of short strings + * @param elements number of elements in the array + * @param match_str string to match + * @param a_index Outputs the array index + * @returns true if match_str was an element in the_array; false otherwise + */ +static bool binary_search(const LookupType &the_array, int elements, + const ShortStringType &match_str, int &a_index) { + int left = 0, right = elements - 1, mid; + + do { + mid = (left + right) / 2; + if (match_str < the_array[mid]) + right = mid - 1; + else + left = mid + 1; + } while (match_str != the_array[mid] && left <= right); + + if (match_str != the_array[mid]) { + return false; + } else { + a_index = mid; + return true; + } +} + +/** + * Searches the given unordered xarray for a string matching the given + * string; if found, returns the index in the list of the string. If + * not found, adds it to the list. + * @param the_xarray xarray to be searched + * @param the_str string to be compared + * @returns The index of the_str in the_xarray. + */ +static int add_unique_str(XArrayType &the_xarray, const String &the_str) { + StringPtr new_str; + int i; + void *p; + + // Duplicate the given string + new_str = NewConstStr(the_str); + + if (the_xarray.empty()) { + append_to_xarray(the_xarray, (void *)new_str); + return the_xarray.size() - 1; + } else { + i = 1; + while (index_xarray(the_xarray, i, p) && *((StringPtr)p) != the_str) + ++i; + + if (*((StringPtr)p) == the_str) { + FreeConstStr(new_str); + return i; + } else { + append_to_xarray(the_xarray, (void *)new_str); + return the_xarray.size() - 1; + } + } +} + +/** + * Similar to the above, except that it is to be used when the strings are + * not expected to repeat much. 
+ */ +static int add_non_unique_str(XArrayType &the_xarray, const String &the_str) { + append_to_xarray(the_xarray, (void *)NewConstStr(the_str)); + return the_xarray.size() - 1; +} + +bool get_token(progfile &f) { + StateType state; + bool more_chars; + char bracket, next_ch = '\0'; + String s; + + // Check for old token. f.newlines may have changed while an old token was unconsumed, + // so if the unconsumed token was a NEWLINE and f.newlines is false, we must continue + // and get another token; otherwise we jump out with what we have + if (!f.consumed) { + f.consumed = true; + + if (!((f.ttype == NEWLINE) && !f.newlines)) + return true; + } + + more_chars = true; + state = START; + + while (state != STOP) { + switch (state) { + case START: + if (f.readChar(next_ch)) { + state = DECIDE; + } else { + more_chars = false; + state = STOP; + } + break; + + case DECIDE: + if (!more_chars) + state = STOP; + else if (isWhitespace(next_ch)) + state = WHITE; + else if (isLiteralType(next_ch)) + state = LITERAL; + else if (isStartChar(next_ch)) + state = IDENTIFIER; + else if (isDigit(next_ch)) + state = NUMBER; + else if (isOperChar(next_ch)) + state = OPERATOR; + else { + // a single-character token + switch (next_ch) { + case '#': + state = COMMENT; + case ';': + if (!f.newlines) { + state = START; + } + else { + f.ttype = NEWLINE; + f.tnum = (int)NEWLINE_CH; + state = STOP; + } + default: + f.ttype = PUNCTUATION; + f.tnum = (int)next_ch; + state = STOP; + } + break; + } + break; + + case WHITE: + while (state == WHITE && isWhitespace(next_ch)) { + if (next_ch == NEWLINE_CH && f.newlines) { + f.ttype = NEWLINE; + state = STOP; + } else { + more_chars = f.readChar(next_ch); + } + } + if (state == WHITE) { + if (more_chars) + // decide on new non-white character + state = DECIDE; + else + state = STOP; + } + break; + + case COMMENT: + case QUOTE: + s = ""; + more_chars = f.readChar(next_ch); + while (more_chars && next_ch != NEWLINE_CH) { + s = s + next_ch; + more_chars = 
f.readChar(next_ch); + } + if (state == COMMENT) { + if (more_chars) + state = START; + else + state = STOP; + } else { + // quoted literal + f.unreadChar(next_ch); // leave \n for the next guy + f.ttype = QUOTE_LIT; + f.tnum = add_non_unique_str(g_vm->Literals, s); + state = STOP; + } + break; + + case LITERAL: + bracket = next_ch; + s = ""; + more_chars = f.readChar(next_ch); // start the loop + while (more_chars && next_ch != NEWLINE_CH && next_ch != bracket) { + if (next_ch == '\\') { + more_chars = f.readChar(next_ch); + switch (next_ch) { + case 't': + next_ch = '\t'; + break; + case 'b': + next_ch = '\x8'; + break; + case 'e': + next_ch = (char)27; + break; + case'n': + s = s + '\r'; + next_ch = '\n'; + break; + } + } + s = s + next_ch; + + more_chars = f.readChar(next_ch); + } + + if (next_ch != bracket) { + f.sourcePos(); + error("Unterminated literal"); + } else { + switch (bracket) { + case '"': + f.ttype = TEXT_LIT; + f.tnum = add_non_unique_str(g_vm->Literals, s); + break; + case '\'': + f.ttype = MESSAGE; + f.tnum = add_unique_str(g_vm->Vocabulary, s); + break; + default: + error("Programmer error: unknown literal type"); + break; + } + + state = STOP; + } + break; + + case IDENTIFIER: + s = ""; + while (isIdChar(next_ch)) { + s = s + next_ch; + more_chars = f.readChar(next_ch); + } + if (!isIdChar(next_ch)) + f.unreadChar(next_ch); + + // Check for reserved words or operators + if (binary_search(Reserved_Wds, NUM_RWORDS, s, f.tnum)) + f.ttype = RESERVED; + else if (binary_search(Operators, NUM_OPERS, s, f.tnum)) + f.ttype = OPER; + else { + f.ttype = IDENT; + f.tnum = add_ident(s); + } + + state = STOP; + break; + + case NUMBER: + s = ""; + while (more_chars && isDigit(next_ch)) { + s = s + next_ch; + more_chars = f.readChar(next_ch); + } + + if (!isDigit(next_ch)) + f.unreadChar(next_ch); + f.ttype = NUMERIC; + + f.tnum = atoi(s.c_str()); + state = STOP; + break; + + case OPERATOR: + s = ""; + + while (more_chars && isLongOper(next_ch) && s != ">>") 
{ + // have to stop short with >> + s = s + next_ch; + more_chars = f.readChar(next_ch); + } + + if (s == ">>") { + f.unreadChar(next_ch); + state = QUOTE; + } else { + if (!isOperChar(next_ch)) + f.unreadChar(next_ch); + else + s = s + next_ch; + + state = STOP; + + if (s == ":") { + f.ttype = PUNCTUATION; + f.tnum = (int)':'; + } else if (!binary_search(Operators, NUM_OPERS, s, f.tnum)) { + f.sourcePos(); + error("Unknown operator %s", s.c_str()); + } else { + f.ttype = OPER; + } + } + break; + + default: + break; + } + } + + return more_chars; +} + +void write_token(AclType the_type, int the_number) { + StringPtr str_ptr; + IdRecPtr the_id_ptr; + void *p; + + switch (the_type) { + case IDENT: + if (the_number < 0) { + g_vm->write("an identifier"); + } + else { + g_vm->write("<identifier %d >: ", the_number); + if (index_ident(the_number, the_id_ptr)) + g_vm->write("\"%s\"", the_id_ptr->id_name); + } + break; + + case RESERVED: + if (the_number < 0) + g_vm->write("a reserved word"); + else + g_vm->write("reserved word \"%s\"", Reserved_Wds[the_number]); + break; + + case OPER: + if (the_number < 0) + g_vm->write("an operator"); + else + g_vm->write("operator \"%s\"", Operators[the_number]); + break; + + case PUNCTUATION: + g_vm->write("%c", (char)the_number); + break; + + case TEXT_LIT: + if (the_number < 0) + g_vm->write("a text literal"); + else if (index_xarray(g_vm->Literals, the_number, p)) { + str_ptr = (StringPtr)p; + g_vm->write("\"%s\"", str_ptr->c_str()); + } + else { + g_vm->write("<text literal %d >: ", the_number); + } + break; + + case MESSAGE: + if (the_number < 0) + g_vm->write("a message"); + else if (index_xarray(g_vm->Vocabulary, the_number, p)) { + str_ptr = (StringPtr)p; + g_vm->write("\'%s\'", str_ptr->c_str()); + } else { + g_vm->write("<message %d>: ", the_number); + } + break; + + case NUMERIC: + g_vm->write("the number %d", the_number); + break; + + default: + g_vm->write("<unknown token>"); + } +} + +} // End of namespace Archetype +} 
// End of namespace Glk |