1 files changed, 441 insertions, 0 deletions
diff --git a/engines/glk/archetype/token.cpp b/engines/glk/archetype/token.cpp
new file mode 100644
index 0000000000..3be0e2365b
--- /dev/null
+++ b/engines/glk/archetype/token.cpp
@@ -0,0 +1,441 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers || c == whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License || c == or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not || c == g_vm->write to the Free Software
+ * Foundation || c == Inc. || c == 51 Franklin Street || c == Fifth Floor || c == Boston || c == MA 02110-1301 || c == USA.
+ *
+ */
+
+#include "glk/archetype/token.h"
+#include "glk/archetype/archetype.h"
+#include "glk/archetype/id_table.h"
+#include "glk/archetype/misc.h"
+#include "glk/archetype/keywords.h"
+
+namespace Glk {
+namespace Archetype {
+
+enum StateType { START, STOP, DECIDE, WHITE, COMMENT, QUOTE, LITERAL, IDENTIFIER, NUMBER, OPERATOR };
+
+bool isWhitespace(char c) {
+	return c == ' ' || c == '\t' || c == NEWLINE_CH;
+}
+
+bool isLiteralType(char c) {
+	return c == '"' || c == '\'';
+}
+
+bool isLetter(char c) {
+	return Common::isAlpha(c);
+}
+
+bool isDigit(char c) {
+	return Common::isDigit(c);
+}
+
+bool isStartChar(char c) {
+	return Common::isAlpha(c) || c == '_';
+}
+
+bool isIdChar(char c) {
+	return isStartChar(c) || isDigit(c);
+}
+
+bool isLongOper(char c) {
+	return c == '<' || c == '>' || c == ':' || c == '+' || c == '-' || c == '*'
+		|| c == '/' || c == '&' || c == '~';
+}
+
+bool isOperChar(char c) {
+	return isLongOper(c) || c == '=' || c == '.' || c == '^' || c == '?';
+}
+
+/**
+ * Performs a binary search on the given ordered array, passing back the
+ * index of the given string if it's in the array.
+ * Used for quickly finding an operator or reserved word.
+ * @param the_array		ordered array of short strings
+ * @param elements		number of elements in the array
+ * @param match_str		string to match
+ * @param a_index		Outputs the array index
+ * @returns				true if match_str was an element in the_array; false otherwise
+ */
+static bool binary_search(const LookupType &the_array, int elements,
+		const ShortStringType &match_str, int &a_index) {
+	int left = 0, right = elements - 1, mid;
+
+	do {
+		mid = (left + right) / 2;
+		if (match_str < the_array[mid])
+			right = mid - 1;
+		else
+			left = mid + 1;
+	} while (match_str != the_array[mid] && left <= right);
+
+	if (match_str != the_array[mid]) {
+		return false;
+	} else {
+		a_index = mid;
+		return true;
+	}
+}
+
+/**
+ * Searches the given unordered xarray for a string matching the given
+ * string; if found, returns the index in the list of the string.  If
+ * not found, adds it to the list.
+ * @param the_xarray		xarray to be searched
+ * @param the_str			string to be compared
+ * @returns					The index of the_str in the_xarray.
+ */
+static int add_unique_str(XArrayType &the_xarray, const String &the_str) {
+	StringPtr new_str;
+	int i;
+	void *p;
+
+	// Duplicate the given string
+	new_str = NewConstStr(the_str);
+
+	if (the_xarray.empty()) {
+		append_to_xarray(the_xarray, (void *)new_str);
+		return the_xarray.size() - 1;
+	} else {
+		i = 1;
+		while (index_xarray(the_xarray, i, p) && *((StringPtr)p) != the_str)
+			++i;
+
+		if (*((StringPtr)p) == the_str) {
+			FreeConstStr(new_str);
+			return i;
+		} else {
+			append_to_xarray(the_xarray, (void *)new_str);
+			return the_xarray.size() - 1;
+		}
+	}
+}
+
+/**
+ * Similar to the above, except that it is to be used when the strings are
+ * not expected to repeat much.
+ */
+static int add_non_unique_str(XArrayType &the_xarray, const String &the_str) {
+	append_to_xarray(the_xarray, (void *)NewConstStr(the_str));
+	return the_xarray.size() - 1;
+}
+
+bool get_token(progfile &f) {
+	StateType state;
+	bool more_chars;
+	char bracket, next_ch = '\0';
+	String s;
+
+	// Check for old token.  f.newlines may have changed while an old token was unconsumed,
+	// so if the unconsumed token was a NEWLINE and f.newlines is false, we must continue
+	// and get another token; otherwise we jump out with what we have
+	if (!f.consumed) {
+		f.consumed = true;
+
+		if (!((f.ttype == NEWLINE) && !f.newlines))
+			return true;
+	}
+
+	more_chars = true;
+	state      = START;
+
+	while (state != STOP) {
+		switch (state) {
+		case START:
+			if (f.readChar(next_ch)) {
+				state = DECIDE;
+			} else {
+				more_chars = false;
+				state = STOP;
+			}
+			break;
+
+		case DECIDE:
+			if (!more_chars)
+				state = STOP;
+			else if (isWhitespace(next_ch))
+				state = WHITE;
+			else if (isLiteralType(next_ch))
+				state = LITERAL;
+			else if (isStartChar(next_ch))
+				state = IDENTIFIER;
+			else if (isDigit(next_ch))
+				state = NUMBER;
+			else if (isOperChar(next_ch))
+				state = OPERATOR;
+			else {
+				// a single-character token
+				switch (next_ch) {
+				case '#':
+					state = COMMENT;
+				case ';':
+					if (!f.newlines) {
+						state = START;
+					}
+					else {
+						f.ttype = NEWLINE;
+						f.tnum = (int)NEWLINE_CH;
+						state = STOP;
+					}
+				default:
+					f.ttype = PUNCTUATION;
+					f.tnum = (int)next_ch;
+					state = STOP;
+				}
+				break;
+			}
+			break;
+
+		case WHITE:
+			while (state == WHITE && isWhitespace(next_ch)) {
+				if (next_ch == NEWLINE_CH && f.newlines) {
+					f.ttype = NEWLINE;
+					state = STOP;
+				} else {
+					more_chars = f.readChar(next_ch);
+				}
+			}
+			if (state == WHITE) {
+				if (more_chars)
+					// decide on new non-white character
+					state = DECIDE;
+				else
+					state = STOP;
+			}
+			break;
+
+		case COMMENT:
+		case QUOTE:
+			s = "";
+			more_chars = f.readChar(next_ch);
+			while (more_chars && next_ch != NEWLINE_CH) {
+				s = s + next_ch;
+				more_chars = f.readChar(next_ch);
+			}
+			if (state == COMMENT) {
+				if (more_chars)
+					state = START;
+				else
+					state = STOP;
+			} else {
+				// quoted literal
+				f.unreadChar(next_ch);           // leave \n for the next guy
+				f.ttype    = QUOTE_LIT;
+				f.tnum     = add_non_unique_str(g_vm->Literals, s);
+				state = STOP;
+			}
+			break;
+
+		case LITERAL:
+			bracket = next_ch;
+			s = "";
+			more_chars = f.readChar(next_ch);     // start the loop
+			while (more_chars && next_ch != NEWLINE_CH && next_ch != bracket) {
+				if (next_ch == '\\') {
+					more_chars = f.readChar(next_ch);
+					switch (next_ch) {
+					case 't':
+						next_ch = '\t';
+						break;
+					case 'b':
+						next_ch = '\x8';
+						break;
+					case 'e':
+						next_ch = (char)27;
+						break;
+					case'n':
+						s = s + '\r';
+						next_ch = '\n';
+						break;
+					}
+				}
+				s = s + next_ch;
+
+				more_chars = f.readChar(next_ch);
+			}
+
+			if (next_ch != bracket) {
+				f.sourcePos();
+				error("Unterminated literal");
+			} else {
+				switch (bracket) {
+				case '"':
+					f.ttype = TEXT_LIT;
+					f.tnum = add_non_unique_str(g_vm->Literals, s);
+					break;
+				case '\'':
+					f.ttype = MESSAGE;
+					f.tnum = add_unique_str(g_vm->Vocabulary, s);
+					break;
+				default:
+					error("Programmer error: unknown literal type");
+					break;
+				}
+
+				state = STOP;
+			}
+			break;
+
+		case IDENTIFIER:
+			s = "";
+			while (isIdChar(next_ch)) {
+				s = s + next_ch;
+				more_chars = f.readChar(next_ch);
+			}
+			if (!isIdChar(next_ch))
+				f.unreadChar(next_ch);
+
+			// Check for reserved words or operators
+			if (binary_search(Reserved_Wds, NUM_RWORDS, s, f.tnum))
+				f.ttype = RESERVED;
+			else if (binary_search(Operators, NUM_OPERS, s, f.tnum))
+				f.ttype = OPER;
+			else {
+				f.ttype = IDENT;
+				f.tnum = add_ident(s);
+			}
+
+			state = STOP;
+			break;
+
+		case NUMBER:
+			s = "";
+			while (more_chars && isDigit(next_ch)) {
+				s = s + next_ch;
+				more_chars = f.readChar(next_ch);
+			}
+
+			if (!isDigit(next_ch))
+				f.unreadChar(next_ch);
+			f.ttype = NUMERIC;
+
+			f.tnum = atoi(s.c_str());
+			state = STOP;
+			break;
+
+		case OPERATOR:
+			s = "";
+
+			while (more_chars && isLongOper(next_ch) && s != ">>") {
+				// have to stop short with >>
+				s = s + next_ch;
+				more_chars = f.readChar(next_ch);
+			}
+
+			if (s == ">>") {
+				f.unreadChar(next_ch);
+				state = QUOTE;
+			} else {
+				if (!isOperChar(next_ch))
+					f.unreadChar(next_ch);
+				else
+					s = s + next_ch;
+
+				state = STOP;
+
+				if (s == ":") {
+					f.ttype = PUNCTUATION;
+					f.tnum = (int)':';
+				} else if (!binary_search(Operators, NUM_OPERS, s, f.tnum)) {
+					f.sourcePos();
+					error("Unknown operator %s", s.c_str());
+				} else {
+					f.ttype = OPER;
+				}
+			}
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	return more_chars;
+}
+
+void write_token(AclType the_type, int the_number) {
+	StringPtr str_ptr;
+	IdRecPtr the_id_ptr;
+	void *p;
+
+	switch (the_type) {
+	case IDENT:
+		if (the_number < 0) {
+			g_vm->write("an identifier");
+		}
+		else {
+			g_vm->write("<identifier %d >: ", the_number);
+			if (index_ident(the_number, the_id_ptr))
+				g_vm->write("\"%s\"", the_id_ptr->id_name);
+		}
+		break;
+
+	case RESERVED:
+		if (the_number < 0)
+			g_vm->write("a reserved word");
+		else
+			g_vm->write("reserved word \"%s\"", Reserved_Wds[the_number]);
+		break;
+
+	case OPER:
+		if (the_number < 0)
+			g_vm->write("an operator");
+		else
+			g_vm->write("operator \"%s\"", Operators[the_number]);
+		break;
+
+	case PUNCTUATION:
+		g_vm->write("%c", (char)the_number);
+		break;
+
+	case TEXT_LIT:
+		if (the_number < 0)
+			g_vm->write("a text literal");
+		else if (index_xarray(g_vm->Literals, the_number, p)) {
+			str_ptr = (StringPtr)p;
+			g_vm->write("\"%s\"", str_ptr->c_str());
+		}
+		else {
+			g_vm->write("<text literal %d >: ", the_number);
+		}
+		break;
+
+	case MESSAGE:
+		if (the_number < 0)
+			g_vm->write("a message");
+		else if (index_xarray(g_vm->Vocabulary, the_number, p)) {
+			str_ptr = (StringPtr)p;
+			g_vm->write("\'%s\'", str_ptr->c_str());
+		} else {
+			g_vm->write("<message %d>: ", the_number);
+		}
+		break;
+
+	case NUMERIC:
+		g_vm->write("the number %d", the_number);
+		break;
+
+	default:
+		g_vm->write("<unknown token>");
+	}
+}
+
+} // End of namespace Archetype
+} // End of namespace Glk