aboutsummaryrefslogtreecommitdiff
path: root/engines/glk/jacl/libcsv.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'engines/glk/jacl/libcsv.cpp')
-rw-r--r--engines/glk/jacl/libcsv.cpp529
1 files changed, 529 insertions, 0 deletions
diff --git a/engines/glk/jacl/libcsv.cpp b/engines/glk/jacl/libcsv.cpp
new file mode 100644
index 0000000000..408e60b683
--- /dev/null
+++ b/engines/glk/jacl/libcsv.cpp
@@ -0,0 +1,529 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+#include "glk/jacl/csv.h"
+
+namespace Glk {
+namespace JACL {
+
+#define VERSION "3.0.0"
+
+//#define SIZE_MAX ((size_t)-1)
+
+#define ROW_NOT_BEGUN 0
+#define FIELD_NOT_BEGUN 1
+#define FIELD_BEGUN 2
+#define FIELD_MIGHT_HAVE_ENDED 3
+
+/*
+ Explanation of states
+ ROW_NOT_BEGUN There have not been any fields encountered for this row
+ FIELD_NOT_BEGUN There have been fields but we are currently not in one
+ FIELD_BEGUN We are in a field
+ FIELD_MIGHT_HAVE_ENDED
+ We encountered a double quote inside a quoted field, the
+ field is either ended or the quote is literal
+*/
+
+#define MEM_BLK_SIZE 128
+
+#define SUBMIT_FIELD(p) \
+ do { \
+ if (!quoted) \
+ entry_pos -= spaces; \
+ if (p->options & CSV_APPEND_NULL) \
+ ((p)->entry_buf[entry_pos+1]) = '\0'; \
+ if (cb1) \
+ cb1(p->entry_buf, entry_pos, data); \
+ pstate = FIELD_NOT_BEGUN; \
+ entry_pos = quoted = spaces = 0; \
+ } while (0)
+
+#define SUBMIT_ROW(p, c) \
+ do { \
+ if (cb2) \
+ cb2(c, data); \
+ pstate = ROW_NOT_BEGUN; \
+ entry_pos = quoted = spaces = 0; \
+ } while (0)
+
+#define SUBMIT_CHAR(p, c) ((p)->entry_buf[entry_pos++] = (c))
+
+static char *csv_errors[] = {"success",
+ "error parsing data while strict checking enabled",
+ "memory exhausted while increasing buffer size",
+ "data size too large",
+ "invalid status code"
+ };
+
+int csv_error(struct csv_parser *p) {
+ /* Return the current status of the parser */
+ return p->status;
+}
+
+char *csv_strerror(int status) {
+ /* Return a textual description of status */
+ if (status >= CSV_EINVALID || status < 0)
+ return csv_errors[CSV_EINVALID];
+ else
+ return csv_errors[status];
+}
+
+int csv_get_opts(struct csv_parser *p) {
+ /* Return the currently set options of parser */
+ if (p == NULL)
+ return -1;
+
+ return p->options;
+}
+
+int csv_set_opts(struct csv_parser *p, unsigned char options) {
+ /* Set the options */
+ if (p == NULL)
+ return -1;
+
+ p->options = options;
+ return 0;
+}
+
+int csv_init(struct csv_parser *p, unsigned char options) {
+ /* Initialize a csv_parser object returns 0 on success, -1 on error */
+ if (p == NULL)
+ return -1;
+
+ p->entry_buf = NULL;
+ p->pstate = ROW_NOT_BEGUN;
+ p->quoted = 0;
+ p->spaces = 0;
+ p->entry_pos = 0;
+ p->entry_size = 0;
+ p->status = 0;
+ p->options = options;
+ p->quote_char = CSV_QUOTE;
+ p->delim_char = CSV_COMMA;
+ p->is_space = NULL;
+ p->is_term = NULL;
+ p->blk_size = MEM_BLK_SIZE;
+ p->malloc_func = NULL;
+ p->realloc_func = realloc;
+ p->free_func = free;
+
+ return 0;
+}
+
+void csv_free(struct csv_parser *p) {
+ /* Free the entry_buffer of csv_parser object */
+ if (p == NULL)
+ return;
+
+ if (p->entry_buf)
+ p->free_func(p->entry_buf);
+
+ p->entry_buf = NULL;
+ p->entry_size = 0;
+
+ return;
+}
+
+int csv_fini(struct csv_parser *p, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) {
+ /* Finalize parsing. Needed, for example, when file does not end in a newline */
+ int quoted = p->quoted;
+ int pstate = p->pstate;
+ size_t spaces = p->spaces;
+ size_t entry_pos = p->entry_pos;
+
+ if (p == NULL)
+ return -1;
+
+
+ if (p->pstate == FIELD_BEGUN && p->quoted && p->options & CSV_STRICT && p->options & CSV_STRICT_FINI) {
+ /* Current field is quoted, no end-quote was seen, and CSV_STRICT_FINI is set */
+ p->status = CSV_EPARSE;
+ return -1;
+ }
+
+ switch (p->pstate) {
+ case FIELD_MIGHT_HAVE_ENDED:
+ p->entry_pos -= p->spaces + 1; /* get rid of spaces and original quote */
+ /* Fall-through */
+ case FIELD_NOT_BEGUN:
+ case FIELD_BEGUN:
+ quoted = p->quoted, pstate = p->pstate;
+ spaces = p->spaces, entry_pos = p->entry_pos;
+ SUBMIT_FIELD(p);
+ SUBMIT_ROW(p, -1);
+ case ROW_NOT_BEGUN: /* Already ended properly */
+ ;
+ }
+
+ /* Reset parser */
+ p->spaces = p->quoted = p->entry_pos = p->status = 0;
+ p->pstate = ROW_NOT_BEGUN;
+
+ return 0;
+}
+
+void csv_set_delim(struct csv_parser *p, unsigned char c) {
+ /* Set the delimiter */
+ if (p) p->delim_char = c;
+}
+
+void csv_set_quote(struct csv_parser *p, unsigned char c) {
+ /* Set the quote character */
+ if (p) p->quote_char = c;
+}
+
+unsigned char csv_get_delim(struct csv_parser *p) {
+ /* Get the delimiter */
+ return p->delim_char;
+}
+
+unsigned char csv_get_quote(struct csv_parser *p) {
+ /* Get the quote character */
+ return p->quote_char;
+}
+
+void csv_set_space_func(struct csv_parser *p, int (*f)(unsigned char)) {
+ /* Set the space function */
+ if (p) p->is_space = f;
+}
+
+void csv_set_term_func(struct csv_parser *p, int (*f)(unsigned char)) {
+ /* Set the term function */
+ if (p) p->is_term = f;
+}
+
+void csv_set_realloc_func(struct csv_parser *p, void *(*f)(void *, size_t)) {
+ /* Set the realloc function used to increase buffer size */
+ if (p && f) p->realloc_func = f;
+}
+
+void csv_set_free_func(struct csv_parser *p, void (*f)(void *)) {
+ /* Set the free function used to free the buffer */
+ if (p && f) p->free_func = f;
+}
+
+void csv_set_blk_size(struct csv_parser *p, size_t size) {
+ /* Set the block size used to increment buffer size */
+ if (p) p->blk_size = size;
+}
+
+size_t csv_get_buffer_size(struct csv_parser *p) {
+ /* Get the size of the entry buffer */
+ if (p)
+ return p->entry_size;
+ return 0;
+}
+
+static int csv_increase_buffer(struct csv_parser *p) {
+ /* Increase the size of the entry buffer. Attempt to increase size by
+ * p->blk_size, if this is larger than SIZE_MAX try to increase current
+ * buffer size to SIZE_MAX. If allocation fails, try to allocate halve
+ * the size and try again until successful or increment size is zero.
+ */
+
+ size_t to_add = p->blk_size;
+ void *vp;
+
+ if (p->entry_size >= SIZE_MAX - to_add)
+ to_add = SIZE_MAX - p->entry_size;
+
+ if (!to_add) {
+ p->status = CSV_ETOOBIG;
+ return -1;
+ }
+
+ while ((vp = p->realloc_func(p->entry_buf, p->entry_size + to_add)) == NULL) {
+ to_add /= 2;
+ if (!to_add) {
+ p->status = CSV_ENOMEM;
+ return -1;
+ }
+ }
+
+ /* Update entry buffer pointer and entry_size if successful */
+ p->entry_buf = (unsigned char *)vp;
+ p->entry_size += to_add;
+ return 0;
+}
+
+size_t csv_parse(struct csv_parser *p, const void *s, size_t len, void (*cb1)(void *, size_t, void *),
+ void (*cb2)(int c, void *), void *data) {
+ unsigned const char *us = (unsigned const char *)s; /* Access input data as array of unsigned char */
+ unsigned char c; /* The character we are currently processing */
+ size_t pos = 0; /* The number of characters we have processed in this call */
+
+ /* Store key fields into local variables for performance */
+ unsigned char delim = p->delim_char;
+ unsigned char quote = p->quote_char;
+ int (*is_space)(unsigned char) = p->is_space;
+ int (*is_term)(unsigned char) = p->is_term;
+ int quoted = p->quoted;
+ int pstate = p->pstate;
+ size_t spaces = p->spaces;
+ size_t entry_pos = p->entry_pos;
+
+
+ if (!p->entry_buf && pos < len) {
+ /* Buffer hasn't been allocated yet and len > 0 */
+ if (csv_increase_buffer(p) != 0) {
+ p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos;
+ return pos;
+ }
+ }
+
+ while (pos < len) {
+ /* Check memory usage, increase buffer if neccessary */
+ if (entry_pos == ((p->options & CSV_APPEND_NULL) ? p->entry_size - 1 : p->entry_size)) {
+ if (csv_increase_buffer(p) != 0) {
+ p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos;
+ return pos;
+ }
+ }
+
+ c = us[pos++];
+
+ switch (pstate) {
+ case ROW_NOT_BEGUN:
+ case FIELD_NOT_BEGUN:
+ if (is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) { /* Space or Tab */
+ continue;
+ } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */
+ if (pstate == FIELD_NOT_BEGUN) {
+ SUBMIT_FIELD(p);
+ SUBMIT_ROW(p, (unsigned char)c);
+ } else { /* ROW_NOT_BEGUN */
+ /* Don't submit empty rows by default */
+ if (p->options & CSV_REPALL_NL) {
+ SUBMIT_ROW(p, (unsigned char)c);
+ }
+ }
+ continue;
+ } else if (c == delim) { /* Comma */
+ SUBMIT_FIELD(p);
+ break;
+ } else if (c == quote) { /* Quote */
+ pstate = FIELD_BEGUN;
+ quoted = 1;
+ } else { /* Anything else */
+ pstate = FIELD_BEGUN;
+ quoted = 0;
+ SUBMIT_CHAR(p, c);
+ }
+ break;
+ case FIELD_BEGUN:
+ if (c == quote) { /* Quote */
+ if (quoted) {
+ SUBMIT_CHAR(p, c);
+ pstate = FIELD_MIGHT_HAVE_ENDED;
+ } else {
+ /* STRICT ERROR - double quote inside non-quoted field */
+ if (p->options & CSV_STRICT) {
+ p->status = CSV_EPARSE;
+ p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos;
+ return pos - 1;
+ }
+ SUBMIT_CHAR(p, c);
+ spaces = 0;
+ }
+ } else if (c == delim) { /* Comma */
+ if (quoted) {
+ SUBMIT_CHAR(p, c);
+ } else {
+ SUBMIT_FIELD(p);
+ }
+ } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */
+ if (!quoted) {
+ SUBMIT_FIELD(p);
+ SUBMIT_ROW(p, (unsigned char)c);
+ } else {
+ SUBMIT_CHAR(p, c);
+ }
+ } else if (!quoted && (is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB)) { /* Tab or space for non-quoted field */
+ SUBMIT_CHAR(p, c);
+ spaces++;
+ } else { /* Anything else */
+ SUBMIT_CHAR(p, c);
+ spaces = 0;
+ }
+ break;
+ case FIELD_MIGHT_HAVE_ENDED:
+ /* This only happens when a quote character is encountered in a quoted field */
+ if (c == delim) { /* Comma */
+ entry_pos -= spaces + 1; /* get rid of spaces and original quote */
+ SUBMIT_FIELD(p);
+ } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */
+ entry_pos -= spaces + 1; /* get rid of spaces and original quote */
+ SUBMIT_FIELD(p);
+ SUBMIT_ROW(p, (unsigned char)c);
+ } else if (is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) { /* Space or Tab */
+ SUBMIT_CHAR(p, c);
+ spaces++;
+ } else if (c == quote) { /* Quote */
+ if (spaces) {
+ /* STRICT ERROR - unescaped double quote */
+ if (p->options & CSV_STRICT) {
+ p->status = CSV_EPARSE;
+ p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos;
+ return pos - 1;
+ }
+ spaces = 0;
+ SUBMIT_CHAR(p, c);
+ } else {
+ /* Two quotes in a row */
+ pstate = FIELD_BEGUN;
+ }
+ } else { /* Anything else */
+ /* STRICT ERROR - unescaped double quote */
+ if (p->options & CSV_STRICT) {
+ p->status = CSV_EPARSE;
+ p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos;
+ return pos - 1;
+ }
+ pstate = FIELD_BEGUN;
+ spaces = 0;
+ SUBMIT_CHAR(p, c);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos;
+ return pos;
+}
+
+size_t csv_write(void *dest, size_t dest_size, const void *src, size_t src_size) {
+ unsigned char *cdest = (unsigned char *)dest;
+ const unsigned char *csrc = (const unsigned char *)src;
+ size_t chars = 0;
+
+ if (src == NULL)
+ return 0;
+
+ if (cdest == NULL)
+ dest_size = 0;
+
+ if (dest_size > 0)
+ *cdest++ = '"';
+ chars++;
+
+ while (src_size) {
+ if (*csrc == '"') {
+ if (dest_size > chars)
+ *cdest++ = '"';
+ if (chars < SIZE_MAX) chars++;
+ }
+ if (dest_size > chars)
+ *cdest++ = *csrc;
+ if (chars < SIZE_MAX) chars++;
+ src_size--;
+ csrc++;
+ }
+
+ if (dest_size > chars)
+ *cdest = '"';
+ if (chars < SIZE_MAX) chars++;
+
+ return chars;
+}
+
+int csv_fwrite(Common::WriteStream *fp, const void *src, size_t src_size) {
+ const unsigned char *csrc = (const unsigned char *)src;
+
+ if (fp == NULL || src == NULL)
+ return 0;
+
+ fp->writeByte('"');
+
+ while (src_size) {
+ if (*csrc == '"') {
+ fp->writeByte('"');
+ }
+ fp->writeByte(*csrc);
+ src_size--;
+ csrc++;
+ }
+
+ fp->writeByte('"');
+ return 0;
+}
+
+size_t csv_write2(void *dest, size_t dest_size, const void *src, size_t src_size, unsigned char quote) {
+ unsigned char *cdest = (unsigned char *)dest;
+ const unsigned char *csrc = (const unsigned char *)src;
+ size_t chars = 0;
+
+ if (src == NULL)
+ return 0;
+
+ if (dest == NULL)
+ dest_size = 0;
+
+ if (dest_size > 0)
+ *cdest++ = quote;
+ chars++;
+
+ while (src_size) {
+ if (*csrc == quote) {
+ if (dest_size > chars)
+ *cdest++ = quote;
+ if (chars < SIZE_MAX) chars++;
+ }
+ if (dest_size > chars)
+ *cdest++ = *csrc;
+ if (chars < SIZE_MAX) chars++;
+ src_size--;
+ csrc++;
+ }
+
+ if (dest_size > chars)
+ *cdest = quote;
+ if (chars < SIZE_MAX) chars++;
+
+ return chars;
+}
+
+int csv_fwrite2(Common::WriteStream *fp, const void *src, size_t src_size, unsigned char quote) {
+ const unsigned char *csrc = (const unsigned char *)src;
+
+ if (fp == NULL || src == NULL)
+ return 0;
+
+ fp->writeByte(quote);
+
+ while (src_size) {
+ if (*csrc == quote) {
+ fp->writeByte(quote);
+ }
+ fp->writeByte(*csrc);
+ src_size--;
+ csrc++;
+ }
+
+ fp->writeByte(quote);
+ return 0;
+}
+
+} // End of namespace JACL
+} // End of namespace Glk