aboutsummaryrefslogtreecommitdiff
path: root/tools/create_translations/po_parser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tools/create_translations/po_parser.cpp')
-rw-r--r--tools/create_translations/po_parser.cpp409
1 files changed, 409 insertions, 0 deletions
diff --git a/tools/create_translations/po_parser.cpp b/tools/create_translations/po_parser.cpp
new file mode 100644
index 0000000000..bc49da40d4
--- /dev/null
+++ b/tools/create_translations/po_parser.cpp
@@ -0,0 +1,409 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * This is a utility for creating the translations.dat file from all the po files.
+ * The generated file is used by ScummVM to provide translations of its GUI.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "po_parser.h"
+
+// Creates an empty message list. No storage is reserved here; the pointer
+// array is allocated by insert() the first time it is needed.
+PoMessageList::PoMessageList() :
+	_messages(NULL),
+	_size(0),
+	_allocated(0)
+{
+}
+
+// Releases every message copy made by insert(), then the pointer array itself.
+PoMessageList::~PoMessageList() {
+	int remaining = _size;
+	while (remaining-- > 0)
+		delete[] _messages[remaining];
+	delete[] _messages;
+}
+
+// Stores a private copy of msg in the list, which is kept sorted (strcmp
+// order) and free of duplicates. NULL or empty messages, and messages that
+// are already present, are silently ignored.
+void PoMessageList::insert(const char *msg) {
+	if (msg == NULL || *msg == '\0')
+		return;
+
+	// Binary search over the half-open range [low, high) for the slot where
+	// msg belongs; stop right away if msg is already stored.
+	int low = 0;
+	int high = _size;
+	while (low < high) {
+		const int mid = low + (high - low) / 2;
+		const int order = strcmp(msg, _messages[mid]);
+		if (order == 0)
+			return;
+		if (order < 0)
+			high = mid;
+		else
+			low = mid + 1;
+	}
+	const int slot = low;
+
+	if (_size >= _allocated) {
+		// Storage is full: grow by a fixed chunk of 100 pointers and leave a
+		// hole at 'slot' while copying the existing pointers over.
+		_allocated += 100;
+		char **grown = new char*[_allocated];
+		for (int src = 0; src < _size; ++src)
+			grown[src < slot ? src : src + 1] = _messages[src];
+		delete[] _messages;
+		_messages = grown;
+	} else {
+		// Enough room: shift the tail up by one to open the hole at 'slot'.
+		for (int dst = _size; dst > slot; --dst)
+			_messages[dst] = _messages[dst - 1];
+	}
+
+	char *copy = new char[strlen(msg) + 1];
+	strcpy(copy, msg);
+	_messages[slot] = copy;
+	++_size;
+}
+
+// Looks msg up in the sorted list and returns its index, or -1 when msg is
+// NULL, empty, or not stored in the list.
+int PoMessageList::findIndex(const char *msg) {
+	if (msg == NULL || *msg == '\0')
+		return -1;
+
+	// Binary search over the half-open range [low, high).
+	int low = 0;
+	int high = _size;
+	while (low < high) {
+		const int mid = low + (high - low) / 2;
+		const int order = strcmp(msg, _messages[mid]);
+		if (order == 0)
+			return mid;
+		if (order < 0)
+			high = mid;
+		else
+			low = mid + 1;
+	}
+
+	return -1;
+}
+
+// Returns the number of messages currently stored in the list.
+int PoMessageList::size() const {
+ return _size;
+}
+
+// Returns the message stored at the given position, or NULL when index is
+// outside [0, size()). The returned string remains owned by the list.
+const char *PoMessageList::operator[](int index) const {
+	const bool inRange = (index >= 0 && index < _size);
+	return inRange ? _messages[index] : NULL;
+}
+
+// Creates an empty entry list for the given language identifier.
+// Until a po header says otherwise (see addMessageEntry()), the language
+// name defaults to a copy of lang and the charset to the empty string.
+PoMessageEntryList::PoMessageEntryList(const char *lang) :
+	_lang(NULL), _charset(NULL), _langName(NULL),
+	_list(NULL), _size(0), _allocated(0)
+{
+	// Both _lang and _langName start out as independent copies of lang.
+	const size_t langBytes = strlen(lang) + 1;
+
+	_lang = new char[langBytes];
+	memcpy(_lang, lang, langBytes);
+
+	_charset = new char[1];
+	*_charset = '\0';
+
+	_langName = new char[langBytes];
+	memcpy(_langName, lang, langBytes);
+}
+
+// Destroys every entry created by addMessageEntry(), the entry array, and
+// the three strings owned by the list.
+PoMessageEntryList::~PoMessageEntryList() {
+	for (int remaining = _size; remaining-- > 0; )
+		delete _list[remaining];
+	delete[] _list;
+
+	delete[] _langName;
+	delete[] _charset;
+	delete[] _lang;
+}
+
+// Returns true when ctx stands for "no context", i.e. it is NULL or empty.
+static bool poContextIsEmpty(const char *ctx) {
+	return ctx == NULL || *ctx == '\0';
+}
+
+// Three-way comparison of two message contexts. "No context" (NULL or empty,
+// whichever way it is spelled) sorts before every real context and compares
+// equal to itself; two real contexts are ordered with strcmp().
+static int poCompareContexts(const char *lhs, const char *rhs) {
+	const bool lhsEmpty = poContextIsEmpty(lhs);
+	const bool rhsEmpty = poContextIsEmpty(rhs);
+	if (lhsEmpty || rhsEmpty)
+		return lhsEmpty ? (rhsEmpty ? 0 : -1) : 1;
+	return strcmp(lhs, rhs);
+}
+
+// Adds one translated message to the list, which is kept sorted by msgid and
+// then by context (entries without context first).
+//
+// translation: the msgstr text.
+// message:     the msgid text. An empty msgid denotes the po header: in that
+//              case no entry is added, and the "Language:" and "charset="
+//              fields found in translation replace the language name and
+//              charset of this list.
+// context:     the msgctxt text; NULL and "" both mean "no context".
+//
+// An entry whose (msgid, context) pair is already present is ignored. Entries
+// for a specific context that carry the same translation as the context-less
+// entry for the same msgid are dropped as redundant (see below).
+//
+// NOTE(review): translation is compared with strcmp() against the msgstr of
+// stored entries, so this assumes stored msgstr pointers are never NULL —
+// confirm against PoMessageEntry.
+void PoMessageEntryList::addMessageEntry(const char *translation, const char *message, const char *context) {
+	if (*message == '\0') {
+		// This is the header.
+		// We get the charset and the language name from the translation string.
+		char *str = parseLine(translation, "Language:");
+		if (str != NULL) {
+			delete[] _langName;
+			_langName = str;
+		}
+		str = parseLine(translation, "charset=");
+		if (str != NULL) {
+			delete[] _charset;
+			_charset = str;
+		}
+		return;
+	}
+
+	// Binary-search for the insertion index. The context is compared with
+	// poCompareContexts() so that an empty context string is recognised as
+	// "no context", exactly like the rest of this function does; comparing
+	// against NULL only would let a repeated context-less msgid slip through
+	// as a second entry.
+	int leftIndex = 0;
+	int rightIndex = _size - 1;
+	while (rightIndex >= leftIndex) {
+		const int midIndex = leftIndex + (rightIndex - leftIndex) / 2;
+		int compareResult = strcmp(message, _list[midIndex]->msgid);
+		if (compareResult == 0)
+			compareResult = poCompareContexts(context, _list[midIndex]->msgctxt);
+		if (compareResult == 0)
+			return; // same msgid and same context: already present
+		if (compareResult < 0)
+			rightIndex = midIndex - 1;
+		else
+			leftIndex = midIndex + 1;
+	}
+
+	// We now have rightIndex = leftIndex - 1 and we need to insert the new
+	// message between the two (i.e. at leftIndex).
+	// However, since the TranslationManager will pick the translation
+	// associated with no context when there is none for a specific context,
+	// we can reduce the file size, the memory used at run time and the lookup
+	// cost (fewer strings to read and to search) by avoiding duplicates.
+	if (!poContextIsEmpty(context)) {
+		// Walk back to the first entry with this msgid; if one without
+		// context exists it is that one. Skip the new entry when it would
+		// only repeat the context-less translation.
+		int contextIndex = leftIndex - 1;
+		while (contextIndex >= 0 && strcmp(message, _list[contextIndex]->msgid) == 0)
+			--contextIndex;
+		++contextIndex;
+		if (contextIndex < leftIndex &&
+		    poContextIsEmpty(_list[contextIndex]->msgctxt) &&
+		    strcmp(translation, _list[contextIndex]->msgstr) == 0)
+			return;
+	}
+
+	// Open a hole at leftIndex, growing the array by 100 slots when full.
+	if (_size + 1 > _allocated) {
+		_allocated += 100;
+		PoMessageEntry **newList = new PoMessageEntry*[_allocated];
+		for (int i = 0; i < leftIndex; ++i)
+			newList[i] = _list[i];
+		for (int i = leftIndex; i < _size; ++i)
+			newList[i + 1] = _list[i];
+		delete[] _list;
+		_list = newList;
+	} else {
+		for (int i = _size - 1; i >= leftIndex; --i)
+			_list[i + 1] = _list[i];
+	}
+	_list[leftIndex] = new PoMessageEntry(translation, message, context);
+	++_size;
+
+	if (poContextIsEmpty(context)) {
+		// A context-less entry was just added: remove the entries for the
+		// same msgid that follow it (they all have a context) and carry an
+		// identical translation, compacting the array as we go.
+		int contextIndex = leftIndex + 1;
+		int removed = 0;
+		while (contextIndex < _size && strcmp(message, _list[contextIndex]->msgid) == 0) {
+			if (strcmp(translation, _list[contextIndex]->msgstr) == 0) {
+				delete _list[contextIndex];
+				++removed;
+			} else {
+				_list[contextIndex - removed] = _list[contextIndex];
+			}
+			++contextIndex;
+		}
+		if (removed > 0) {
+			// Close the gap for the entries with other msgids.
+			while (contextIndex < _size) {
+				_list[contextIndex - removed] = _list[contextIndex];
+				++contextIndex;
+			}
+		}
+		_size -= removed;
+	}
+}
+
+// Returns the language identifier that was passed to the constructor.
+// The string is owned by this list.
+const char *PoMessageEntryList::language() const {
+ return _lang;
+}
+
+// Returns the language name: the "Language:" field of the po header once
+// addMessageEntry() has seen one, otherwise a copy of the language identifier.
+// The string is owned by this list.
+const char *PoMessageEntryList::languageName() const {
+ return _langName;
+}
+
+// Returns the charset: the "charset=" value of the po header once
+// addMessageEntry() has seen one, otherwise an empty string.
+// The string is owned by this list.
+const char *PoMessageEntryList::charset() const {
+ return _charset;
+}
+
+// Returns the number of entries currently stored in the list.
+int PoMessageEntryList::size() const {
+ return _size;
+}
+
+// Returns the entry stored at the given position, or NULL when index is
+// outside [0, size()). The entry remains owned by the list.
+const PoMessageEntry *PoMessageEntryList::entry(int index) const {
+	const bool inRange = (index >= 0 && index < _size);
+	return inRange ? _list[index] : NULL;
+}
+
+
+// Size of the line buffer and of the msgid / msgctxt / msgstr buffers.
+static const int kPoBufferSize = 1024;
+
+// Appends src to the NUL-terminated string held in dst without ever writing
+// more than dstSize bytes. Text that does not fit is dropped with a warning.
+static void poAppendBounded(char *dst, size_t dstSize, const char *src) {
+	const size_t used = strlen(dst);
+	const size_t room = dstSize - 1 - used;
+	size_t count = strlen(src);
+	if (count > room) {
+		fprintf(stderr, "Warning: po string too long, text truncated.\n");
+		count = room;
+	}
+	memcpy(dst + used, src, count);
+	dst[used + count] = '\0';
+}
+
+// Records a completed entry, unless it is untranslated or flagged fuzzy.
+static void poFlushEntry(PoMessageEntryList *list, PoMessageList &messages,
+                         const char *msgstr, const char *msgid, const char *msgctxt, bool fuzzy) {
+	if (*msgstr == '\0' || fuzzy)
+		return;
+	messages.insert(msgid);
+	list->addMessageEntry(msgstr, msgid, msgctxt);
+}
+
+// Parses one po file.
+//
+// file:     path of the po file. The language identifier is the file name
+//           without its directory part, up to the first '.'.
+// messages: every msgid that has a non-fuzzy, non-empty translation is also
+//           inserted into this list.
+//
+// Returns a newly allocated PoMessageEntryList holding the translations (the
+// caller owns it), or NULL when the file cannot be opened.
+PoMessageEntryList *parsePoFile(const char *file, PoMessageList& messages) {
+	FILE *inFile = fopen(file, "r");
+	if (!inFile)
+		return NULL;
+
+	char msgidBuf[kPoBufferSize], msgctxtBuf[kPoBufferSize], msgstrBuf[kPoBufferSize];
+	char line[kPoBufferSize], *currentBuf = msgstrBuf;
+
+	// Get language from file name and create PoMessageEntryList.
+	// msgidBuf is borrowed as scratch space; the copy is bounded so that an
+	// overly long file name cannot overflow it.
+	const int fileLen = strlen(file);
+	int index = 0, start_index = fileLen > 0 ? fileLen - 1 : 0;
+	while (start_index > 0 && file[start_index - 1] != '/' && file[start_index - 1] != '\\') {
+		--start_index;
+	}
+	while (index < kPoBufferSize - 1 && file[start_index + index] != '.' && file[start_index + index] != '\0') {
+		msgidBuf[index] = file[start_index + index];
+		++index;
+	}
+	msgidBuf[index] = '\0';
+	PoMessageEntryList *list = new PoMessageEntryList(msgidBuf);
+
+	// Message attributes. A "#, fuzzy" flag line precedes the entry it applies
+	// to, so it is first remembered in fuzzy_next and becomes the current
+	// 'fuzzy' state once the previous entry has been flushed.
+	bool fuzzy = false;
+	bool fuzzy_next = false;
+
+	// Parse the file line by line.
+	// The msgstr is always the last part of an entry (i.e. msgid and msgctxt
+	// always precede the corresponding msgstr), so an entry is complete when a
+	// msgid/msgctxt line is met while the msgstr buffer is the current one.
+	msgidBuf[0] = msgstrBuf[0] = msgctxtBuf[0] = '\0';
+	while (fgets(line, sizeof(line), inFile)) {
+		// Handle message attributes.
+		if (line[0] == '#' && line[1] == ',' && strstr(line, "fuzzy")) {
+			fuzzy_next = true;
+			continue;
+		}
+		// Skip empty and comment lines.
+		if (*line == '\n' || *line == '#')
+			continue;
+
+		const bool isMsgid = strncmp(line, "msgid", 5) == 0;
+		const bool isMsgctxt = !isMsgid && strncmp(line, "msgctxt", 7) == 0;
+		if (isMsgid || isMsgctxt) {
+			if (currentBuf == msgstrBuf) {
+				// A new entry starts: add the previous one.
+				poFlushEntry(list, messages, msgstrBuf, msgidBuf, msgctxtBuf, fuzzy);
+				msgidBuf[0] = msgstrBuf[0] = msgctxtBuf[0] = '\0';
+
+				// Reset the attribute flags.
+				fuzzy = fuzzy_next;
+				fuzzy_next = false;
+			}
+			currentBuf = isMsgid ? msgidBuf : msgctxtBuf;
+			// stripLine() shortens the line in place, so the result always
+			// fits in a buffer of the same size as 'line'.
+			strcpy(currentBuf, stripLine(line));
+		} else if (strncmp(line, "msgstr", 6) == 0) {
+			strcpy(msgstrBuf, stripLine(line));
+			currentBuf = msgstrBuf;
+		} else {
+			// Continuation line: concatenate the string at the end of the
+			// current buffer, without running past its end.
+			poAppendBounded(currentBuf, sizeof(msgstrBuf), stripLine(line));
+		}
+	}
+
+	// The last entry of the file is not followed by another msgid/msgctxt
+	// line, so it has to be added here or it would be lost.
+	if (currentBuf == msgstrBuf)
+		poFlushEntry(list, messages, msgstrBuf, msgidBuf, msgctxtBuf, fuzzy);
+
+	fclose(inFile);
+	return list;
+}
+
+// Reduces a po line, in place, to the text found between its first two
+// unprotected double quotes and returns the same pointer.
+// Escape sequences written literally in the file (\n, \t, \", \' and \\) are
+// replaced by the character they stand for so that strcmp() can match the
+// strings at run time. Any other escape sequence is reported on stdout and
+// dropped. A line without any quote becomes the empty string.
+char *stripLine(char *line) {
+	const int length = strlen(line);
+
+	// Move the read cursor just past the opening quote (or to the end of
+	// the line when there is none).
+	int readPos = 0;
+	while (readPos < length) {
+		if (line[readPos++] == '"')
+			break;
+	}
+
+	// Copy characters down to the start of the buffer until the end of the
+	// line or the closing quote. The write cursor always trails the read
+	// cursor, so nothing is overwritten before it has been read.
+	int writePos = 0;
+	while (readPos < length && line[readPos] != '"') {
+		if (line[readPos] != '\\') {
+			line[writePos++] = line[readPos++];
+			continue;
+		}
+
+		const char escaped = line[readPos + 1];
+		char decoded = '\0';
+		bool supported = true;
+		switch (escaped) {
+		case 'n':
+			decoded = '\n';
+			break;
+		case 't':
+			decoded = '\t';
+			break;
+		case '\"':
+		case '\'':
+		case '\\':
+			decoded = escaped;
+			break;
+		default:
+			supported = false;
+			break;
+		}
+
+		if (supported)
+			line[writePos++] = decoded;
+		else
+			fprintf(stdout, "Unsupported special character \"%c%c\" in string. Please contact ScummVM developers.\n", line[readPos], escaped);
+
+		// Both the backslash and the character after it are consumed.
+		readPos += 2;
+	}
+	line[writePos] = '\0';
+	return line;
+}
+
+// Extracts the value of a header field from the (multi-line) po header text.
+//
+// line:  the header text, with fields separated by '\n'.
+// field: the field prefix to look for, e.g. "Language:" or "charset=".
+//
+// Returns a newly allocated string (release it with delete[]) holding the
+// text that follows the first occurrence of field, with leading blanks
+// skipped, up to the end of that line. Returns NULL when the field is not
+// found or when its value is empty.
+char *parseLine(const char *line, const char *field) {
+	const char *str = strstr(line, field);
+	if (str == NULL)
+		return NULL;
+	str += strlen(field);
+
+	// Skip leading blanks, but never step over the end of the line: an empty
+	// field such as "Language: \n" must not pick up the next header line as
+	// its value. The cast keeps isspace() well defined for bytes >= 0x80 on
+	// platforms where char is signed.
+	while (*str != '\n' && isspace(static_cast<unsigned char>(*str))) {
+		++str;
+	}
+
+	// Find the value length (stop at the first '\n').
+	size_t len = 0;
+	while (str[len] != '\0' && str[len] != '\n') {
+		++len;
+	}
+	if (len == 0)
+		return NULL;
+
+	// Create the result string.
+	char *result = new char[len + 1];
+	memcpy(result, str, len);
+	result[len] = '\0';
+	return result;
+}