diff options
Diffstat (limited to 'devtools/create_translations')
-rw-r--r-- | devtools/create_translations/create_translations.cpp | 187 | ||||
-rw-r--r-- | devtools/create_translations/create_translations.h | 30 | ||||
-rw-r--r-- | devtools/create_translations/module.mk | 11 | ||||
-rw-r--r-- | devtools/create_translations/po_parser.cpp | 409 | ||||
-rw-r--r-- | devtools/create_translations/po_parser.h | 110 |
5 files changed, 747 insertions, 0 deletions
diff --git a/devtools/create_translations/create_translations.cpp b/devtools/create_translations/create_translations.cpp new file mode 100644 index 0000000000..9fcf3b4a31 --- /dev/null +++ b/devtools/create_translations/create_translations.cpp @@ -0,0 +1,187 @@ +/* ScummVM - Graphic Adventure Engine + * + * ScummVM is the legal property of its developers, whose names + * are too numerous to list here. Please refer to the COPYRIGHT + * file distributed with this source distribution. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * This is a utility for creating the translations.dat file from all the po files. + * The generated file is used by ScummVM to provide translations of its GUI.
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + // HACK to allow building with the SDL backend on MinGW +// see bug #1800764 "TOOLS: MinGW tools building broken" +#ifdef main +#undef main +#endif // main + +#include "create_translations.h" +#include "po_parser.h" + +#define TRANSLATIONS_DAT_VER 2 // 1 byte + +// Padding buffer (filled with 0) used if we want to aligned writes +// static uint8 padBuf[DATAALIGNMENT]; + +// Utility functions +// Some of the function are very simple but are factored out so that it would require +// minor modifications if we want for example to aligne writes on 4 bytes. +void writeByte(FILE *fp, uint8 b) { + fwrite(&b, 1, 1, fp); +} + +void writeUint16BE(FILE *fp, uint16 value) { + writeByte(fp, (uint8)(value >> 8)); + writeByte(fp, (uint8)(value & 0xFF)); +} + +int stringSize(const char *string) { + // Each string is preceded by its size coded on 2 bytes + if (string == NULL) + return 2; + int len = strlen(string) + 1; + return 2 + len; + // The two lines below are an example if we want to align string writes + // pad = DATAALIGNMENT - (len + 2) % DATAALIGNMENT; + // return 2 + len + pad; +} + +void writeString(FILE *fp, const char *string) { + // Each string is preceded by its size coded on 2 bytes + if (string == NULL) { + writeUint16BE(fp, 0); + return; + } + int len = strlen(string) + 1; + writeUint16BE(fp, len); + fwrite(string, len, 1, fp); + // The commented lines below are an example if we want to align string writes + // It replaces the two lines above. 
+ // int pad = DATAALIGNMENT - (len + 2) % DATAALIGNMENT; + // writeUint16BE(fp, len + pad); + // fwrite(string, len, 1, fp); + // fwrite(padBuf, pad, 1, fp); +} + +// Main +int main(int argc, char *argv[]) { + // Build the translation list + PoMessageList messageIds; + PoMessageEntryList **translations = new PoMessageEntryList*[argc - 1]; + int numLangs = 0; + for (int i = 1; i < argc; ++i) { + translations[numLangs] = parsePoFile(argv[i], messageIds); + if (translations[numLangs] != NULL) + ++numLangs; + } + + FILE *outFile; + int i, lang; + int len; + + // Padding buffer initialization (filled with 0) + // used if we want to aligned writes + // for (i = 0; i < DATAALIGNMENT; i++) + // padBuf[i] = 0; + + outFile = fopen("translations.dat", "wb"); + + // Write header + fwrite("TRANSLATIONS", 12, 1, outFile); + + writeByte(outFile, TRANSLATIONS_DAT_VER); + + // Write number of translations + writeUint16BE(outFile, numLangs); + + // Write the length of each data block here. + // We could write it at the start of each block but that would mean that + // to go to block 4 we would have to go at the start of each preceding block, + // read its size and skip it until we arrive at the block we want. + // By having all the sizes at the start we just need to read the start of the + // file and can then skip to the block we want. + // Blocks are: + // 1. List of languages with the language name + // 2. Original messages (i.e. english) + // 3. First translation + // 4. Second translation + // ... + + // Write length for translation description + len = 0; + for (lang = 0; lang < numLangs; lang++) { + len += stringSize(translations[lang]->language()); + len += stringSize(translations[lang]->languageName()); + } + writeUint16BE(outFile, len); + + // Write size for the original language (english) block + // It starts with the number of strings coded on 2 bytes followed by each + // string (two bytes for the number of chars and the string itself). 
+ len = 2; + for (i = 0; i < messageIds.size(); ++i) + len += stringSize(messageIds[i]); + writeUint16BE(outFile, len); + + // Then comes the size of each translation block. + // It starts with the number of strings coded on 2 bytes, the charset and then the strings. + // For each string we have the string id (on two bytes) followed by + // the string size (two bytes for the number of chars and the string itself). + for (lang = 0; lang < numLangs; lang++) { + len = 2 + stringSize(translations[lang]->charset()); + for (i = 0; i < translations[lang]->size(); ++i) { + len += 2 + stringSize(translations[lang]->entry(i)->msgstr); + len += stringSize(translations[lang]->entry(i)->msgctxt); + } + writeUint16BE(outFile, len); + } + + // Write list of languages + for (lang = 0; lang < numLangs; lang++) { + writeString(outFile, translations[lang]->language()); + writeString(outFile, translations[lang]->languageName()); + } + + // Write original messages + writeUint16BE(outFile, messageIds.size()); + for (i = 0; i < messageIds.size(); ++i) { + writeString(outFile, messageIds[i]); + } + + // Write translations + for (lang = 0; lang < numLangs; lang++) { + writeUint16BE(outFile, translations[lang]->size()); + writeString(outFile, translations[lang]->charset()); + for (i = 0; i < translations[lang]->size(); ++i) { + writeUint16BE(outFile, messageIds.findIndex(translations[lang]->entry(i)->msgid)); + writeString(outFile, translations[lang]->entry(i)->msgstr); + writeString(outFile, translations[lang]->entry(i)->msgctxt); + } + } + + fclose(outFile); + + // Clean the memory + for (i = 0; i < numLangs; ++i) + delete translations[i]; + delete[] translations; + + return 0; +} diff --git a/devtools/create_translations/create_translations.h b/devtools/create_translations/create_translations.h new file mode 100644 index 0000000000..0ece8102f0 --- /dev/null +++ b/devtools/create_translations/create_translations.h @@ -0,0 +1,30 @@ +/* ScummVM - Graphic Adventure Engine + * + * ScummVM is 
the legal property of its developers, whose names + * are too numerous to list here. Please refer to the COPYRIGHT + * file distributed with this source distribution. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#ifndef CREATE_TRANSLATIONS_H +#define CREATE_TRANSLATIONS_H + +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef signed short int16; + +#endif /* CREATE_TRANSLATIONS_H */ diff --git a/devtools/create_translations/module.mk b/devtools/create_translations/module.mk new file mode 100644 index 0000000000..430cf91976 --- /dev/null +++ b/devtools/create_translations/module.mk @@ -0,0 +1,11 @@ +MODULE := devtools/create_translations + +MODULE_OBJS := \ + po_parser.o \ + create_translations.o + +# Set the name of the executable +TOOL_EXECUTABLE := create_translations + +# Include common rules +include $(srcdir)/rules.mk diff --git a/devtools/create_translations/po_parser.cpp b/devtools/create_translations/po_parser.cpp new file mode 100644 index 0000000000..bc49da40d4 --- /dev/null +++ b/devtools/create_translations/po_parser.cpp @@ -0,0 +1,409 @@ +/* ScummVM - Graphic Adventure Engine + * + * ScummVM is the legal property of its developers, whose names + * are too numerous to list here. 
Please refer to the COPYRIGHT + * file distributed with this source distribution. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * This is a utility for create the translations.dat file from all the po files. + * The generated files is used by ScummVM to propose translation of its GUI. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "po_parser.h" + +PoMessageList::PoMessageList() : _messages(NULL), _size(0), _allocated(0) { +} + +PoMessageList::~PoMessageList() { + for (int i = 0; i < _size; ++i) + delete[] _messages[i]; + delete[] _messages; +} + +void PoMessageList::insert(const char *msg) { + if (msg == NULL || *msg == '\0') + return; + + // binary-search for the insertion index + int leftIndex = 0; + int rightIndex = _size - 1; + while (rightIndex >= leftIndex) { + int midIndex = (leftIndex + rightIndex) / 2; + int compareResult = strcmp(msg, _messages[midIndex]); + if (compareResult == 0) + return; + else if (compareResult < 0) + rightIndex = midIndex - 1; + else + leftIndex = midIndex + 1; + } + // We now have rightIndex = leftIndex - 1 and we need to insert the new message + // between the two (i.a. at leftIndex). 
+ if (_size + 1 > _allocated) { + _allocated += 100; + char **newMessages = new char*[_allocated]; + for (int i = 0; i < leftIndex; ++i) + newMessages[i] = _messages[i]; + for (int i = leftIndex; i < _size; ++i) + newMessages[i + 1] = _messages[i]; + delete[] _messages; + _messages = newMessages; + } else { + for (int i = _size - 1; i >= leftIndex; --i) + _messages[i + 1] = _messages[i]; + } + _messages[leftIndex] = new char[1 + strlen(msg)]; + strcpy(_messages[leftIndex], msg); + ++_size; +} + +int PoMessageList::findIndex(const char *msg) { + if (msg == NULL || *msg == '\0') + return -1; + + // binary-search for the message + int leftIndex = 0; + int rightIndex = _size - 1; + + while (rightIndex >= leftIndex) { + const int midIndex = (leftIndex + rightIndex) / 2; + const int compareResult = strcmp(msg, _messages[midIndex]); + if (compareResult == 0) + return midIndex; + else if (compareResult < 0) + rightIndex = midIndex - 1; + else + leftIndex = midIndex + 1; + } + + return -1; +} + +int PoMessageList::size() const { + return _size; +} + +const char *PoMessageList::operator[](int index) const { + if (index < 0 || index >= _size) + return NULL; + return _messages[index]; +} + +PoMessageEntryList::PoMessageEntryList(const char *lang) : + _lang(NULL), _charset(NULL), _langName(NULL), + _list(NULL), _size(0), _allocated(0) +{ + _lang = new char[1 + strlen(lang)]; + strcpy(_lang, lang); + // Set default charset to empty string + _charset = new char[1]; + _charset[0] = '\0'; + // Set default langName to lang + _langName = new char[1 + strlen(lang)]; + strcpy(_langName, lang); +} + +PoMessageEntryList::~PoMessageEntryList() { + delete[] _lang; + delete[] _charset; + delete[] _langName; + for (int i = 0; i < _size; ++i) + delete _list[i]; + delete[] _list; +} + +void PoMessageEntryList::addMessageEntry(const char *translation, const char *message, const char *context) { + if (*message == '\0') { + // This is the header. 
+ // We get the charset and the language name from the translation string + char *str = parseLine(translation, "Language:"); + if (str != NULL) { + delete[] _langName; + _langName = str; + } + str = parseLine(translation, "charset="); + if (str != NULL) { + delete[] _charset; + _charset = str; + } + return; + } + + // binary-search for the insertion index + int leftIndex = 0; + int rightIndex = _size - 1; + while (rightIndex >= leftIndex) { + int midIndex = (leftIndex + rightIndex) / 2; + int compareResult = strcmp(message, _list[midIndex]->msgid); + if (compareResult == 0) { + if (context == NULL) { + if (_list[midIndex]->msgctxt == NULL) + return; + compareResult = -1; + } else { + if (_list[midIndex]->msgctxt == NULL) + compareResult = 1; + else { + compareResult = strcmp(context, _list[midIndex]->msgctxt); + if (compareResult == 0) + return; + } + } + } + if (compareResult < 0) + rightIndex = midIndex - 1; + else + leftIndex = midIndex + 1; + } + // We now have rightIndex = leftIndex - 1 and we need to insert the new message + // between the two (i.a. at leftIndex). + // However since the TranslationManager will pick the translation associated to no + // context if it is not present for a specific context, we can optimize the file + // size, memory used at run-time and performances (less strings to read from the file + // and less strings to look for) by avoiding duplicate. 
+ if (context != NULL && *context != '\0') { + // Check if we have the same translation for no context + int contextIndex = leftIndex - 1; + while (contextIndex >= 0 && strcmp (message, _list[contextIndex]->msgid) == 0) { + --contextIndex; + } + ++contextIndex; + if (contextIndex < leftIndex && _list[contextIndex]->msgctxt == NULL && strcmp(translation, _list[contextIndex]->msgstr) == 0) + return; + } + + + if (_size + 1 > _allocated) { + _allocated += 100; + PoMessageEntry **newList = new PoMessageEntry*[_allocated]; + for (int i = 0; i < leftIndex; ++i) + newList[i] = _list[i]; + for (int i = leftIndex; i < _size; ++i) + newList[i + 1] = _list[i]; + delete[] _list; + _list = newList; + } else { + for (int i = _size - 1; i >= leftIndex; --i) + _list[i + 1] = _list[i]; + } + _list[leftIndex] = new PoMessageEntry(translation, message, context); + ++_size; + + if (context == NULL || *context == '\0') { + // Remove identical translations for a specific context (see comment above) + int contextIndex = leftIndex + 1; + int removed = 0; + while (contextIndex < _size && strcmp(message, _list[contextIndex]->msgid) == 0) { + if (strcmp(translation, _list[contextIndex]->msgstr) == 0) { + delete _list[contextIndex]; + ++removed; + } else { + _list[contextIndex - removed] = _list[contextIndex]; + } + ++contextIndex; + } + if (removed > 0) { + while (contextIndex < _size) { + _list[contextIndex - removed] = _list[contextIndex]; + ++contextIndex; + } + } + _size -= removed; + } + +} + +const char *PoMessageEntryList::language() const { + return _lang; +} + +const char *PoMessageEntryList::languageName() const { + return _langName; +} + +const char *PoMessageEntryList::charset() const { + return _charset; +} + +int PoMessageEntryList::size() const { + return _size; +} + +const PoMessageEntry *PoMessageEntryList::entry(int index) const { + if (index < 0 || index >= _size) + return NULL; + return _list[index]; +} + + +PoMessageEntryList *parsePoFile(const char *file, PoMessageList& 
messages) { + FILE *inFile = fopen(file, "r"); + if (!inFile) + return NULL; + + char msgidBuf[1024], msgctxtBuf[1024], msgstrBuf[1024]; + char line[1024], *currentBuf = msgstrBuf; + + // Get language from file name and create PoMessageEntryList + int index = 0, start_index = strlen(file) - 1; + while (start_index > 0 && file[start_index - 1] != '/' && file[start_index - 1] != '\\') { + --start_index; + } + while (file[start_index + index] != '.' && file[start_index + index] != '\0') { + msgidBuf[index] = file[start_index + index]; + ++index; + } + msgidBuf[index] = '\0'; + PoMessageEntryList *list = new PoMessageEntryList(msgidBuf); + + // Initialize the message attributes. + bool fuzzy = false; + bool fuzzy_next = false; + + // Parse the file line by line. + // The msgstr is always the last line of an entry (i.e. msgid and msgctxt always + // precede the corresponding msgstr). + msgidBuf[0] = msgstrBuf[0] = msgctxtBuf[0] = '\0'; + while (!feof(inFile) && fgets(line, 1024, inFile)) { + if (line[0] == '#' && line[1] == ',') { + // Handle message attributes. + if (strstr(line, "fuzzy")) { + fuzzy_next = true; + continue; + } + } + // Skip empty and comment line + if (*line == '\n' || *line == '#') + continue; + if (strncmp(line, "msgid", 5) == 0) { + if (currentBuf == msgstrBuf) { + // add previous entry + if (*msgstrBuf != '\0' && !fuzzy) { + messages.insert(msgidBuf); + list->addMessageEntry(msgstrBuf, msgidBuf, msgctxtBuf); + } + msgidBuf[0] = msgstrBuf[0] = msgctxtBuf[0] = '\0'; + + // Reset the attribute flags. 
+ fuzzy = fuzzy_next; + fuzzy_next = false; + } + strcpy(msgidBuf, stripLine(line)); + currentBuf = msgidBuf; + } else if (strncmp(line, "msgctxt", 7) == 0) { + if (currentBuf == msgstrBuf) { + // add previous entry + if (*msgstrBuf != '\0' && !fuzzy) { + messages.insert(msgidBuf); + list->addMessageEntry(msgstrBuf, msgidBuf, msgctxtBuf); + } + msgidBuf[0] = msgstrBuf[0] = msgctxtBuf[0] = '\0'; + + // Reset the attribute flags + fuzzy = fuzzy_next; + fuzzy_next = false; + } + strcpy(msgctxtBuf, stripLine(line)); + currentBuf = msgctxtBuf; + } else if (strncmp(line, "msgstr", 6) == 0) { + strcpy(msgstrBuf, stripLine(line)); + currentBuf = msgstrBuf; + } else { + // concatenate the string at the end of the current buffer + if (currentBuf) + strcat(currentBuf, stripLine(line)); + } + } + + fclose(inFile); + return list; +} + +char *stripLine(char *line) { + // This function modifies line in place and return it. + // Keep only the text between the first two unprotected quotes. + // It also look for literal special characters (e.g. preceded by '\n', '\\', '\"', '\'', '\t') + // and replace them by the special character so that strcmp() can match them at run time. + // Look for the first quote + int start = 0; + int len = strlen(line); + while (start < len && line[start++] != '"') {} + // shift characters until we reach the end of the string or an unprotected quote + int i = 0, j = 0; + while (start + i + j < len && line[start + i + j] != '"') { + if (line[start + i + j] == '\\') { + switch (line[start + i + j + 1]) { + case 'n': + line[i++] = '\n'; + break; + case 't': + line[i++] = '\t'; + break; + case '\"': + line[i++] = '\"'; + break; + case '\'': + line[i++] = '\''; + break; + case '\\': + line[i++] = '\\'; + break; + default: + // Just skip + fprintf(stdout, "Unsupported special character \"%c%c\" in string. 
Please contact ScummVM developers.\n", line[start + i + j], line[start + i + j + 1]); + ++j; + } + ++j; + } else { + line[i] = line[start + i + j]; + ++i; + } + } + line[i] = '\0'; + return line; +} + +char *parseLine(const char *line, const char *field) { + // This function allocate and return a new char*. + // It will return a NULL pointer if the field is not found. + // It is used to parse the header of the po files to find the language name + // and the charset. + const char *str = strstr(line, field); + if (str == NULL) + return NULL; + str += strlen(field); + // Skip spaces + while (*str != '\0' && isspace(*str)) { + ++str; + } + // Find string length (stop at the first '\n') + int len = 0; + while (str[len] != '\0' && str[len] != '\n') { + ++len; + } + if (len == 0) + return NULL; + // Create result string + char *result = new char[len + 1]; + strncpy(result, str, len); + result[len] = '\0'; + return result; +} diff --git a/devtools/create_translations/po_parser.h b/devtools/create_translations/po_parser.h new file mode 100644 index 0000000000..6991b1d11e --- /dev/null +++ b/devtools/create_translations/po_parser.h @@ -0,0 +1,110 @@ +/* ScummVM - Graphic Adventure Engine + * + * ScummVM is the legal property of its developers, whose names + * are too numerous to list here. Please refer to the COPYRIGHT + * file distributed with this source distribution. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#ifndef PO_PARSER_H +#define PO_PARSER_H + + +/** + * List of english messages. + */ +class PoMessageList { +public: + PoMessageList(); + ~PoMessageList(); + + void insert(const char *msg); + int findIndex(const char *msg); + + int size() const; + const char *operator[](int) const; + +private: + char **_messages; + int _size; + int _allocated; +}; + +/** + * Describes a translation entry. + */ +struct PoMessageEntry { + char *msgstr; + char *msgid; + char *msgctxt; + + PoMessageEntry(const char *translation, const char *message, const char *context = NULL) : + msgstr(NULL), msgid(NULL), msgctxt(NULL) + { + if (translation != NULL && *translation != '\0') { + msgstr = new char[1 + strlen(translation)]; + strcpy(msgstr, translation); + } + if (message != NULL && *message != '\0') { + msgid = new char[1 + strlen(message)]; + strcpy(msgid, message); + } + if (context != NULL && *context != '\0') { + msgctxt = new char[1 + strlen(context)]; + strcpy(msgctxt, context); + } + } + ~PoMessageEntry() { + delete[] msgstr; + delete[] msgid; + delete[] msgctxt; + } +}; + +/** + * List of translation entries for one language. 
+ */ +class PoMessageEntryList { +public: + PoMessageEntryList(const char *language); + ~PoMessageEntryList(); + + void addMessageEntry(const char *translation, const char *message, const char *context = NULL); + + const char *language() const; + const char *languageName() const; + const char *charset() const; + + int size() const; + const PoMessageEntry *entry(int) const; + +private: + char *_lang; + char *_charset; + char *_langName; + + PoMessageEntry **_list; + int _size; + int _allocated; +}; + + +PoMessageEntryList *parsePoFile(const char *file, PoMessageList &); +char *stripLine(char *); +char *parseLine(const char *line, const char *field); + +#endif /* PO_PARSER_H */ |