diff options
author | Eugene Sandulenko | 2018-12-25 12:37:23 +0100 |
---|---|---|
committer | Eugene Sandulenko | 2018-12-25 12:37:23 +0100 |
commit | 1f6727eab85c8f544718deba6a820e1b71ee5c93 (patch) | |
tree | 5d86c27377baa8fac79ced213a8fe4b21a3a1344 /devtools/create_bladerunner/subtitles/mixResourceCreator | |
parent | b6e93680f449b6d988aeceac0c0a58218b2992d0 (diff) | |
download | scummvm-rg350-1f6727eab85c8f544718deba6a820e1b71ee5c93.tar.gz scummvm-rg350-1f6727eab85c8f544718deba6a820e1b71ee5c93.tar.bz2 scummvm-rg350-1f6727eab85c8f544718deba6a820e1b71ee5c93.zip |
DEVTOOLS: Rename blade_runner to create_bladerunner
Diffstat (limited to 'devtools/create_bladerunner/subtitles/mixResourceCreator')
4 files changed, 805 insertions, 0 deletions
diff --git a/devtools/create_bladerunner/subtitles/mixResourceCreator/packBladeRunnerMIXFromPCTLKXLS-04.py b/devtools/create_bladerunner/subtitles/mixResourceCreator/packBladeRunnerMIXFromPCTLKXLS-04.py new file mode 100644 index 0000000000..2ebb5e9fe3 --- /dev/null +++ b/devtools/create_bladerunner/subtitles/mixResourceCreator/packBladeRunnerMIXFromPCTLKXLS-04.py @@ -0,0 +1,725 @@ +#!/usr/bin/python +# -*- coding: UTF-8 -*- +# +# Created by Praetorian (ShadowNate) for Classic Adventures in Greek +# classic.adventures.in.greek@gmail.com +# Works with Excel version outSpeech-15-06-2018-1856-TranslatingComms-080.xls and above +# +# TODO Support at least one translation too (ie Greek) +# Print a warning if packing a TRE without the corresponding font(s) -- only a warning though +# +import os, sys, shutil +import ctypes +import csv +import os.path +import xlrd +from xlrd import * +# for pack +from struct import * +import re + +company_email = "classic.adventures.in.greek@gmail.com" +app_version = "0.60" +app_name = "packBladeRunnerMIXFromPCTLKXLS" +app_name_spaced = "Get a TRE file from spoken in-game quotes" +numOfSpokenQuotes = 0 + +defaultSubtitlesFontName = 'SUBTLS_E.FON' + +origEncoding = 'windows-1252' +defaultTargetEncoding = 'windows-1252' +defaultTargetEncodingUnicode = unicode(defaultTargetEncoding, 'utf-8') +targetEncoding = '' +targetEncodingUnicode = '' + +configureFontsTranslationTextFile = u'configureFontsTranslation.txt' +relPath = u'.' 
# Location of the optional fonts/translation configuration file, relative to the working folder.
configureFontsTranslationTextFileRelPath = os.path.join(relPath, configureFontsTranslationTextFile)


# Sheets of the XLS that hold spoken dialogue (in-game quotes first, then one sheet per VQA cutscene).
# Every dialogue sheet is translated to a TRE using the SUBTLS_E.FON font.
# - TODO maybe merge this with TAHOMA18.FON eventually
supportedDialogueSheets = [
    'INGQUO_E.TRE',
    'WSTLGO_E.VQA',
    'BRLOGO_E.VQA',
    'INTRO_E.VQA',
    'MW_A_E.VQA',
    'MW_B01_E.VQA',
    'MW_B02_E.VQA',
    'MW_B03_E.VQA',
    'MW_B04_E.VQA',
    'MW_B05_E.VQA',
    'INTRGT_E.VQA',
    'MW_D_E.VQA',
    'MW_C01_E.VQA',
    'MW_C02_E.VQA',
    'MW_C03_E.VQA',
    'END04A_E.VQA',
    'END04B_E.VQA',
    'END04C_E.VQA',
    'END06_E.VQA',
    'END01A_E.VQA',
    'END01B_E.VQA',
    'END01C_E.VQA',
    'END01D_E.VQA',
    'END01E_E.VQA',
    'END01F_E.VQA',
    'END03_E.VQA',
]

# (sheet name, font type) pairs: each translated TRE sheet is tied to the font that renders it.
# 'TAHOMA' stands for both TAHOMA18 and TAHOMA24 (their translation should be identical; in the
# original they have minor differences, but those don't affect anything). A single name is used
# because both fonts are used for ENDCRED.TRE.
# The TRE files that are identically named to the originals are supposed to override them
# (needs ScummVM compatible functionality for that).
supportedTranslationSheets = [
    ('OPTIONS.TRE', 'KIA6PT'),
    ('DLGMENU.TRE', 'KIA6PT'),
    ('SCORERS.TRE', 'TAHOMA'),
    ('VK.TRE', 'KIA6PT'),
    ('CLUES.TRE', 'KIA6PT'),
    ('CRIMES.TRE', 'KIA6PT'),
    ('ACTORS.TRE', 'KIA6PT'),
    ('HELP.TRE', 'KIA6PT'),
    ('AUTOSAVE.TRE', 'KIA6PT'),
    ('ERRORMSG.TRE', 'KIA6PT'),
    ('SPINDEST.TRE', 'KIA6PT'),
    ('KIA.TRE', 'KIA6PT'),
    ('KIACRED.TRE', 'KIA6PT'),
    ('CLUETYPE.TRE', 'KIA6PT'),
    ('ENDCRED.TRE', 'TAHOMA'),
    ('POGO.TRE', 'KIA6PT'),
]

# Font files that are packed into the MIX next to the TRE files.
# The FON files that are identically named to the originals are supposed to override them
# (needs ScummVM compatible functionality for that).
# '10PT.FON' is left out since it's not used -- TODO verify this.
supportedOtherFilesForMix = [
    defaultSubtitlesFontName,
    'KIA6PT.FON',
    'TAHOMA18.FON',
    'TAHOMA24.FON',
]
# String tables of the TRE file that is currently being built (reset for every sheet by inputXLS).
tableOfStringIds = []
tableOfStringOffsets = []
tableOfStringEntries = []

# This list is used in order to replace the actual indices of characters with delegate font
# indices (indexes in the target code-page) which have been used during the font creation
# (or exist in the internal TAHOMA font).
# It contains (fontName, glyphList) tuples; every glyphList item is a tuple of two values:
# the first value is the actual unicode char, the second is its replacement (delegate) char.
listOfFontNamesToOutOfOrderGlyphs = []
arrangedListOfFontNamesToOutOfOrderGlyphs = []

# Rows of actornames.txt (header excluded): [id, short name, full name].
actorPropertyEntries = []
actorPropertyEntriesWasInit = False

# Quotes longer than this many bytes are reported after a sheet is parsed (debug statistics).
_QUOTE_LENGTH_THRESHOLD = 145

# Sheet kinds handled by inputXLS.
_MODE_UNKNOWN = 0
_MODE_INGAME_QUOTES = 1
_MODE_VQA_DIALOGUE = 2
_MODE_TRANSLATED_TRE = 3


def _readFontsTranslationTokens():
    """Return the tab/newline separated tokens of the fonts translation config file.

    Returns None when the file does not exist or has no lines at all.
    """
    if not os.access(configureFontsTranslationTextFileRelPath, os.F_OK):
        return None
    # 'with' guarantees the handle is released even if reading fails
    with open(configureFontsTranslationTextFileRelPath, 'r') as conFontsTranslationFile:
        linesLst = conFontsTranslationFile.readlines()
    if not linesLst:
        return None
    involvedTokensLst = []
    for readEncodLine in linesLst:
        # '\r' is excluded too, so that a CRLF file read on a non-Windows host
        # does not leave a stray carriage return glued to the last value of a line
        involvedTokensLst.extend(re.findall("[^\t\r\n]+", readEncodLine))
    return involvedTokensLst


def _parseOutOfOrderGlyphs(fontAndGlyphsSpec):
    """Register the glyph delegates described by a 'FONTNAME#a:b,c:d' specification.

    A specification without a font name or without glyph pairs is ignored.
    Only the first specification seen for a font name is kept.
    """
    fontNameAndOOOGlyphsTuple = fontAndGlyphsSpec.split('#', 1)
    if len(fontNameAndOOOGlyphsTuple) != 2 or fontNameAndOOOGlyphsTuple[0] == '' or fontNameAndOOOGlyphsTuple[1] == '':
        return
    tmpFontName = fontNameAndOOOGlyphsTuple[0]
    tmpListOfOutOfOrderGlyphs = []
    # split at comma, then split at ':' and store tuples of characters
    for tokenX in unicode(fontNameAndOOOGlyphsTuple[1], 'utf-8').split(','):
        tokensOfTupleList = tokenX.split(':')
        # ord() doubles as validation: it raises unless each side is exactly one character
        tmpListOfOutOfOrderGlyphs.append((unichr(ord(tokensOfTupleList[0])), unichr(ord(tokensOfTupleList[1]))))
    if tmpFontName not in [x[0] for x in listOfFontNamesToOutOfOrderGlyphs]:
        listOfFontNamesToOutOfOrderGlyphs.append((tmpFontName, tmpListOfOutOfOrderGlyphs))


def _arrangeGlyphDelegates(glyphPairs):
    """Reorder glyphPairs in place so that chained replacements are applied safely.

    If the delegate (second value) of one pair is the key (first value) of another pair,
    the pair having it as key must precede the other one; otherwise a character that was
    just replaced with its delegate would be replaced again by the later pair.
    Cyclic mappings (a->b together with b->a) cannot be arranged; they are detected by
    remembering the orderings already seen, and the loop is abandoned with a warning
    instead of spinning forever.
    """
    seenOrderings = set()
    while True:
        currentOrdering = tuple(glyphPairs)
        if currentOrdering in seenOrderings:
            print("Warning: cyclic glyph delegates detected, they can not be arranged: %s" % (glyphPairs,))
            return
        seenOrderings.add(currentOrdering)
        foundMatchingPairs = False
        for glyphDelegItA in glyphPairs:
            for glyphDelegItB in glyphPairs:
                itamA = glyphPairs.index(glyphDelegItA)
                itamB = glyphPairs.index(glyphDelegItB)
                if glyphDelegItA[1] == glyphDelegItB[0] and itamA < itamB:
                    # swap, then rescan from the start since the list changed
                    glyphPairs[itamB], glyphPairs[itamA] = glyphPairs[itamA], glyphPairs[itamB]
                    foundMatchingPairs = True
                    break
            if foundMatchingPairs:
                break
        if not foundMatchingPairs:
            return


def initOverrideEncoding():
    """Read the target encoding and the per-font glyph delegates from the config file.

    Sets the module-level targetEncoding (and targetEncodingUnicode), fills
    listOfFontNamesToOutOfOrderGlyphs and arrangedListOfFontNamesToOutOfOrderGlyphs.
    When no glyph delegates are configured, a default set is assumed for the
    default subtitles font only.
    Terminates the program (exit status 1) when no target encoding could be found.
    Errors raised while reading or parsing the file are reported and re-raised.
    """
    global targetEncoding
    global targetEncodingUnicode

    configureTranslationFailed = True
    try:
        involvedTokensLst = _readFontsTranslationTokens()
        if involvedTokensLst is not None:
            print("Font Translation Configuration Info: ")
            for tokenNameKeyPair in involvedTokensLst:
                nameKeyTupl = tokenNameKeyPair.split('=', 1)
                if len(nameKeyTupl) == 2 and nameKeyTupl[0] == 'targetEncoding' and nameKeyTupl[1] != '-' and nameKeyTupl[1] != '':
                    targetEncodingUnicode = unicode(nameKeyTupl[1], 'utf-8')
                    targetEncoding = targetEncodingUnicode.encode(origEncoding)
                elif len(nameKeyTupl) == 2 and nameKeyTupl[0] == 'fontNameAndOutOfOrderGlyphs' and nameKeyTupl[1] != '':
                    _parseOutOfOrderGlyphs(nameKeyTupl[1])
                else:
                    # unknown or malformed token: stop parsing the configuration
                    break
            # success only depends on a target encoding having been set
            if targetEncoding:
                configureTranslationFailed = False
    except Exception:
        print("Error while trying to access file for encoding info: %s" % (configureFontsTranslationTextFileRelPath))
        raise

    if configureTranslationFailed:
        print("Error! Could not find proper override encoding info in: %s" % (configureFontsTranslationTextFileRelPath))
        sys.exit(1)  # terminate if override Failed (Blade Runner)
    #
    # TODO ASDF fix this!!!
    #
    if len(listOfFontNamesToOutOfOrderGlyphs) == 0:
        tmpFontType = defaultSubtitlesFontName[:-4]  # remove the .FON extension from the name
        print("Empty list for out of order glyphs. Assuming default out of order glyphs and only for the %s font" % (tmpFontType))
        tmplistOfOutOfOrderGlyphs = [
            (u'\xed', u'\u0386'),  # spanish i (si)
            (u'\xf1', u'\xa5'),    # spanish n (senor)
            (u'\xe2', u'\xa6'),    # a for (liver) pate
            (u'\xe9', u'\xa7'),    # e for (liver) pate
        ]
        listOfFontNamesToOutOfOrderGlyphs.append((tmpFontType, tmplistOfOutOfOrderGlyphs))
    print("Explicit Out Of Order Glyphs List:  %s" % (listOfFontNamesToOutOfOrderGlyphs,))
    # The glyph lists are arranged in place, so listOfFontNamesToOutOfOrderGlyphs
    # (which translateQuoteToAsciiProper reads) sees the arranged order as well.
    for (itFontName, itOOOGlyphList) in listOfFontNamesToOutOfOrderGlyphs:
        _arrangeGlyphDelegates(itOOOGlyphList)
        arrangedListOfFontNamesToOutOfOrderGlyphs.append((itFontName, itOOOGlyphList))
    print("Arranged Glyphs Delegates List:  %s" % (arrangedListOfFontNamesToOutOfOrderGlyphs,))


#
# Fill the actorPropertyEntries table
def initActorPropertyEntries():
    """Load ./actornames.txt (tab separated, first row is a header) into actorPropertyEntries.

    The table is cleared first, so calling this more than once does not duplicate rows.
    Empty rows are skipped because the lookup functions index into every row.
    """
    global actorPropertyEntriesWasInit
    del actorPropertyEntries[:]
    with open("./actornames.txt") as tsv:
        actorRows = csv.reader(tsv, dialect="excel-tab")
        next(actorRows, None)  # skip first line header
        actorPropertyEntries.extend(actorRow for actorRow in actorRows if actorRow)
    actorPropertyEntriesWasInit = True


def getActorShortNameById(lookupActorId):
    """Return the short name of the actor with the given id, or '' when unknown or not initialised."""
    if not actorPropertyEntriesWasInit:
        return ''
    for actorEntryTmp in actorPropertyEntries:
        if int(actorEntryTmp[0]) == int(lookupActorId):
            return actorEntryTmp[1]
    return ''


def getActorFullNameById(lookupActorId):
    """Return the full name of the actor with the given id, or '' when unknown or not initialised."""
    if not actorPropertyEntriesWasInit:
        return ''
    for actorEntryTmp in actorPropertyEntries:
        if int(actorEntryTmp[0]) == int(lookupActorId):
            return actorEntryTmp[2]
    return ''


def getActorIdByShortName(lookupActorShortName):
    """Return the id (zero padded to two digits) of the actor with the given short name, or ''."""
    if not actorPropertyEntriesWasInit:
        return ''
    for actorEntryTmp in actorPropertyEntries:
        if actorEntryTmp[1] == lookupActorShortName:
            return actorEntryTmp[0].zfill(2)
    return ''

#
#
# FOR MIX FILE
#

# strFileName should be the full file name (including extension)
def calculateFoldHash(strFileName):
    """Return the 32-bit id under which strFileName is indexed in a MIX file.

    The upper-cased name (at most its first 12 characters) is consumed in groups of four
    bytes. Each group is read least significant byte first and padded with zeros, then added
    to the running hash after the hash has been rotated left by one bit.
    The name and its hash are also printed.
    """
    strParam = strFileName.upper()
    lenFileName = len(strParam)
    foldHash = 0
    pos = 0
    while pos < lenFileName and pos < 12:
        groupSum = 0
        # work in groups of 4 bytes
        for _ in range(4):
            # LSB first, so the four letters in the string are re-arranged (first letter goes to lower place)
            groupSum >>= 8
            if pos < lenFileName:
                groupSum |= (ord(strParam[pos]) << 24)
                pos += 1
            # past the end of the name the group is simply padded with zero bytes
        foldHash = ((foldHash << 1) | ((foldHash >> 31) & 1)) + groupSum
        foldHash &= 0xFFFFFFFF  # mask here!
    print(strParam + ': ' + '{:08X}'.format(foldHash))
    return foldHash

#
# aux - sort by first object in list of tuples
def getSortMixFilesKey(item):
    """Return the sort key of a MIX entry tuple: its id reinterpreted as a signed 32-bit integer.

    c_int32 is used rather than c_long because c_long is 64 bits wide on LP64 platforms,
    where ids with the top bit set would stay positive and sort differently than on Windows.
    """
    return ctypes.c_int32(item[0] & 0xFFFFFFFF).value


def _collectMixEntries():
    """Return (id, fileName, byteSize) tuples for the resource files present in the current folder, sorted by id."""
    candidateFileNames = [sheetDialogueName[:-4] + '.TRE' for sheetDialogueName in supportedDialogueSheets]
    candidateFileNames += [x[0] for x in supportedTranslationSheets]
    candidateFileNames += supportedOtherFilesForMix
    mixFileEntries = []
    for candidateFileName in candidateFileNames:
        candidatePath = './' + candidateFileName
        if os.path.isfile(candidatePath):
            mixFileEntries.append((calculateFoldHash(candidateFileName), candidateFileName, os.path.getsize(candidatePath)))
    mixFileEntries.sort(key=getSortMixFilesKey)
    return mixFileEntries


def outputMIX():
    """Pack the TRE and FON files found in the current folder into ./SUBTITLES.MIX.

    MIX layout (all integers little-endian):
      2 bytes: number of entries (NumFiles)
      4 bytes: size of data segment
      12 * NumFiles bytes: entry descriptors table
          4 bytes: ID (hash)
          4 bytes: byte offset in data segment
          4 bytes: byte length of entry data
      data segment: the file data.
    Offsets are relative to the start of the data segment, so the actual offset in the MIX
    is found by adding the size of the header, which is (6 + (12 * NumFiles)).

    The ID column should be in ascending order (the engine uses binary search to look files
    up), so the entries are sorted by id hash. Based on observations from STARTUP.MIX the
    hash ids can overflow, so lower numbers seem to appear down in the index table; this is
    why the ids are sorted as signed values (see getSortMixFilesKey).
    File names should be 8 characters at most plus 4 more for the extension; this is ensured
    manually by keeping the sheet names of the excel file compliant.
    """
    try:
        outMIXFile = open("./SUBTITLES.MIX", 'wb')
    except IOError as openError:
        print("Error while opening SUBTITLES.MIX for writing: %s" % (openError,))
        return
    try:
        mixFileEntries = _collectMixEntries()
        numOfFiles = len(mixFileEntries)
        totalFilesDataSize = sum(mixFileEntry[2] for mixFileEntry in mixFileEntries)
        # explicit '<' keeps the output identical on every host (the native formats
        # used before depended on the byte order of the machine running the script)
        outMIXFile.write(pack('<H', numOfFiles))          # 2 bytes
        outMIXFile.write(pack('<I', totalFilesDataSize))  # 4 bytes, data segment only

        print("Sorted Entries based on EntryId")
        currOffsetForDataSegment = 0  # offsets have base after header and table of index entries
        for mixFileEntry in mixFileEntries:
            print('{:08X}'.format(mixFileEntry[0]) + ': ' + mixFileEntry[1] + ' : ' + '{:08X}'.format(mixFileEntry[2]))
            outMIXFile.write(pack('<I', mixFileEntry[0] & 0xFFFFFFFF))
            outMIXFile.write(pack('<I', currOffsetForDataSegment))
            outMIXFile.write(pack('<I', mixFileEntry[2]))
            currOffsetForDataSegment += mixFileEntry[2]
        # Add data segments here
        for mixFileEntry in mixFileEntries:
            try:
                inEntryMIXFile = open("./" + mixFileEntry[1], 'rb')
            except IOError:
                print("Error while reading in ENTRY file: %s" % (mixFileEntry[1]))
                break
            try:
                outMIXFile.write(inEntryMIXFile.read())
            finally:
                inEntryMIXFile.close()
    finally:
        outMIXFile.close()
    print("TOTAL RESOURCES IN MIX: %d" % (numOfFiles))
#
# END FOR MIX FILE
#
#

# Notes on the quotes in the XLS:
#TODO extra pass once the quotes have been updated for weird unicode characters
#TODO some ' quotes appear as \u2019 and others appear normally as '. what's that about?
#DONE manually I've replaced all weird \u2019 single quotes with '''
#the spanish n is \xf1 -> we put it at ascii value: \xA5 -- font index 0xA6 ?
#the spanish i is \xed -> we put it at ascii value: \xA2 -- font index 0xA3 ?
#pate (with accented a and e)
# a actual ascii value is 0xE2 in codepage 1252 -- put it in ascii value 0xA6 (166) -- font index 0xA7
# e actual ascii value is 0xE9 in codepage 1252 -- put it in ascii value 0xA7 (167) -- font index 0xA8
#TODO what other characters are special?
#TODO transition to ASCII chars to store in TRE file?
#DONE manually I've replaced all one-char '...' with three dots
# TODO actors TRE has 0x49 entries, (73 names), but table of ids has 73 entries BUT the offset table (first offset is calced + 0x04, so from end of the first 4 count bytes) has 74 entries. The last entry indexes the end of file (!)
# TODO all strings are NULL terminated in the TRE file!

def _outOfOrderGlyphsForFont(fontName):
    """Return the glyph delegates list registered for fontName, or None when there is none."""
    for (tmpFontName, tmpOOOList) in listOfFontNamesToOutOfOrderGlyphs:
        if tmpFontName == fontName:
            return tmpOOOList
    return None


def translateQuoteToAsciiProper(cellObj, pSheetName):
    """Return the text of cellObj as a byte string in targetEncoding.

    Characters that have a delegate in the font used for pSheetName are replaced by that
    delegate, and typographic quotes / ellipsis are replaced by their plain equivalents.
    Raises the encoding error as is when a character cannot be encoded in targetEncoding.
    """
    # the round trip leaves unicode values untouched and turns byte strings into unicode
    newQuoteReplaceSpecials = cellObj.value.encode("utf-8").decode("utf-8")

    pertinentListOfOutOfOrderGlyphs = None
    if pSheetName in supportedDialogueSheets:
        pertinentListOfOutOfOrderGlyphs = _outOfOrderGlyphsForFont(defaultSubtitlesFontName[:-4])
    if pertinentListOfOutOfOrderGlyphs is None and pSheetName in [x[0] for x in supportedTranslationSheets]:
        pertinentFontType = ''
        for (tmpSheetName, tmpFontType) in supportedTranslationSheets:
            if tmpSheetName == pSheetName:
                pertinentFontType = tmpFontType
                break
        pertinentListOfOutOfOrderGlyphs = _outOfOrderGlyphsForFont(pertinentFontType)

    for repTuple in (pertinentListOfOutOfOrderGlyphs or []):
        newQuoteReplaceSpecials = newQuoteReplaceSpecials.replace(repTuple[0], repTuple[1])
    # other replacements.
    newQuoteReplaceSpecials = newQuoteReplaceSpecials.replace(u"\u2019", u"\u0027")  # right single quote
    newQuoteReplaceSpecials = newQuoteReplaceSpecials.replace(u"\u2018", u"\u0027")  # left single quote
    newQuoteReplaceSpecials = newQuoteReplaceSpecials.replace(u"\u2026", u"\u002e\u002e\u002e")  # three dots together (changes length)
    newQuoteReplaceSpecials = newQuoteReplaceSpecials.replace(u"\u201D", u"\u0022")  # right double quote
    newQuoteReplaceSpecials = newQuoteReplaceSpecials.replace(u"\u201C", u"\u0022")  # left double quote
    # TODO? replace new line ??? with another char (maybe |)?
    return newQuoteReplaceSpecials.encode(targetEncoding)


def _sheetMode(sheetName):
    """Return the kind of the sheet (in-game quotes, VQA dialogue or translated TRE) and announce it."""
    if sheetName == supportedDialogueSheets[0]:
        print('IN GAME QUOTES')
        return _MODE_INGAME_QUOTES
    if sheetName in supportedDialogueSheets:
        print('VQA SCENE DIALOGUE')
        return _MODE_VQA_DIALOGUE
    if sheetName in [x[0] for x in supportedTranslationSheets]:
        print('TRANSLATED TRE')
        return _MODE_TRANSLATED_TRE
    return _MODE_UNKNOWN


def _parseSheetRows(xl_sheet, mode, firstStringOffset):
    """Fill the module-level string tables from the rows of xl_sheet (the two header rows are skipped).

    firstStringOffset is the offset recorded for the first string; each following string
    starts after the previous one and its NULL terminator. The final extra offset entry
    (pointing past the last string) is appended as well.
    Returns (longestLength, extremeQuotesList) where extremeQuotesList holds the
    (id, quote) tuples of the quotes longer than _QUOTE_LENGTH_THRESHOLD.
    """
    stats = {'offset': firstStringOffset, 'longest': 0}
    extremeQuotesList = []

    def appendQuote(quoteId, quoteStr):
        tableOfStringEntries.append(quoteStr)
        tableOfStringOffsets.append(stats['offset'])
        stats['offset'] += len(quoteStr) + 1
        if stats['longest'] < len(quoteStr):
            stats['longest'] = len(quoteStr)
        if _QUOTE_LENGTH_THRESHOLD < len(quoteStr):
            extremeQuotesList.append((quoteId, quoteStr))

    tmpQuoteID = 0
    tmpStartFrame = 0  # for VQA sheets
    newQuoteReplaceSpecialsAscii = ''
    for row_idx in range(2, xl_sheet.nrows):
        for col_idx in range(0, xl_sheet.ncols):
            cell_obj = xl_sheet.cell(row_idx, col_idx)
            if mode == _MODE_INGAME_QUOTES:
                # in-game quotes: column 0 holds a 'AA-NNNN.xxx' tag, column 1 the quote
                if col_idx == 0:
                    twoTokensfirstColSplitAtDotXLS = cell_obj.value.split('.', 1)
                    if len(twoTokensfirstColSplitAtDotXLS) == 2:
                        twoTokensfirstColSplitAtDashXLS = twoTokensfirstColSplitAtDotXLS[0].split('-', 1)
                        if len(twoTokensfirstColSplitAtDashXLS) == 2:
                            tmpQuoteID = int(twoTokensfirstColSplitAtDashXLS[0]) * 10000 + int(twoTokensfirstColSplitAtDashXLS[1])
                            tableOfStringIds.append(tmpQuoteID)
                elif col_idx == 1:
                    appendQuote(tmpQuoteID, translateQuoteToAsciiProper(cell_obj, xl_sheet.name))
            elif mode == _MODE_VQA_DIALOGUE:
                # VQAs: column 2 is the subtitle text, columns 9 and 10 the start and end frame
                if col_idx == 2:
                    # don't append to tableOfStringEntries yet, the id is only known at column 10
                    newQuoteReplaceSpecialsAscii = translateQuoteToAsciiProper(cell_obj, xl_sheet.name)
                elif col_idx == 9:
                    tmpStartFrame = int(cell_obj.value)
                elif col_idx == 10:
                    # top 16 bits are end frame (up to 65536 frames which is enough) and low 16 bits are startFrame
                    tmpQuoteID = tmpStartFrame | (int(cell_obj.value) << 16)
                    tableOfStringIds.append(tmpQuoteID)
                    appendQuote(tmpQuoteID, newQuoteReplaceSpecialsAscii)
            elif mode == _MODE_TRANSLATED_TRE:
                # translated TRE sheets: the id is already in the first column, the text is in the next one
                if col_idx == 0:
                    tmpQuoteID = int(cell_obj.value)
                    tableOfStringIds.append(tmpQuoteID)
                elif col_idx == 1:
                    appendQuote(tmpQuoteID, translateQuoteToAsciiProper(cell_obj, xl_sheet.name))

    tableOfStringOffsets.append(stats['offset'])  # the final extra offset entry
    return (stats['longest'], extremeQuotesList)


def _writeTRE(treFileName, numOfQuotes):
    """Write the module-level string tables to ./treFileName (little-endian, NULL terminated strings)."""
    try:
        outTREFile = open("./" + treFileName, 'wb')
    except IOError as openError:
        print("Error while opening %s for writing: %s" % (treFileName, openError))
        return
    try:
        outTREFile.write(pack('<I', numOfQuotes))
        # write string IDs table
        for idOfString in tableOfStringIds:
            outTREFile.write(pack('<I', idOfString))
        # write string offsets table
        for offsetOfString in tableOfStringOffsets:
            outTREFile.write(pack('<I', offsetOfString))
        # write strings with null terminator
        for stringEntry in tableOfStringEntries:
            outTREFile.write(stringEntry)
            outTREFile.write(b'\0')
    finally:
        outTREFile.close()


def inputXLS(filename):
    """Create a TRE file in the current folder for every supported sheet of the excel file.

    For each sheet the module-level tables (tableOfStringIds, tableOfStringOffsets,
    tableOfStringEntries) and numOfSpokenQuotes are reset and refilled, so after the call
    they describe the last sheet that was processed.
    Sheets that are missing from the workbook are reported and skipped.
    """
    global numOfSpokenQuotes
    # Open the workbook
    xl_workbook = xlrd.open_workbook(filename, encoding_override="utf-8")
    # sheet_by_name() raises for an unknown name instead of returning None,
    # so missing sheets are detected through the list of sheet names
    availableSheetNames = xl_workbook.sheet_names()

    mergedListOfSubtitleSheetsAndTranslatedTREs = supportedDialogueSheets + [x[0] for x in supportedTranslationSheets]

    for sheetDialogueName in mergedListOfSubtitleSheetsAndTranslatedTREs:
        if sheetDialogueName not in availableSheetNames:
            print("Warning: sheet %s was not found in the excel file, skipping it" % (sheetDialogueName))
            continue
        xl_sheet = xl_workbook.sheet_by_name(sheetDialogueName)
        print('Sheet name: %s' % xl_sheet.name)
        # all rows minus the first TWO rows with headers (never negative, so it can always be packed)
        numOfSpokenQuotes = max(xl_sheet.nrows - 2, 0)
        print('num of spoken quotes: %d' % numOfSpokenQuotes)

        absStartOfIndexTable = 4
        absStartOfOffsetTable = absStartOfIndexTable + (numOfSpokenQuotes * 4)
        absStartOfStringTable = absStartOfOffsetTable + ((numOfSpokenQuotes + 1) * 4)
        # the offsets stored in the file do not count the first 4 (count) bytes
        firstStringOffset = absStartOfStringTable - 4

        mode = _sheetMode(xl_sheet.name)
        del tableOfStringIds[:]
        del tableOfStringEntries[:]
        del tableOfStringOffsets[:]
        (longestLength, extremeQuotesList) = _parseSheetRows(xl_sheet, mode, firstStringOffset)

        print('Longest Length = %d, quotes above threshold (%d): %d' % (longestLength, _QUOTE_LENGTH_THRESHOLD, len(extremeQuotesList)))
        for extremQuotTuple in extremeQuotesList:
            print("Id: %d, Q: %s" % (extremQuotTuple[0], extremQuotTuple[1]))
        if len(tableOfStringIds) != numOfSpokenQuotes or len(tableOfStringEntries) != numOfSpokenQuotes:
            # the header and the offsets are computed from the row count, so a mismatch yields a broken TRE
            print("Warning: sheet %s has %d rows but %d ids and %d quotes were parsed" % (sheetDialogueName, numOfSpokenQuotes, len(tableOfStringIds), len(tableOfStringEntries)))
        #
        # WRITE TO TRE FILE
        #
        _writeTRE(sheetDialogueName[:-4] + ".TRE", numOfSpokenQuotes)
#
#
#
# ########################
# main
# 00_0000 -- DealsInInsects dupl TLK01, TLK0A
# 00_0510 -- ThinkingOfChangingJobs-Leon dupl TLK02, TLK03
# 00-8520 -- WhatDoYouKnow dupl TLK01, TLK0A

# Total unique quotes seems to be 5495!
# TODO rename files in folders to conform to the underscore '_' and '-' format (a few don't -- let's have them all conforming!)
# #########################
#
def _printUsage():
    """Print the command line syntax and what the app produces."""
    print("Valid syntax (in-game and VQA quotes): %s -x [folderpath_to_ BladeRunnerPCTLK_xlsx_file]" % (app_name))
    print("1st argument is the path to the excel file with the subtitle quotes.")
    print("If the app finishes successfully a " + supportedDialogueSheets[0] + " and a few other .TRE files for the VQAs ")
    print("in the Excel file as well as a SUBTITLES.MIX file containing all of them will be created in the same folder with the app.")


def _printHelp():
    """Print the full help text shown for --help / -h."""
    print("%s %s supports Blade Runner (English version, CD edition)." % (app_name_spaced, app_version))
    print("Created by Praetorian of the classic adventures in Greek team.")
    print("Always keep backups!")
    print("--------------------")
    print("Preparatory steps:")
    print("0. Keep actornames.txt in the same folder as this app.")
    print("1. Copy the BladeRunnerPCTLK.xlsx file (latest version, downloaded from Google Sheets) in some folder on your PC.")
    print("--------------------")
    print("%s takes 1 mandatory argument:" % (app_name_spaced))
    _printUsage()
    print("--------------------")
    print("Thank you for using this app.")
    print("Please provide any feedback to: %s " % (company_email))


def main(argv):
    """Run the app with the given argument vector (argv[0] is the program name)."""
    pathToQuoteExcelFile = ""
    if len(argv) == 2:
        if argv[1] == '--help' or argv[1] == '-h':
            _printHelp()
            sys.exit()
        elif argv[1] == '--version' or argv[1] == '-v':
            print("%s %s supports Blade Runner (English version, CD edition)." % (app_name_spaced, app_version))
            print("Please provide any feedback to: %s " % (company_email))
            sys.exit()
    elif len(argv) == 3 and argv[1] == '-x':
        pathToQuoteExcelFile = argv[2]

    if pathToQuoteExcelFile:
        # parse any overrideEncoding file if exists:
        initOverrideEncoding()
        # parse Actors files:
        initActorPropertyEntries()
        # parse the EXCEL File
        inputXLS(pathToQuoteExcelFile)
        outputMIX()
    else:
        print("Invalid syntax\n Try: \n %s --help for more info \n %s --version for version info " % (app_name, app_name))
        _printUsage()
        for tmpArg in argv[1:]:  # skip first argument
            print("\nArgument: %s" % (tmpArg))


if __name__ == "__main__":
    main(sys.argv)

# ---------------------------------------------------------------------------------------------
# The commit that added this script also added the sample data file read by
# initActorPropertyEntries(). Its content, as far as it is visible here, is kept for reference:
#
# diff --git a/devtools/create_bladerunner/subtitles/mixResourceCreator/samples/actornames.txt b/devtools/create_bladerunner/subtitles/mixResourceCreator/samples/actornames.txt
# new file mode 100644 index 0000000000..2836d18de8 --- /dev/null
# +++ b/devtools/create_bladerunner/subtitles/mixResourceCreator/samples/actornames.txt @@ -0,0 +1,75 @@
#
# Id Short ActorDesc #skip first row
# 0 MCCOY McCoy
# 1 STEEL Steele
# 2 GORDO Gordo
# 3 DEKTO Dektora
# 4 GUZZA Guzza
# 5 CLOVI Clovis
# 6 LLUCY Lucy
# 7 IIIZO Izo
# 8 SADIK Sadik
# 9 CRAZY Crazylegs
# 10 LUTHE Luther
# 11 GRIGO Grigorian
# 12 TRANS Transient
# 13 LANCE Lance
# 14 BBBOB Bullet Bob
# 15 RUNCI Runciter
# 16 INSEC Insect Dealer
# 17 TGUAR Tyrell Guard
# 18 EARLQ Early Q
# 19 ZUBEN Zuben
# 20 HASAN Hasan
# 21 MARCU Marcus
# 22 MMMIA Mia
# 23 OLEAR Officer Leary
# 24 OGRAY Officer Grayford
# 25 HANOI Hanoi
# 26 BAKER Baker
# 27 DCLER Desk Clerk
# 28 HOWIE Howie Lee
# 29 FISHD Fish Dealer
# 30 KLEIN Klein
# 31 MURRA Murray
# 32 HBARK Hawker's Barkeep
# 33 HOLLO Holloway
# 34 SWALL Sergeant Walls
# 35 MORAJ Moraji
# 36 TBARD The Bard
# 37 PHOTG Photographer
# 38 DISPA Dispatcher
# 39 ANSWM Answering Machine
# 40 RAJIF Rajif
# 41 GKOLV Governor Kolvig
# 42 ERLQB Early Q Bartender
# 43 HPARR Hawker's Parrot
# 44 TAFPA Taffy Patron
# 45 LOCGU Lockup Guard
# 46 TEENA Teenager
# 47 HPATA Hysteria Patron A
# 48 HPATB Hysteria Patron B
# 49 HPATC Hysteria Patron C
# 50 SHOES Shoeshine Man
# 51 TYREL Tyrell
# 52 CCHEW Chew
# 53 GGAFF Gaff
# 54 BRYAN Bryant
# 55 TAFFY Taffy
# 56 SEBAS Sebastian
# 57 RACHA Rachael
# 58 GDOLL General Doll
# 59 ISABE Isabella
# 60 BLIMP Blimp Guy
# 61 NEWSC Newscaster
# 62 LLEON Leon
# 63 MALAN Male Announcer
# 64 FREEA Free Slot A
# 65 FREEB Free Slot B
# 66 MAGGI Maggie
# 67 ACTGA Actor Genwalker A
# 68 ACTGB Actor Genwalker B
# 69
ACTGC Actor Genwalker C +70 MUTAA Mutant A +71 MUTAB Mutant B +72 MUTAC Mutant C +99 MAINF Mainframe diff --git a/devtools/create_bladerunner/subtitles/mixResourceCreator/samples/configureFontsTranslation.txt b/devtools/create_bladerunner/subtitles/mixResourceCreator/samples/configureFontsTranslation.txt new file mode 100644 index 0000000000..8b4a355ecb --- /dev/null +++ b/devtools/create_bladerunner/subtitles/mixResourceCreator/samples/configureFontsTranslation.txt @@ -0,0 +1,4 @@ +targetEncoding=windows-1253 +fontNameAndOutOfOrderGlyphs=SUBTLS_E#í:Ά,ñ:¥,â:¦,é:§,Ά:£ +fontNameAndOutOfOrderGlyphs=KIA6PT#é:ƒ,ü:– +fontNameAndOutOfOrderGlyphs=TAHOMA#é:ƒ,ü:–
\ No newline at end of file diff --git a/devtools/create_bladerunner/subtitles/mixResourceCreator/samples/sampleCMDParameters.txt b/devtools/create_bladerunner/subtitles/mixResourceCreator/samples/sampleCMDParameters.txt new file mode 100644 index 0000000000..71dd3c6ef8 --- /dev/null +++ b/devtools/create_bladerunner/subtitles/mixResourceCreator/samples/sampleCMDParameters.txt @@ -0,0 +1 @@ +python2.7 packBladeRunnerMIXFromPCTLKXLS-04.py -x ./outSpeech-02-09-2018-1358-TranslatingComms-117PatrasLinks.xls
\ No newline at end of file |