diff options
Diffstat (limited to 'devtools/blade_runner/subtitles/quotesSpreadsheetCreator')
7 files changed, 1404 insertions, 0 deletions
# ---- audFileDecode.py (devtools/blade_runner/subtitles/quotesSpreadsheetCreator) ----
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#
# Pure-Python decoders for the two chunk encodings handled by this tool:
#   * 4-bit IMA ADPCM codes -> signed 16-bit samples (aud_decode_ima_chunk)
#   * "WS" packed deltas     -> 8-bit samples        (aud_decode_ws_chunk)
# Both are ports of the C routines whose signatures are quoted in the comments
# above each function.
#
# NOTE: ctypes and the struct names are no longer used in this module; the
# imports are kept because audFileLib does `from audFileDecode import *`.
import ctypes
from struct import *

my_module_version = "0.50"
my_module_name = "audFileDecode"

# Added to the step-table index after every sample; selected by (code & 7).
aud_ima_index_adjust_table = [-1, -1, -1, -1, 2, 4, 6, 8]

# aud_ima_step_table has 89 entries
aud_ima_step_table = [
    7, 8, 9, 10, 11, 12, 13, 14, 16,
    17, 19, 21, 23, 25, 28, 31, 34, 37,
    41, 45, 50, 55, 60, 66, 73, 80, 88,
    97, 107, 118, 130, 143, 157, 173, 190, 209,
    230, 253, 279, 307, 337, 371, 408, 449, 494,
    544, 598, 658, 724, 796, 876, 963, 1060, 1166,
    1282, 1411, 1552, 1707, 1878, 2066, 2272, 2499, 2749,
    3024, 3327, 3660, 4026, 4428, 4871, 5358, 5894, 6484,
    7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899, 15289,
    16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767]

# Sample deltas for the 2-bit and 4-bit modes of the WS decoder.
aud_ws_step_table2 = [-2, -1, 0, 1]

aud_ws_step_table4 = [
    -9, -8, -6, -5, -4, -3, -2, -1,
    0, 1, 2, 3, 4, 5, 6, 8
]

# Code byte substituted when the decoder is asked for more samples than the
# input holds. The original port used this value as a "dummy workaround"
# because the C code reads past the end of the buffer in that situation.
_IMA_OVERRUN_BYTE = 0xa9

_IMA_MAX_INDEX = len(aud_ima_step_table) - 1  # 88


def _as_signed_byte(value):
    """Return the low 8 bits of *value* reinterpreted as a signed byte."""
    value &= 0xFF
    return value - 0x100 if value & 0x80 else value


# (const xccTHA::byte* audio_in, short* audio_out, int& index, int& sample, int cs_chunk)
# index and sample were passed by reference in C; here they are returned.
def aud_decode_ima_chunk(audioBufferIn, index, sample, cs_chunk):
    """Decode *cs_chunk* IMA ADPCM samples from *audioBufferIn*.

    audioBufferIn -- indexable sequence of integer byte values (signed or
                     unsigned; only the low 8 bits are used). Each byte holds
                     two 4-bit codes, low nibble first.
    index         -- current position in aud_ima_step_table (0..88).
    sample        -- previous output sample (predictor).
    cs_chunk      -- number of samples to produce.

    Returns (samples, index, sample): the list of decoded 16-bit sample
    values plus the updated decoder state to feed into the next call.
    Reads past the end of the input use a fixed dummy code byte instead of
    failing, as the previous implementation did.
    """
    audioBufferOut = []
    for sample_index in range(cs_chunk):
        try:
            packed = audioBufferIn[sample_index >> 1] & 0xFF
        except IndexError:
            packed = _IMA_OVERRUN_BYTE
        # odd samples live in the high nibble, even samples in the low one
        code = (packed >> 4) if (sample_index & 1) else (packed & 0xf)

        step = aud_ima_step_table[index]
        delta = step >> 3
        if code & 1:
            delta += step >> 2
        if code & 2:
            delta += step >> 1
        if code & 4:
            delta += step
        if code & 8:
            sample -= delta
        else:
            sample += delta
        # saturate to the signed 16-bit range
        sample = max(-32768, min(32767, sample))
        audioBufferOut.append(sample)

        index += aud_ima_index_adjust_table[code & 7]
        index = max(0, min(_IMA_MAX_INDEX, index))
    return (audioBufferOut, index, sample)


def aud_decode_clip8(v):
    """Clamp *v* to the unsigned 8-bit range 0..255."""
    if v < 0:
        return 0
    if v > 0xff:
        return 0xff
    return v


# (const xccTHA::byte* r, char* w, int cb_s, int cb_d)
def aud_decode_ws_chunk(inputChunkBuffer, cb_s, cb_d):
    """Decode one WS-compressed chunk into 8-bit samples.

    inputChunkBuffer -- indexable sequence of integer byte values (signed or
                        unsigned; only the low 8 bits are used).
    cb_s             -- number of input bytes in the chunk.
    cb_d             -- number of output bytes the chunk decodes to. When it
                        equals cb_s the chunk is stored uncompressed.

    Returns a list of ints in -128..127, i.e. each output byte as the C
    `char` the reference code writes, so the list can be packed with struct
    format 'b'. Raises IndexError if the chunk is truncated.

    Stream layout: each command byte carries a mode in its top two bits and a
    count in its low six bits:
      mode 0 -- count+1 code bytes, four 2-bit deltas each
      mode 1 -- count+1 code bytes, two 4-bit deltas each
      mode 2 -- count bit 5 set: low five bits are one signed delta;
                otherwise count+1 raw bytes are copied
      mode 3 -- the current sample is repeated count+1 times
    """
    outputChunkBuffer = []

    if cb_s == cb_d:
        # memcpy(w, r, cb_s)
        for pos in range(cb_s):
            outputChunkBuffer.append(_as_signed_byte(inputChunkBuffer[pos]))
        return outputChunkBuffer

    pos = 0
    sample = 0x80
    while pos < cb_s:
        # C: char count = *r++ & 0x3f; switch (r[-1] >> 6)
        # -> the command byte is read BEFORE the cursor advances.
        command = inputChunkBuffer[pos] & 0xFF
        pos += 1
        count = command & 0x3f
        mode = command >> 6

        if mode == 0:
            for _ in range(count + 1):
                code = inputChunkBuffer[pos] & 0xFF
                pos += 1
                for shift in (0, 2, 4, 6):
                    sample = aud_decode_clip8(
                        sample + aud_ws_step_table2[(code >> shift) & 3])
                    outputChunkBuffer.append(_as_signed_byte(sample))
        elif mode == 1:
            for _ in range(count + 1):
                code = inputChunkBuffer[pos] & 0xFF
                pos += 1
                for nibble in (code & 0xf, code >> 4):
                    sample = aud_decode_clip8(
                        sample + aud_ws_step_table4[nibble])
                    outputChunkBuffer.append(_as_signed_byte(sample))
        elif mode == 2:
            if count & 0x20:
                # C: sample += static_cast<char>(count << 3) >> 3
                # i.e. the low five bits sign-extended.
                delta = count & 0x1f
                if delta & 0x10:
                    delta -= 0x20
                sample += delta
                outputChunkBuffer.append(_as_signed_byte(sample))
            else:
                # memcpy(w, r, ++count); r += count; sample = r[-1]
                run_length = count + 1
                for offset in range(run_length):
                    outputChunkBuffer.append(
                        _as_signed_byte(inputChunkBuffer[pos + offset]))
                pos += run_length
                sample = inputChunkBuffer[pos - 1] & 0xFF
        else:
            # memset(w, sample, ++count)
            outputChunkBuffer.extend([_as_signed_byte(sample)] * (count + 1))
    return outputChunkBuffer


class audFileDecode:
    """Stateful IMA ADPCM decoder that carries index/sample across chunks."""

    m_index = -1
    m_sample = -1

    def __init__(self, index=0, sample=0):
        self.m_index = index
        self.m_sample = sample

    def index(self):
        """Return the current step-table index."""
        return self.m_index

    # (const xccTHA::byte* audio_in, short* audio_out, int cs_chunk)
    def decode_chunk(self, audio_in, cs_chunk):
        """Decode *cs_chunk* samples from *audio_in* and update the state.

        Returns the list of decoded 16-bit sample values.
        """
        (audio_Out, self.m_index, self.m_sample) = aud_decode_ima_chunk(
            audio_in, self.m_index, self.m_sample, cs_chunk)
        return audio_Out


if __name__ == '__main__':
    # main()
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print("Running %s as main module" % (my_module_name))
    decodeInstance = audFileDecode()
# ---- audFileLib.py (devtools/blade_runner/subtitles/quotesSpreadsheetCreator) ----
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#
# Parses an AUD audio file held in memory (12-byte header followed by chunks,
# each with an 8-byte chunk header) and writes the decoded audio as a WAV file.
#
import os, sys, shutil
import wave
import struct
from struct import *
from audFileDecode import *
import ctypes

my_module_version = "0.50"
my_module_name = "audFileLib"

#constants
aud_chunk_id = 0x0000deaf
SIZE_OF_AUD_HEADER_IN_BYTES = 12
SIZE_OF_AUD_CHUNK_HEADER_IN_BYTES = 8

# Explicit little-endian layouts with standard sizes and no padding. The
# previous code used native byte order, which only matches on little-endian
# hosts. The sample rate and the chunk sizes are read unsigned, as the TODO
# notes on the fields below suggested, so values above 32767 no longer turn
# negative.
_AUD_HEADER_STRUCT = struct.Struct('<HiiBB')    # samplerate, size_in, size_out, flags, compression
_AUD_CHUNK_HEADER_STRUCT = struct.Struct('<HHi')  # size_in, size_out, id


class AudHeader:
    samplerate = -1     #// Frequency // read as uint16
    size_in = -1        #// Size of file (without header) // int32_t // TODO should be unsigned (?)
    size_out = -1       #// Size of output data // int32_t // TODO should be unsigned (?)
    flags = -1          #// bit 0=stereo, bit 1=16bit // read as uint8
    compression = -1    #// 1=WW compressed, 99=IMA ADPCM (0x63) // read as uint8

    def __init__(self):
        return


#//The rest of the AUD files is divided in chunks. These are usually 512
#//bytes long, except for the last one.
class AudChunkHeader:
    size_in = -1        #// Size of compressed data // read as uint16
    size_out = -1       #// Size of output data // read as uint16
    id = 0x0000FFFF     #// Always 0x0000DEAF // int32_t

    def __init__(self):
        return


def _read_chunk_header(inAudFileBytesBuffer, offset):
    """Unpack the 8-byte chunk header stored at *offset* into an AudChunkHeader."""
    chunkHeader = AudChunkHeader()
    (chunkHeader.size_in,
     chunkHeader.size_out,
     chunkHeader.id) = _AUD_CHUNK_HEADER_STRUCT.unpack_from(inAudFileBytesBuffer, offset)
    return chunkHeader


#
#
#
class audFile:
    """An AUD file: parsed header plus helpers to decode the chunks to PCM."""

    def __init__(self):
        # One header per instance. This used to be a class attribute, which
        # made every audFile object share (and overwrite) the same AudHeader.
        self.m_header = AudHeader()

    def extract_as_wav(self, audBytesBuff, filename):
        """Decode *audBytesBuff* and write it to *filename* as a WAV file.

        loadAudFile() must have been called first so the header is populated.
        Returns 0 on success, or 1 (without creating the file) when decode()
        could not produce data because the compression type is unsupported.
        """
        print("Saving to wav: " + filename)

        decodedAudio = self.decode(audBytesBuff)
        if decodedAudio is None:
            return 1

        waveWritFile = wave.open(filename, 'wb')
        try:
            waveWritFile.setnchannels(self.get_c_channels())
            waveWritFile.setsampwidth(self.get_cb_sample())
            waveWritFile.setframerate(self.get_samplerate())
            waveWritFile.setnframes(self.get_c_samples())
            waveWritFile.writeframesraw(decodedAudio)
        finally:
            # always release the file handle, even if a setter rejects a value
            waveWritFile.close()
        return 0

    def loadAudFile(self, audBytesBuff, maxLength):
        """Parse the 12-byte AUD header at the start of *audBytesBuff*.

        maxLength -- upper bound used to sanity-check the header's size_in.

        Returns True when the header looks valid: sample rate within
        8000..48000, size_in fits in maxLength, and flags match the
        compression type (0 for compression 1, 2 for compression 0x63).
        Raises struct.error if the buffer is shorter than the header.
        """
        hdr = self.header()
        (hdr.samplerate,
         hdr.size_in,
         hdr.size_out,
         hdr.flags,
         hdr.compression) = _AUD_HEADER_STRUCT.unpack_from(audBytesBuff, 0)
        print("samplerate: %d\tsizeIn: %d\tsizeOut: %d\tflags: %d\tcompression: %d" % (self.get_samplerate(), hdr.size_in, hdr.size_out, hdr.flags, hdr.compression))

        if self.get_samplerate() < 8000 or self.get_samplerate() > 48000 or hdr.size_in > (maxLength - SIZE_OF_AUD_HEADER_IN_BYTES):
            print("AUD HEADER SIZE ERROR::2")
            return False
        if hdr.compression == 1 and hdr.flags != 0:
            return False
        if hdr.compression == 0x63 and hdr.flags != 2:
            return False
        return True

    def _chunk_header_at(self, inAudFileBytesBuffer, chunkPos, inAudFileSize):
        """Read and validate the chunk header located at *chunkPos*.

        Returns (0, dataPos, header) where dataPos is the offset of the
        chunk's payload, or (-1, chunkPos, None) when the header or its
        payload would extend past *inAudFileSize*.
        """
        if chunkPos + SIZE_OF_AUD_CHUNK_HEADER_IN_BYTES > inAudFileSize:
            return (-1, chunkPos, None)
        chunkHeader = _read_chunk_header(inAudFileBytesBuffer, chunkPos)
        if chunkPos + SIZE_OF_AUD_CHUNK_HEADER_IN_BYTES + chunkHeader.size_in > inAudFileSize:
            return (-1, chunkPos, None)
        return (0, chunkPos + SIZE_OF_AUD_CHUNK_HEADER_IN_BYTES, chunkHeader)

    # int AudFile::get_chunk_header(int i, std::fstream& fs, AudFileNS::pos_type startAudFilepos, AudFileNS::pos_type endAudFilepos, AudChunkHeader& outAudChunkHeader)
    def get_chunk_header(self, chunkIdx, inAudFileBytesBuffer, inAudFileSize):
        """Locate chunk number *chunkIdx* (0-based) by walking from the first chunk.

        Returns (0, dataPos, header) on success or (-1, pos, None) when the
        walk runs off the end of the buffer.
        """
        rAudPos = SIZE_OF_AUD_HEADER_IN_BYTES
        for _ in range(chunkIdx):
            if rAudPos + SIZE_OF_AUD_CHUNK_HEADER_IN_BYTES > inAudFileSize:
                return (-1, rAudPos, None)
            skippedHeader = _read_chunk_header(inAudFileBytesBuffer, rAudPos)
            rAudPos += SIZE_OF_AUD_CHUNK_HEADER_IN_BYTES + skippedHeader.size_in
        return self._chunk_header_at(inAudFileBytesBuffer, rAudPos, inAudFileSize)

    # int AudFile::get_chunk_data(int i, std::fstream& fs, int sizeToRead, AudFileNS::byte* byteChunkDataPtr)
    def get_chunk_data(self, inAudFileBytesBuffer, startOffs, sizeToRead):
        """Return (0, data) where data is a list of *sizeToRead* signed byte values.

        Raises struct.error if the requested range exceeds the buffer.
        """
        if sizeToRead <= 0:
            return (0, [])
        # one unpack call for the whole payload instead of one per byte
        outChunkDataLst = list(struct.unpack_from('<%db' % sizeToRead, inAudFileBytesBuffer, startOffs))
        return (0, outChunkDataLst)

    def decode(self, audBytesBuff):
        """Decode every chunk of *audBytesBuff* into raw PCM bytes.

        Compression 1 yields 8-bit samples, compression 0x63 yields
        little-endian signed 16-bit samples. Decoding stops once the header's
        size_out has been produced or no further valid chunk header is found.
        Returns the packed bytes, or None for any other compression value.
        """
        print("DECODING...")
        compression = self.header().compression
        if compression != 1 and compression != 0x63:
            return None

        cb_sample = self.get_cb_sample()
        cb_audio = cb_sample * self.get_c_samples()  # basically this should be the size_out
        imaDecoder = audFileDecode() if compression == 0x63 else None
        audBuffSize = len(audBytesBuff)

        binaryDataOutLst = []
        # Walk the chunks once, remembering where the next header starts,
        # instead of re-scanning from the first chunk for every chunk.
        chunkPos = SIZE_OF_AUD_HEADER_IN_BYTES
        wIndex = 0
        while wIndex < cb_audio:
            (errGetChunk, bufferDataPos, out_chunk_header) = self._chunk_header_at(audBytesBuff, chunkPos, audBuffSize)
            if errGetChunk != 0:
                if compression == 0x63:
                    print("Error OR End file case while getting COMPRESSED chunk header!")
                break
            (errorGCD, byteChunkDataLst) = self.get_chunk_data(audBytesBuff, bufferDataPos, out_chunk_header.size_in)
            if compression == 1:
                decodedAudioChunkAsLst = aud_decode_ws_chunk(byteChunkDataLst, out_chunk_header.size_in, out_chunk_header.size_out)
            else:
                decodedAudioChunkAsLst = imaDecoder.decode_chunk(byteChunkDataLst, out_chunk_header.size_out // cb_sample)
            binaryDataOutLst.extend(decodedAudioChunkAsLst)
            wIndex += out_chunk_header.size_out
            chunkPos = bufferDataPos + out_chunk_header.size_in

        sampleFormat = 'b' if compression == 1 else 'h'
        return struct.pack('<%d%s' % (len(binaryDataOutLst), sampleFormat), *binaryDataOutLst)

    def header(self):
        """Return this file's AudHeader."""
        return self.m_header

    def get_c_samples(self):
        """Return size_out divided by the bytes per sample (integer division)."""
        return self.m_header.size_out // self.get_cb_sample()

    def get_samplerate(self):
        """Return the sample rate stored in the header."""
        return self.m_header.samplerate

    # flag bit 0 is stereo(set) mono(clear)
    def get_c_channels(self):
        """Return 2 when flag bit 0 is set, otherwise 1."""
        return 2 if (self.m_header.flags & 0x01) else 1

    # flag bit 1 is 16bit(set) 8bit (clear)
    def get_cb_sample(self):
        """Return the bytes per sample: 2 when flag bit 1 is set, otherwise 1."""
        return 2 if (self.m_header.flags & 0x02) else 1


#
#
#
if __name__ == '__main__':
    # main()
    print("Running %s as main module" % (my_module_name))
    # assumes a file named 00000000.AUD in the current directory
    try:
        inAUDFile = open(os.path.join('.', '00000000.AUD'), 'rb')
    except (IOError, OSError):
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        raise
    with inAUDFile:
        allOfAudFileInBuffer = inAUDFile.read()
    audFileInstance = audFile()
    # only attempt the export when the header passed validation
    if audFileInstance.loadAudFile(allOfAudFileInBuffer, len(allOfAudFileInBuffer)):
        audFileInstance.extract_as_wav(allOfAudFileInBuffer, './tmp.wav')
\ No newline at end of file diff --git a/devtools/blade_runner/subtitles/quotesSpreadsheetCreator/samples/actornames.txt b/devtools/blade_runner/subtitles/quotesSpreadsheetCreator/samples/actornames.txt new file mode 100644 index 0000000000..2836d18de8 --- /dev/null +++ b/devtools/blade_runner/subtitles/quotesSpreadsheetCreator/samples/actornames.txt @@ -0,0 +1,75 @@ +Id Short ActorDesc #skip first row +0 MCCOY McCoy +1 STEEL Steele +2 GORDO Gordo +3 DEKTO Dektora +4 GUZZA Guzza +5 CLOVI Clovis +6 LLUCY Lucy +7 IIIZO Izo +8 SADIK Sadik +9 CRAZY Crazylegs +10 LUTHE Luther +11 GRIGO Grigorian +12 TRANS Transient +13 LANCE Lance +14 BBBOB Bullet Bob +15 RUNCI Runciter +16 INSEC Insect Dealer +17 TGUAR Tyrell Guard +18 EARLQ Early Q +19 ZUBEN Zuben +20 HASAN Hasan +21 MARCU Marcus +22 MMMIA Mia +23 OLEAR Officer Leary +24 OGRAY Officer Grayford +25 HANOI Hanoi +26 BAKER Baker +27 DCLER Desk Clerk +28 HOWIE Howie Lee +29 FISHD Fish Dealer +30 KLEIN Klein +31 MURRA Murray +32 HBARK Hawker's Barkeep +33 HOLLO Holloway +34 SWALL Sergeant Walls +35 MORAJ Moraji +36 TBARD The Bard +37 PHOTG Photographer +38 DISPA Dispatcher +39 ANSWM Answering Machine +40 RAJIF Rajif +41 GKOLV Governor Kolvig +42 ERLQB Early Q Bartender +43 HPARR Hawker's Parrot +44 TAFPA Taffy Patron +45 LOCGU Lockup Guard +46 TEENA Teenager +47 HPATA Hysteria Patron A +48 HPATB Hysteria Patron B +49 HPATC Hysteria Patron C +50 SHOES Shoeshine Man +51 TYREL Tyrell +52 CCHEW Chew +53 GGAFF Gaff +54 BRYAN Bryant +55 TAFFY Taffy +56 SEBAS Sebastian +57 RACHA Rachael +58 GDOLL General Doll +59 ISABE Isabella +60 BLIMP Blimp Guy +61 NEWSC Newscaster +62 LLEON Leon +63 MALAN Male Announcer +64 FREEA Free Slot A +65 FREEB Free Slot B +66 MAGGI Maggie +67 ACTGA Actor Genwalker A +68 ACTGB Actor Genwalker B +69 ACTGC Actor Genwalker C +70 MUTAA Mutant A +71 MUTAB Mutant B +72 MUTAC Mutant C +99 MAINF Mainframe diff --git a/devtools/blade_runner/subtitles/quotesSpreadsheetCreator/samples/outSpeech-01-11-2018-1414.xls 
b/devtools/blade_runner/subtitles/quotesSpreadsheetCreator/samples/outSpeech-01-11-2018-1414.xls Binary files differnew file mode 100644 index 0000000000..1a70a220c6 --- /dev/null +++ b/devtools/blade_runner/subtitles/quotesSpreadsheetCreator/samples/outSpeech-01-11-2018-1414.xls diff --git a/devtools/blade_runner/subtitles/quotesSpreadsheetCreator/samples/sampleCMDParameters.txt b/devtools/blade_runner/subtitles/quotesSpreadsheetCreator/samples/sampleCMDParameters.txt new file mode 100644 index 0000000000..a41e6c291a --- /dev/null +++ b/devtools/blade_runner/subtitles/quotesSpreadsheetCreator/samples/sampleCMDParameters.txt @@ -0,0 +1 @@ +python2.7 sortBladeRunnerWavs02.py -op F:\WORKSPACE_\BladeRunnerExtrTools\br-mixer-master\data\WAV -ip H:/Games/BladeRunner -xtre
\ No newline at end of file diff --git a/devtools/blade_runner/subtitles/quotesSpreadsheetCreator/sortBladeRunnerWavs02.py b/devtools/blade_runner/subtitles/quotesSpreadsheetCreator/sortBladeRunnerWavs02.py new file mode 100644 index 0000000000..4bc5bb7e23 --- /dev/null +++ b/devtools/blade_runner/subtitles/quotesSpreadsheetCreator/sortBladeRunnerWavs02.py @@ -0,0 +1,722 @@ +#!/usr/bin/python +# -*- coding: UTF-8 -*- +# +# Created by Praetorian (ShadowNate) for Classic Adventures in Greek +# classic.adventures.in.greek@gmail.com +# +# TODO update README +# TODO test recreation of TRE file (Especially in Credits which the original has a few special characters (font delegates)) +# +# DONE Add code and switch option: to get the blade runner installation directory as input, then find the TLK files and extract them with proper naming +# DONE fix proper names for sheets as per latest code changes +# +import os, sys, shutil +from os import walk, errno +import xlwt +import csv +import os.path +from xlwt import * +from audFileLib import * +from treFileLib import * + +# encoding=utf8 +#reload(sys) +#sys.setdefaultencoding('utf8') + +company_email = "classic.adventures.in.greek@gmail.com" +app_version = "0.50" +app_name = "sortBladeRunnerWavs" +app_name_spaced = "Sort Blade Runner Audio Speech Files" +stringReplacementForRootFolderWithExtractedFiles = "" +numReplaceStartingCharacters = 0 + +OUTPUT_XLS_FILENAME = 'out.xls' +OUTPUT_XLS_QUOTES_SHEET = 'INGQUO_E.TRE' + +supportedTLKInputFiles = [('1.TLK', 'TLK01'), ('2.TLK', 'TLK02'), ('3.TLK', 'TLK03'), ('A.TLK', 'TLK0A'), ('SPCHSFX.TLK', 'TLKSPCHSFX')] +supportedMIXInputFiles = ['STARTUP.MIX'] +# 15 TRE files +supportedExportedTREFiles = ['CLUES.TRE','ACTORS.TRE','CRIMES.TRE','CLUETYPE.TRE','KIA.TRE','SPINDEST.TRE','VK.TRE','OPTIONS.TRE','DLGMENU.TRE','ENDCRED.TRE','HELP.TRE','SCORERS.TRE','KIACRED.TRE','ERRORMSG.TRE','AUTOSAVE.TRE'] + +wavfiles = [] +wavfilesNoDups = [] +actorPropertyEntries = [] #[0]:id, [1]:ShortHand Name 
[2]:Full Name +actorPropertyEntriesWasInit = False + + +# strFileName should be the full file name (including extension) +def calculateFoldHash(strFileName): + i = 0 + hash = 0 + strParam = strFileName.upper() + lenFileName = len(strParam); + while i < lenFileName and i < 12: + groupSum = 0 + # work in groups of 4 bytes + for j in range(0, 4): + # LSB first, so the four letters in the string are re-arranged (first letter goes to lower place) + groupSum >>= 8; + if (i < lenFileName): + groupSum |= (ord(strParam[i]) << 24) + i += 1 + else: # if i >= lenFileName but still haven't completed the four byte loop add 0s + groupSum |= 0 + hash = ((hash << 1) | ((hash >> 31) & 1)) + groupSum + hash &= 0xFFFFFFFF # mask here! + #print (strParam +': ' +''.join('{:08X}'.format(hash))) + return hash + +# Fill the actorPropertyEntries table +def initActorPropertyEntries(): + global actorPropertyEntriesWasInit + global actorPropertyEntries + firstLine = True +# print "opening actornames" + with open("./actornames.txt") as tsv: + for line in csv.reader(tsv, dialect="excel-tab"): + #skip first line header + if firstLine == True: +# print "skippingHeader" + firstLine = False + else: + actorPropertyEntries.append(line) + actorPropertyEntriesWasInit = True + tsv.close() + +def getActorShortNameById(lookupActorId): + global actorPropertyEntriesWasInit + global actorPropertyEntries + if not actorPropertyEntriesWasInit: + return '' + else: + for actorEntryTmp in actorPropertyEntries: + if int(actorEntryTmp[0]) == int(lookupActorId): + return actorEntryTmp[1] + return '' + + +def getActorFullNameById(lookupActorId): + global actorPropertyEntriesWasInit + global actorPropertyEntries + if not actorPropertyEntriesWasInit: + return '' + else: + for actorEntryTmp in actorPropertyEntries: + if int(actorEntryTmp[0]) == int(lookupActorId): + return actorEntryTmp[2] + return '' + +def getActorIdByShortName(lookupActorShortName): + global actorPropertyEntriesWasInit + global actorPropertyEntries + 
if not actorPropertyEntriesWasInit: + return '' + else: + for actorEntryTmp in actorPropertyEntries: + if actorEntryTmp[1] == lookupActorShortName: + return actorEntryTmp[0].zfill(2) + return '' + +def getActorShortNameAndLocalQuoteIdByAUDHashID(audHashId): + actorId = 0 + actorShortName = '' + actorLocalQuoteId = 0 + if not actorPropertyEntriesWasInit: + print "Error actor properties were not initialized!" + return (actorId, actorShortName, actorLocalQuoteId) + + for actorEntryTmp in actorPropertyEntries: + if( (audHashId - (int(actorEntryTmp[0]) * 10000) ) >= 0) and ((audHashId - (int(actorEntryTmp[0]) * 10000)) < 10000): + actorId = int(actorEntryTmp[0]) + actorShortName = actorEntryTmp[1] + actorLocalQuoteId = audHashId - (actorId * 10000) + return (actorId, actorShortName, actorLocalQuoteId) + return (actorId, actorShortName, actorLocalQuoteId) + +# Aux. Ensure existence of output directory +def ensure_dir(directory): + try: + os.makedirs(directory) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +# +# Reading in the INPUT TLK files and checking all the AUD file properties +# +def inputTLKsExtract(inputTLKpath, outputWAVpath): + # try to open all TLK file entries from supportedTLKInputFiles + # then per TLK file + # create an output folder in the OUTPUT PATH named TLK## for the 1, 2, 3 TLK and TLKSPCHSFX for the SPCHSFX.TLK + # printout: + # total entries + # total data size + # and per entry the + # fileID + # segment offset + # file size + print "Checking in %s for TLK files to extract to %s" % (inputTLKpath, outputWAVpath) + inputTLKFilesFound = [] + # breaking after first for loop yields only the top directory files, which is what we want + for (dirpath, dirnames, filenames) in walk(inputTLKpath): + for filename in filenames: + for tlkTuple in supportedTLKInputFiles: + if filename.upper() == tlkTuple[0]: + inputTLKFilesFound.append(tlkTuple) + break + for tmpTLKfileTuple in inputTLKFilesFound: + print "Found TLK: %s" % ('"' + inputTLKpath + 
tmpTLKfileTuple[0] + '"') + errorFound = False + inTLKFile = None + # + # Create output folder if not exists at output path + print "Ensuring output directory %s" % (os.path.join(outputWAVpath, tmpTLKfileTuple[1] )) + ensure_dir(os.path.join(outputWAVpath, tmpTLKfileTuple[1] ) ) + try: + inTLKFile = open(os.path.join(inputTLKpath,tmpTLKfileTuple[0]), 'rb') + except: + errorFound = True + print "Unexpected error:", sys.exc_info()[0] + raise + if not errorFound: + tmpBuff = inTLKFile.read(2) + # H: unsigned short (2 bytes) followed by I: unsigned int (4 bytes) + tlkFileEntriesNumTuple = struct.unpack('H', tmpBuff) + numOfEntriesToExtract = tlkFileEntriesNumTuple[0] + tmpBuff = inTLKFile.read(4) + tlkFileDataSegmentSizeTuple = struct.unpack('I', tmpBuff) + allTlkFileSize = tlkFileDataSegmentSizeTuple[0] + inTLKFile.seek(0, 2) # go to file end + allActualBytesInMixFile = inTLKFile.tell() + inTLKFile.seek(6, 0) # go to start of table of TLK file entries (right after the 6 bytes header) + # 2 + 4 = 6 bytes short MIX header + # 12 bytes per TLK entry in entries table + # quick size validation + print "Entries: %d, data segment %d bytes" % (numOfEntriesToExtract, allTlkFileSize) + if allActualBytesInMixFile != 2 + 4 + 12 * numOfEntriesToExtract + allTlkFileSize: + print "Error: TLK file size mismatch with reported size in header for %s!" 
% (tmpTLKfileTuple[0]) + else: + # + # 12 bytes per entry + # 4 bytes: ID + # 4 bytes: Offset in data segment + # 4 bytes: Size of data + # + for i in range(0, numOfEntriesToExtract): + inTLKFile.seek(2 + 4 + 12*i) + tmpBuff = inTLKFile.read(4) + tmpRdTuple = struct.unpack('I', tmpBuff) + idOfAUDEntry = tmpRdTuple[0] + tmpBuff = inTLKFile.read(4) + tmpRdTuple = struct.unpack('I', tmpBuff) + offsetOfAUDEntry = tmpRdTuple[0] + tmpBuff = inTLKFile.read(4) + tmpRdTuple = struct.unpack('I', tmpBuff) + sizeOfAUDEntry = tmpRdTuple[0] + print "Entry: %s, offset %s, data segment %s bytes" % (''.join('{:08X}'.format(idOfAUDEntry)), ''.join('{:08X}'.format(offsetOfAUDEntry)),''.join('{:08X}'.format(sizeOfAUDEntry))) + # + # put file in AUD object + # do we need AUD decode? + # create WAV from AUD + # write WAV to appropriate output folder + # Figure out proper naming for file + # then: + # AudFile aud; + # aud.loadAudFile(fs); (fs is file stream) + # aud.extract_as_wav(fs, offset, offset + int(sizeof(AudHeader)) + aud.header().size_in, target); + # + # + inTLKFile.seek(2 + 4 + 12*numOfEntriesToExtract + offsetOfAUDEntry) + if(offsetOfAUDEntry + sizeOfAUDEntry > allTlkFileSize): + print "Error: AUD file size mismatch with reported size in entry header!" + else: + audFileBuffer = inTLKFile.read(sizeOfAUDEntry) + if (len(audFileBuffer) == sizeOfAUDEntry): + # load Aud file + thisAudFile = audFile() + if (thisAudFile.loadAudFile(audFileBuffer, allTlkFileSize)): + # print "AUD file load successful!" + # find + # print "Emulating Wav write to appropriate folder..." 
def _treTextToUnicode(rawText):
    """Convert one raw TRE string (byte string) to unicode for the spreadsheet.

    In the original TRE files (kiacred and endcred) the bytes 0x81 and 0x82
    stand for 'ü' and 'é'; they are mapped explicitly before decoding.
    Any other undecodable byte becomes U+FFFD instead of raising
    UnicodeDecodeError and aborting the whole export.
    """
    fixedText = rawText.replace(b'\x81', u'\xfc'.encode('utf-8'))
    fixedText = fixedText.replace(b'\x82', u'\xe9'.encode('utf-8'))
    try:
        return fixedText.decode('utf-8')
    except UnicodeDecodeError:
        return fixedText.decode('utf-8', 'replace')


def _writeTreSheet(excelOutBook, treFileName, stringEntries):
    """Add one sheet named after the TRE file and fill it with (TextId, Text) rows."""
    sh = excelOutBook.add_sheet(treFileName)
    # First row: title. Second row: column headers. Data starts at row 2.
    sh.write(0, 0, 'Text Resource File: %s' % (treFileName))
    sh.write(1, 0, 'TextId')
    sh.write(1, 1, 'Text')
    for rowIdx, (textId, rawText) in enumerate(stringEntries, 2):
        sh.write(rowIdx, 0, textId)
        sh.write(rowIdx, 1, _treTextToUnicode(rawText))


def _extractTREsFromOpenMIX(inMIXFile, mixFileName, treNameByHash, excelOutBook):
    """Parse an already opened MIX file and export every supported TRE entry.

    MIX layout as read here:
      2 bytes: number of entries, 4 bytes: size of the data segment,
      then 12 bytes per entry (4: ID, 4: offset in data segment, 4: size),
      then the data segment.

    Returns the number of TRE files that were loaded successfully.
    """
    totalTREs = 0
    headerBuff = inMIXFile.read(6)
    if len(headerBuff) != 6:
        # too short to even hold the header; previously this crashed in struct.unpack
        print("Error: MIX file size mismatch with reported size in header for %s!" % (mixFileName))
        return totalTREs
    # explicit little-endian, standard sizes: H = 2 bytes, I = 4 bytes
    (numOfEntriesToExtract, allMixFileSize) = struct.unpack('<HI', headerBuff)
    inMIXFile.seek(0, 2) # go to file end
    allActualBytesInMixFile = inMIXFile.tell()
    print("Entries: %d, data segment %d bytes" % (numOfEntriesToExtract, allMixFileSize))

    dataSegmentStart = 2 + 4 + 12 * numOfEntriesToExtract
    # quick size validation
    if allActualBytesInMixFile != dataSegmentStart + allMixFileSize:
        print("Error: MIX file size mismatch with reported size in header for %s!" % (mixFileName))
        return totalTREs

    # the size check above guarantees the whole entries table is present
    inMIXFile.seek(6, 0)
    entriesTable = inMIXFile.read(12 * numOfEntriesToExtract)
    for entryIdx in range(0, numOfEntriesToExtract):
        (idOfMIXEntry, offsetOfMIXEntry, sizeOfMIXEntry) = struct.unpack_from('<III', entriesTable, 12 * entryIdx)
        currTreFileName = treNameByHash.get(idOfMIXEntry)
        if currTreFileName is None:
            # not one of the supported TRE resources
            continue
        print("Entry Name: %s, Entry ID: %s, offset %s, data segment %s bytes" % (currTreFileName, '{:08X}'.format(idOfMIXEntry), '{:08X}'.format(offsetOfMIXEntry), '{:08X}'.format(sizeOfMIXEntry)))
        if offsetOfMIXEntry + sizeOfMIXEntry > allMixFileSize:
            print("Error: TRE file size mismatch with reported size in entry header!")
            continue
        inMIXFile.seek(dataSegmentStart + offsetOfMIXEntry)
        treFileBuffer = inMIXFile.read(sizeOfMIXEntry)
        if len(treFileBuffer) != sizeOfMIXEntry:
            print("Error while reading TRE file %s into mem buffer" % ('{:08X}'.format(idOfMIXEntry)))
            continue
        thisTreFile = treFile()
        if not thisTreFile.loadTreFile(treFileBuffer, allMixFileSize):
            print("Error while LOADING TRE file!")
            continue
        print("TRE file loaded")
        if excelOutBook is not None:
            _writeTreSheet(excelOutBook, currTreFileName, thisTreFile.stringEntriesLst)
        totalTREs = totalTREs + 1
    return totalTREs


def inputMIXExtractTREs(inputMIXpath, excelOutBook = None):
    """Find the supported MIX files in inputMIXpath and export their TRE texts.

    Only the top directory of inputMIXpath is searched. For every supported
    MIX file found, each entry whose ID equals the fold hash of a name in
    supportedExportedTREFiles is loaded as a TRE file; when excelOutBook is
    given, one sheet per TRE file is added to it.

    inputMIXpath -- folder that contains the MIX files
    excelOutBook -- optional workbook object providing add_sheet(); None skips the export

    Raises IOError (after printing a message) when a found MIX file cannot be opened.
    """
    print("Checking in %s for MIX files to extract TRE's from" % (inputMIXpath))
    # breaking after the first walk() result yields only the top directory files, which is what we want
    topLevelFilenames = []
    for (dirpath, dirnames, filenames) in walk(inputMIXpath):
        topLevelFilenames = filenames
        break
    # keep the name as it exists on disk (matching is case-insensitive), so that
    # the open() below also works on case-sensitive filesystems
    inputMIXFilesFound = []
    for filename in topLevelFilenames:
        for mixFileName in supportedMIXInputFiles:
            if filename.upper() == mixFileName:
                inputMIXFilesFound.append(filename)
                break

    # hash every supported TRE name once; the first name wins on a hash clash,
    # which matches the first-match order of the former linear search
    treNameByHash = {}
    for suppTREFileName in supportedExportedTREFiles:
        treNameByHash.setdefault(calculateFoldHash(suppTREFileName), suppTREFileName)

    for tmpMIXfileName in inputMIXFilesFound:
        mixFullPath = os.path.join(inputMIXpath, tmpMIXfileName)
        print('Found MIX: "%s"' % (mixFullPath))
        try:
            inMIXFile = open(mixFullPath, 'rb')
        except IOError:
            print("Unexpected error: %s" % (sys.exc_info()[0],))
            raise
        # the with-block closes the file even if parsing raises
        with inMIXFile:
            totalTREs = _extractTREsFromOpenMIX(inMIXFile, tmpMIXfileName, treNameByHash, excelOutBook)
        print("Total TREs: %d " % (totalTREs))
    return
#
# Creating the OUTPUT XLS file with one sheet named as the @param sheet with entries based on the list1 (wav files, without duplicates)
#
def outputXLS(filename, sheet, list1, parseTREResourcesAlso = False, mixInputFolderPath = ''):
    """Write the quotes spreadsheet and save it as filename.

    filename -- path of the XLS file to save
    sheet -- name of the quotes sheet
    list1 -- entries of the form
             '<actorId>#<quoteId>#<quote>#<actorFullName>&<resourceFolder>&<fullPathOfWav>';
             an entry that does not split this way is written as-is and flagged
             with 'error' in the Notes column
    parseTREResourcesAlso -- when true (and mixInputFolderPath is not empty) the
             TRE texts of the MIX files are added as extra sheets via
             inputMIXExtractTREs()
    mixInputFolderPath -- folder with the MIX files

    Reads the module globals stringReplacementForRootFolderWithExtractedFiles and
    numReplaceStartingCharacters to rewrite the leading part of each link path.
    """
    global stringReplacementForRootFolderWithExtractedFiles
    global numReplaceStartingCharacters
    book = xlwt.Workbook()
    sh = book.add_sheet(sheet)
    # First row: title
    sh.write(0, 0, 'BladeRunnerTLK In-Game dialogue / voiceover quotes')
    # Second row: column headers (the 'To Actor' column is never filled in here)
    columnTitles = ('Filename', 'Quote', 'By Actor', 'Notes', 'To Actor', 'Resource', 'ShortHandFileName')
    for colIdx, columnTitle in enumerate(columnTitles):
        sh.write(1, colIdx, columnTitle)

    # data rows start right below the two header rows
    for rowIdx, e1 in enumerate(list1, 2):
        entryTokens = e1.split('&', 2)
        filenameTokens = entryTokens[0].split('#', 3)
        if len(entryTokens) != 3 or len(filenameTokens) != 4:
            sh.write(rowIdx, 0, e1)
            #Notes
            sh.write(rowIdx, 3, 'error')
            continue

        (actorId, quoteId, quoteText, actorFullName) = filenameTokens
        resourceFolder = entryTokens[1]
        realPathOfFileNameToLink = entryTokens[2]

        #ActorId-QuoteId.AUD
        sh.write(rowIdx, 0, actorId + '-' + quoteId + '.AUD')
        #Quote: the text after the first '-' if there is one, else the whole token
        sh.write(rowIdx, 1, quoteText.split('-', 1)[-1])
        #byActor
        sh.write(rowIdx, 2, actorFullName)
        #Notes
        sh.write(rowIdx, 3, 'TODO')
        #ResourceFolder
        sh.write(rowIdx, 5, resourceFolder)
        #ShortHandFileName
        tmpActorShortHand = getActorShortNameById(actorId)
        shortHandFileName = tmpActorShortHand + '_' + quoteId + '_' + quoteText + '.WAV'

        # Swap only the LEADING numReplaceStartingCharacters characters for the
        # replacement string. (str.replace() on the prefix text would also
        # rewrite any later occurrence of that text inside the path.)
        if stringReplacementForRootFolderWithExtractedFiles and numReplaceStartingCharacters > 0:
            realPathOfFileNameToLink = stringReplacementForRootFolderWithExtractedFiles + realPathOfFileNameToLink[numReplaceStartingCharacters:]

        #works in Linux + Libreoffice
        # also works in Windows + LibreOffice (run from msys) -- tried something like:
        # python sortBladeRunnerWavs.py -p /g/WORKSPACE/BladeRunnerWorkspace/br-mixer-master/data/WAV -m "G:/WORKSPACE/BladeRunnerWorkspace/br-mixer-master/data/WAV"
        #TODO put real full path for each file as FILE URL, and real (or approximate shorthand file name as alias)
        hyperlinkAudioFormula = 'HYPERLINK("file://%s","%s")' % (realPathOfFileNameToLink, shortHandFileName)
        sh.write(rowIdx, 6, Formula(hyperlinkAudioFormula))

    if parseTREResourcesAlso and mixInputFolderPath != '':
        inputMIXExtractTREs(mixInputFolderPath, book)
        # TODO add sheets
        # TODO handle special string characters (to UTF-8)

    book.save(filename)
#
#
#
# ########################
# main
# 00_0000 -- DealsInInsects dupl TLK01, TLK0A
# 00_0510 -- ThinkingOfChangingJobs-Leon dupl TLK02, TLK03
# 00-8520 -- WhatDoYouKnow dupl TLK01, TLK0A

# Total unique quotes seems to be 5495!
# TODO rename files in folders to conform to the underscore '_' and '-' format (a few don't -- let's have them all conforming!)
# #########################
#
# Script entry point: parses the command line, optionally extracts WAVs from
# the TLK files, collects the extracted WAV files, rewrites each file name into
# an '#'/'&' separated record, merges duplicates and writes the spreadsheet.
# NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
# listing -- confirm it against the original file.
if __name__ == "__main__":
    TMProotFolderWithExtractedFiles = ""
    TMProotFolderWithInputTLKFiles = ""

    extractWavFilesMode = False
    extractTreFilesMode = False

    invalidSyntax = False
#    print "Len of sysargv = %s" % (len(sys.argv))
    # exactly one argument: only --help/-h and --version/-v are accepted
    if len(sys.argv) == 2:
        if(sys.argv[1] == '--help'or sys.argv[1] == '-h'):
            print "%s %s supports Blade Runner (English version, CD edition)." % (app_name_spaced, app_version)
            print "Created by Praetorian of the classic adventures in Greek team."
            print "Always keep backups!"
            print "--------------------"
            print "Preparatory steps:"
            print "1. Put actornames.txt file in the same folder with this tool."
            print "--------------------"
            print "%s takes has one mandatory argument, the folder of the extracted WAV files:" % (app_name_spaced)
            print "Valid syntax: %s -ip [folderpath_for_TLK_Files] -op [folderpath_for_extracted_wav_Files] -m [stringPathToReplaceFolderpathInExcelLinks]" % (app_name)
            print "The -op switch has an argument that is the path for extracted WAV files folder. The -op switch is REQUIRED always."
            print "The -ip switch has an argument that is the path for the input (TLK or MIX) files folder (can be the same as the Blade Runner installation folder)."
            print "The -m switch has an argument that is a replacement string for the path to the folder of extracted WAV files which will be used as a prefix for the links in the output XLS file."
            print "The -xwav switch enables the WAV audio extract mode from the TLK files. It requires an INPUT path to be set with the -ip switch."
            print "The -xtre switch enables the TRE parsing mode from the original MIX files. It requires an INPUT path to be set with the -ip switch."
            print "If the app finishes successfully a sortedWavs.xls file will be created in the same folder with the app."
            print "--------------------"
            print "Thank you for using this app."
            print "Please provide any feedback to: %s " % (company_email)
            sys.exit()
        elif(sys.argv[1] == '--version' or sys.argv[1] == '-v'):
            print "%s %s supports Blade Runner (English version, CD edition)." % (app_name_spaced, app_version)
            print "Please provide any feedback to: %s " % (company_email)
            sys.exit()
        else:
            invalidSyntax = True
    elif len(sys.argv) > 2:
        for i in range(1, len(sys.argv)):
            # a switch followed by a value (the next argument does not start with '-')
            if( i < (len(sys.argv) - 1) and sys.argv[i][:1] == '-' and sys.argv[i+1][:1] != '-'):
                if (sys.argv[i] == '-op'):
                    TMProotFolderWithExtractedFiles = sys.argv[i+1]
                    # module-level name, read later by outputXLS()
                    numReplaceStartingCharacters = len(TMProotFolderWithExtractedFiles)
                elif (sys.argv[i] == '-ip'):
                    TMProotFolderWithInputTLKFiles = sys.argv[i+1]
                elif (sys.argv[i] == '-m'):
                    # module-level name, read later by outputXLS()
                    stringReplacementForRootFolderWithExtractedFiles = sys.argv[i+1]
            # value-less switches
            # NOTE(review): these are only reached when the test above fails, i.e. when
            # the switch is the last argument or is followed by another '-' switch
            elif (sys.argv[i] == '-xwav'):
                print "Extract WAVs from TLK files mode enabled."
                extractWavFilesMode = True
            elif (sys.argv[i] == '-xtre'):
                print "Extract TRE mode enabled."
                extractTreFilesMode = True
        if not TMProotFolderWithExtractedFiles: # this argument is mandatory
            invalidSyntax = True

        # both extract modes need the input folder given with -ip
        if (extractWavFilesMode == True or extractTreFilesMode == True) and (TMProotFolderWithInputTLKFiles == ''):
            invalidSyntax = True

        if not invalidSyntax:

            # parse Actors files:
            initActorPropertyEntries()
#            for actorEntryTmp in actorPropertyEntries:
#                print "Found actor: %s %s %s" % (actorEntryTmp[0], actorEntryTmp[1], actorEntryTmp[2])
            #
            # Checking for the optional case of parsing the input TLK files to extract to WAV
            #
            if TMProotFolderWithInputTLKFiles != '':
                if (extractWavFilesMode == True):
                    inputTLKsExtract(TMProotFolderWithInputTLKFiles, TMProotFolderWithExtractedFiles)
                # TRE extraction is not run here; extractTreFilesMode is passed to outputXLS() below
                #if (extractTreFilesMode == True):
                #    inputMIXExtractTREs(TMProotFolderWithInputTLKFiles)
            #
            # Parsing the extracted WAV files
            #
            print "Parsing the extracted WAV audio files. Please wait (it could take a while)..."
            for (dirpath, dirnames, filenames) in walk(TMProotFolderWithExtractedFiles):
                for nameIdx, nameTmp in enumerate(filenames):
                    relDirName = ''
#                    os.path.split would Split the pathname path into a pair, (head, tail) where tail is the last pathname component and head is everything leading up to that. The tail part will never contain a slash
                    # the last path component that contains "TLK" becomes the resource folder name
                    pathTokens = dirpath.split(os.path.sep)
                    for pTokenTmp in pathTokens:
                        if pTokenTmp.find("TLK") != -1:
                            relDirName = pTokenTmp
#                    print os.path.dirname(dirpath)
#                    print os.path.abspath(os.path.join(os.path.join(dirpath, nameTmp), os.pardir))
                    # record format at this point: '<file name>&<resource folder>&<full path>'
                    filenames[nameIdx] = filenames[nameIdx] +'&' + relDirName + '&' + os.path.join(dirpath, nameTmp)
                # wavfiles is a module-level list defined outside this block
                wavfiles.extend(filenames)
#                break
            # Rewrite every record so its first part becomes
            # '<actorId>#<second token>#<third token>#<actor full name>'.
            # NOTE(review): every error path below calls sys.exit(0), i.e. reports success to the shell
            for fileIdx, filenameTmp in enumerate(wavfiles):
                twoTokensOfFilenameAndRelDirname = filenameTmp.split('&', 1)
                if len(twoTokensOfFilenameAndRelDirname) != 2:
                    print "ERROR in filename and rel dirname split: %s" % (filenameTmp)
                    sys.exit(0)
                # split at the FIRST '.', so the extension check also fails for names with more than one dot
                twoTokensOfFilenameForExt = twoTokensOfFilenameAndRelDirname[0].split('.', 1)
                if len(twoTokensOfFilenameForExt) == 2:
                    if twoTokensOfFilenameForExt[1] != 'WAV' and twoTokensOfFilenameForExt[1] != 'wav':
                        print "ERROR in proper extension (not WAV): %s" % (twoTokensOfFilenameAndRelDirname[0])
                        sys.exit(0)
                else:
                    print "ERROR in extension split: %s" % (twoTokensOfFilenameAndRelDirname[0])
                    sys.exit(0)
                #remove WAV extension here
#                filenameTmp = twoTokensOfFilenameAndRelDirname[0] + '&' + twoTokensOfFilenameForExt[0]
#                print "Found %s" % (filenameTmp)

                threeTokensOfFilename = twoTokensOfFilenameForExt[0].split('_', 2)
                if len(threeTokensOfFilename) == 3:
                    # fix rogue _ chars in 3rd token of filename (split at '_')
                    threeTokensOfFilename[2] = threeTokensOfFilename[2].replace("_", "-")
                    # Replace first token
                    # replace actor name shorthand with ActorID in first part
                    tmpActorId = getActorIdByShortName(threeTokensOfFilename[0])
                    tmpActorFullName = ''
                    if(tmpActorId != '' and tmpActorId is not None):
                        tmpActorFullName = getActorFullNameById(tmpActorId)
                        if(tmpActorFullName != '' and tmpActorFullName is not None):
                            threeTokensOfFilename[0] = tmpActorId.zfill(2)
                            # the full name is appended as a fourth token
                            threeTokensOfFilename.append(tmpActorFullName)
                        else:
                            #fatal error if something cannot convert to spot it immediately
                            print "ERROR in actorIdMatch match: %s %s" % (tmpActorId, twoTokensOfFilenameForExt[0])
                            sys.exit(0)
                    else:
                        #fatal error if something cannot convert to spot it immediately
                        print "ERROR in shorthand match: %s %s" % (threeTokensOfFilename[0], twoTokensOfFilenameForExt[0])
                        sys.exit(0)
#
#
#                    foundMatchForActorShortHand = False
#                    for actorEntryTmp in actorPropertyEntries:
#                        if actorEntryTmp[1] == threeTokensOfFilename[0]:
#                            threeTokensOfFilename[0] = actorEntryTmp[0].zfill(2)
#                            threeTokensOfFilename.append(actorEntryTmp[2])
#                            foundMatchForActorShortHand = True
#                            break
                    # end of replace actor name shorthand
                    twoTokensOfFilenameForExt[0] = '#'.join(threeTokensOfFilename)
                    filenameTmp = twoTokensOfFilenameForExt[0] + '&' + twoTokensOfFilenameAndRelDirname[1]
                    wavfiles[fileIdx] = filenameTmp
                else:
                    print "ERROR in spliting tokens on _: %s" % (filenameTmp)
                    sys.exit(0)
            #sort in-place
            #
            #
            wavfiles.sort()
#            #
#            #
#            # Code for renaming non conforming filenames - just to be consistent in file naming
#            # TO BE RUN ONCE FOR CONFORMANCE. No NEED TO Re-RUN
#            # If its run though, comment this section and re-run the tool to get proper links in Excel file
#            #
#            for filenameSrcTmp in wavfiles:
#                # get real full path from last token when split at &
#                # create target full path from the parentdir of last token and the current state of first 3 tokens when splitting at '#'
#                # report mismatch
#                # print (BUT DON'T PROCEED AT THIS POINT) what you would rename to what.
#                threeTokensOfFilenameAndRelDirname = filenameSrcTmp.split('&', 2)
#                currentSrcFullPath = threeTokensOfFilenameAndRelDirname[2]
#                fourTokensOfTargetFileName = threeTokensOfFilenameAndRelDirname[0].split('#', 3)
#                tmpActorShortHand = getActorShortNameById(fourTokensOfTargetFileName[0])
#                targetFileName = tmpActorShortHand + '_' + fourTokensOfTargetFileName[1] + '_' + fourTokensOfTargetFileName[2] + '.WAV'
#                # os.path.split would Split the pathname path into a pair, (head, tail) where tail is the last pathname component and head is everything leading up to that. The tail part will never contain a slash
#                (srcParentDir, srcTail) = os.path.split(currentSrcFullPath)
#                targetFullPath = os.path.join(srcParentDir, targetFileName)
#                # os.rename(src, dst)
#                if(currentSrcFullPath != targetFullPath):
#                    print currentSrcFullPath
#                    print targetFullPath
#                    os.rename(currentSrcFullPath, targetFullPath)
            #
            # END OF: Code for renaming non conforming filenames - just to be consistent in file naming
            #
            #
            # wavfilesNoDups is a module-level list defined outside this block
            for filenameSrcTmp in wavfiles:
                duplicateFound = False
#                print "Converted %s" % (filenameSrcTmp)
                # Weed out duplicates by copying to another table (quick and dirty)
                twoTokensOfRelDirnameAndFilenameSrc = filenameSrcTmp.split('&', 2)
                tmpRelDirNameSrc = twoTokensOfRelDirnameAndFilenameSrc[1]
                threeTokensOfQuoteFilenameSrc = twoTokensOfRelDirnameAndFilenameSrc[0].split('#', 2)
                #concatenate actorID and quoteID for search key
                keyForDuplicateSearchSrc = threeTokensOfQuoteFilenameSrc[0] + threeTokensOfQuoteFilenameSrc[1]
                # linear scan of the entries kept so far
                for fileTargIdx, filenameTargTmp in enumerate(wavfilesNoDups):
                    twoTokensOfRelDirnameAndFilenameTarg = filenameTargTmp.split('&', 2)
                    tmpRelDirNameTarg = twoTokensOfRelDirnameAndFilenameTarg[1]
                    threeTokensOfQuoteFilenameTarg = twoTokensOfRelDirnameAndFilenameTarg[0].split('#', 2)
                    #concatenate actorID and quoteID for search key
                    keyForDuplicateSearchTarg = threeTokensOfQuoteFilenameTarg[0] + threeTokensOfQuoteFilenameTarg[1]
                    if(keyForDuplicateSearchSrc == keyForDuplicateSearchTarg):
                        #print "Found duplicate %s" % (filenameSrcTmp)
                        duplicateFound = True
                        # keep the first entry and prepend the duplicate's resource folder to its folder list
                        wavfilesNoDups[fileTargIdx] = twoTokensOfRelDirnameAndFilenameTarg[0] + '&' + tmpRelDirNameSrc + ',' + tmpRelDirNameTarg + '&' + twoTokensOfRelDirnameAndFilenameTarg[2]
                        break
                if(duplicateFound == False):
                    wavfilesNoDups.append(filenameSrcTmp)
#            for filenameSrcTmp in wavfilesNoDups:
#                print "Unique %s" % (filenameSrcTmp)

            print "Creating output excel %s file..." % (OUTPUT_XLS_FILENAME)
            outputXLS(OUTPUT_XLS_FILENAME, OUTPUT_XLS_QUOTES_SHEET, wavfilesNoDups, extractTreFilesMode, TMProotFolderWithInputTLKFiles)
    else:
        # no arguments at all
        invalidSyntax = True

    if invalidSyntax == True:
        print "Invalid syntax\n Try: \n %s -op [folderpath_for_extracted_wav_Files] \n %s --help for more info \n %s --version for version info " % (app_name, app_name, app_name)
        # echo every argument except the script name
        tmpi = 0
        for tmpArg in sys.argv:
            if tmpi==0: #skip first argument
                tmpi+=1
                continue
            print "\nArgument: %s" % (tmpArg)
            tmpi+=1
else:
    ## debug
    #print '%s was imported from another module' % (app_name_spaced,)
    pass
# treFileLib: parser for TRE (text resource) files.
import os, sys, shutil
import struct
from struct import *

my_module_version = "0.50"
my_module_name = "treFileLib"


class TreHeader:
    """Header fields of a TRE file."""
    # class-level default kept so TreHeader.numOfTextResources can still be read
    numOfTextResources = -1

    def __init__(self):
        # stays -1 until treFile.loadTreFile() stores the parsed count
        self.numOfTextResources = -1


class treFile:
    """One parsed TRE file.

    Attributes (all per instance, so several TRE files can be held at once):
      m_header         -- TreHeader with the number of texts
      stringEntriesLst -- list of two-value tuples; first value is the ID,
                          second value is the string content (byte string)
      stringOffsets    -- offsets of the strings as stored in the file
    """

    def __init__(self):
        self.m_header = TreHeader()
        self.stringEntriesLst = []
        self.stringOffsets = []

    def loadTreFile(self, treBytesBuff, maxLength):
        """Parse a complete TRE file held in a byte buffer.

        Layout as read here (values read as 4-byte little-endian unsigned ints):
          - number of text resources
          - table of string IDs, one per text
          - table of string offsets, one per text, relative to byte 4
            (the start of the ID table)
          - the strings themselves, each terminated by a null byte

        treBytesBuff -- byte string with the whole TRE file
        maxLength -- accepted for compatibility with existing callers; not used

        Returns True on success. On failure prints "Loading failure!", returns
        False and leaves the data of this instance as it was before the call.
        """
        absStartOfIndexTable = 4
        try:
            (numOfTextResources,) = struct.unpack_from('<I', treBytesBuff, 0)
            print("Total texts in TRE: %d" % (numOfTextResources))
            absStartOfOffsetTable = absStartOfIndexTable + 4 * numOfTextResources
            absEndOfTables = absStartOfOffsetTable + 4 * numOfTextResources
            # reject a count the buffer cannot hold before building any table
            if absEndOfTables > len(treBytesBuff):
                raise struct.error("TRE tables exceed the buffer size")
            tableFormat = '<%dI' % (numOfTextResources)
            stringIds = struct.unpack_from(tableFormat, treBytesBuff, absStartOfIndexTable)
            stringOffsets = struct.unpack_from(tableFormat, treBytesBuff, absStartOfOffsetTable)

            stringEntries = []
            for (theId, relOffset) in zip(stringIds, stringOffsets):
                currOffset = relOffset + absStartOfIndexTable
                # the buffer holds multiple null terminated strings: take the bytes
                # from the offset up to (not including) the next null byte, or up to
                # the end of the buffer when there is no terminator
                endOfString = treBytesBuff.find(b'\x00', currOffset)
                if endOfString == -1:
                    endOfString = len(treBytesBuff)
                stringOfIdx = treBytesBuff[currOffset:endOfString]
                # debug aid kept from the original: offsets of "problematic" character cases
                if currOffset in (5982, 6050, 2827, 2880):
                    print("Offs: %d\tFound String: %s" % (currOffset, stringOfIdx))
                stringEntries.append((theId, stringOfIdx))
        except (struct.error, TypeError):
            print("Loading failure!")
            return False

        # commit only after everything parsed, replacing (not extending) old data
        self.m_header.numOfTextResources = numOfTextResources
        self.stringEntriesLst = stringEntries
        self.stringOffsets = list(stringOffsets)
        return True

    def header(self):
        """Return the TreHeader of this file."""
        return self.m_header
#
#
#
if __name__ == '__main__':
    # main()
    print("Running %s as main module" % (my_module_name))
    # assumes a file of name ACTORS.TRE in the current working directory
    try:
        inTREFile = open(os.path.join('.', 'ACTORS.TRE'), 'rb')
    except IOError:
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        raise
    # the with-block closes the file even if reading fails
    with inTREFile:
        allOfTreFileInBuffer = inTREFile.read()
    treFileInstance = treFile()
    if (treFileInstance.loadTreFile(allOfTreFileInBuffer, len(allOfTreFileInBuffer))):
        print("TRE file loaded successfully!")
    else:
        print("Error while loading TRE file!")
\ No newline at end of file |