From b1bffaba8682af2870d4ff019bedbceb72440707 Mon Sep 17 00:00:00 2001 From: Jaromir Wysoglad Date: Mon, 12 Aug 2019 23:33:54 -0700 Subject: TTS: Implement our own queuing on windows. As on Linux, SAPI does not give us enough control over its speech queue to implement INTERRUPT_NO_REPEAT properly. Starting with this commit we therefore keep our own queue and pass SAPI one utterance at a time. This is done outside the main thread. --- .../windows/windows-text-to-speech.cpp | 172 ++++++++++++++++----- .../windows/windows-text-to-speech.h | 13 ++ 2 files changed, 149 insertions(+), 36 deletions(-) diff --git a/backends/text-to-speech/windows/windows-text-to-speech.cpp b/backends/text-to-speech/windows/windows-text-to-speech.cpp index 7c8f879189..ec3961dd6b 100644 --- a/backends/text-to-speech/windows/windows-text-to-speech.cpp +++ b/backends/text-to-speech/windows/windows-text-to-speech.cpp @@ -50,6 +50,15 @@ ISpAudio *_audio; WindowsTextToSpeechManager::WindowsTextToSpeechManager() : _speechState(BROKEN){ init(); + _threadParams.queue = &_speechQueue; + _threadParams.state = &_speechState; + _threadParams.mutex = &_speechMutex; + _thread = NULL; + _speechMutex = CreateMutex(NULL, FALSE, NULL); + if (_speechMutex == NULL) { + _speechState = BROKEN; + warning("Could not create TTS mutex"); + } } void WindowsTextToSpeechManager::init() { @@ -84,19 +93,67 @@ void WindowsTextToSpeechManager::init() { _voice->SetOutput(_audio, FALSE); - if(_ttsState->_availableVoices.size() > 0) + if (_ttsState->_availableVoices.size() > 0) _speechState = READY; else _speechState = NO_VOICE; _lastSaid = ""; + while (!_speechQueue.empty()) { + free(_speechQueue.front()); + _speechQueue.pop_front(); + } } WindowsTextToSpeechManager::~WindowsTextToSpeechManager() { + stop(); + if (_thread != NULL) { + WaitForSingleObject(_thread, INFINITE); + CloseHandle(_thread); + } + if (_speechMutex != NULL) { + CloseHandle(_speechMutex); + } if (_voice) _voice->Release(); ::CoUninitialize(); } +DWORD WINAPI 
startSpeech(LPVOID parameters) { + WindowsTextToSpeechManager::SpeechParameters *params = + (WindowsTextToSpeechManager::SpeechParameters *) parameters; + // wait for the previous speech, if the previous thread exited too early + _voice->WaitUntilDone(INFINITE); + + while (!params->queue->empty()) { + WaitForSingleObject(*params->mutex, INFINITE); + // check again, when we have exclusive access to the queue + if (params->queue->empty() || *(params->state) == WindowsTextToSpeechManager::PAUSED) { + break; + } + WCHAR *currentSpeech = params->queue->front(); + _voice->Speak(currentSpeech, SPF_PURGEBEFORESPEAK | SPF_ASYNC, 0); + ReleaseMutex(*params->mutex); + + while (*(params->state) != WindowsTextToSpeechManager::PAUSED) + if (_voice->WaitUntilDone(10) == S_OK) + break; + + WaitForSingleObject(*params->mutex, INFINITE); + if (!params->queue->empty() && params->queue->front() == currentSpeech) { + if (currentSpeech != NULL) + free(currentSpeech); + params->queue->pop_front(); + } + ReleaseMutex(*params->mutex); + } + + WaitForSingleObject(*params->mutex, INFINITE); + if (*(params->state) != WindowsTextToSpeechManager::PAUSED) + *(params->state) = WindowsTextToSpeechManager::READY; + ReleaseMutex(*params->mutex); + return 0; +} + bool WindowsTextToSpeechManager::say(Common::String str, Action action, Common::String charset) { if (_speechState == BROKEN || _speechState == NO_VOICE) { warning("The tts cannot speak in this state"); @@ -106,12 +163,6 @@ bool WindowsTextToSpeechManager::say(Common::String str, Action action, Common:: if (isSpeaking() && action == DROP) return true; - if (isSpeaking() && action == INTERRUPT_NO_REPEAT && _lastSaid == str) - return true; - - if (isSpeaking() && action == QUEUE_NO_REPEAT && _lastSaid == str) - return true; - if (charset.empty()) { #ifdef USE_TRANSLATION charset = TransMan.getCurrentCharset(); @@ -119,65 +170,114 @@ bool WindowsTextToSpeechManager::say(Common::String str, Action action, Common:: charset = "ASCII"; #endif } - 
_lastSaid = str; + // We have to set the pitch by prepending xml code at the start of the said string; Common::String pitch= Common::String::format("", _ttsState->_pitch / 10); str.replace((uint32)0, 0, pitch); WCHAR *strW = Win32::ansiToUnicode(str.c_str(), Win32::getCodePageId(charset)); - if ((isPaused() || isSpeaking()) && (action == INTERRUPT || action == INTERRUPT_NO_REPEAT)) + WaitForSingleObject(_speechMutex, INFINITE); + if (isSpeaking() && !_speechQueue.empty() && action == INTERRUPT_NO_REPEAT && + _speechQueue.front() != NULL && !wcscmp(_speechQueue.front(), strW)) { + while (_speechQueue.size() != 1) { + free(_speechQueue.back()); + _speechQueue.pop_back(); + } + free(strW); + ReleaseMutex(_speechMutex); + return true; + } + + if (isSpeaking() && !_speechQueue.empty() && action == QUEUE_NO_REPEAT && + _speechQueue.front() != NULL &&!wcscmp(_speechQueue.back(), strW)) { + ReleaseMutex(_speechMutex); + return true; + } + + ReleaseMutex(_speechMutex); + if ((isPaused() || isSpeaking()) && (action == INTERRUPT || action == INTERRUPT_NO_REPEAT)) { stop(); + } - bool result = _voice->Speak(strW, SPF_ASYNC, NULL) != S_OK; - free(strW); - if (!isPaused()) + WaitForSingleObject(_speechMutex, INFINITE); + _speechQueue.push_back(strW); + ReleaseMutex(_speechMutex); + + if (!isSpeaking() && !isPaused()) { + DWORD threadId; + if (_thread != NULL) { + WaitForSingleObject(_thread, INFINITE); + CloseHandle(_thread); + } _speechState = SPEAKING; - return result; + _thread = CreateThread(NULL, 0, startSpeech, &_threadParams, 0, &threadId); + if (_thread == NULL) { + warning("Could not create speech thread"); + _speechState = READY; + return true; + } + } + return false; } bool WindowsTextToSpeechManager::stop() { - if(_speechState == BROKEN || _speechState == NO_VOICE) + if (_speechState == BROKEN || _speechState == NO_VOICE) return true; if (isPaused()) resume(); _audio->SetState(SPAS_STOP, 0); + WaitForSingleObject(_speechMutex, INFINITE); + while 
(!_speechQueue.empty()) { + if (_speechQueue.front() != NULL) + free(_speechQueue.front()); + _speechQueue.pop_front(); + } + _speechQueue.push_back(NULL); + ReleaseMutex(_speechMutex); + if (_thread != NULL) { + WaitForSingleObject(_thread, INFINITE); + CloseHandle(_thread); + _thread = NULL; + } _audio->SetState(SPAS_RUN, 0); - _voice->Speak(NULL, SPF_PURGEBEFORESPEAK | SPF_ASYNC | SPF_IS_NOT_XML, 0); - _speechState = READY; return false; } bool WindowsTextToSpeechManager::pause() { - if(_speechState == BROKEN || _speechState == NO_VOICE) + if (_speechState == BROKEN || _speechState == NO_VOICE) return true; if (isPaused()) return false; + WaitForSingleObject(_speechMutex, INFINITE); _voice->Pause(); _speechState = PAUSED; + ReleaseMutex(_speechMutex); return false; } bool WindowsTextToSpeechManager::resume() { - if(_speechState == BROKEN || _speechState == NO_VOICE) + if (_speechState == BROKEN || _speechState == NO_VOICE) return true; if (!isPaused()) return false; _voice->Resume(); - if (isSpeaking()) - _speechState = SPEAKING; - else + DWORD threadId; + if (_thread != NULL) { + WaitForSingleObject(_thread, INFINITE); + CloseHandle(_thread); + } + _speechState = SPEAKING; + _thread = CreateThread(NULL, 0, startSpeech, &_threadParams, 0, &threadId); + if (_thread == NULL) { + warning("Could not create speech thread"); _speechState = READY; + return true; + } return false; } bool WindowsTextToSpeechManager::isSpeaking() { - if(_speechState == BROKEN || _speechState == NO_VOICE) - return false; - SPAUDIOSTATUS audioStatus; - SPVOICESTATUS voiceStatus; - _audio->GetStatus(&audioStatus); - _voice->GetStatus(&voiceStatus, NULL); - return audioStatus.State != SPAS_CLOSED || voiceStatus.dwRunningState != SPRS_DONE; + return _speechState == SPEAKING; } bool WindowsTextToSpeechManager::isPaused() { @@ -185,7 +285,7 @@ bool WindowsTextToSpeechManager::isPaused() { } bool WindowsTextToSpeechManager::isReady() { - if(_speechState == BROKEN || _speechState == NO_VOICE) + if 
(_speechState == BROKEN || _speechState == NO_VOICE) return false; if (_speechState != PAUSED && !isSpeaking()) return true; @@ -194,14 +294,14 @@ bool WindowsTextToSpeechManager::isReady() { } void WindowsTextToSpeechManager::setVoice(unsigned index) { - if(_speechState == BROKEN || _speechState == NO_VOICE) + if (_speechState == BROKEN || _speechState == NO_VOICE) return; _voice->SetVoice((ISpObjectToken *) _ttsState->_availableVoices[index].getData()); _ttsState->_activeVoice = index; } void WindowsTextToSpeechManager::setRate(int rate) { - if(_speechState == BROKEN || _speechState == NO_VOICE) + if (_speechState == BROKEN || _speechState == NO_VOICE) return; assert(rate >= -100 && rate <= 100); _voice->SetRate(rate / 10); @@ -209,14 +309,14 @@ void WindowsTextToSpeechManager::setRate(int rate) { } void WindowsTextToSpeechManager::setPitch(int pitch) { - if(_speechState == BROKEN || _speechState == NO_VOICE) + if (_speechState == BROKEN || _speechState == NO_VOICE) return; assert(pitch >= -100 && pitch <= 100); _ttsState->_pitch = pitch; } void WindowsTextToSpeechManager::setVolume(unsigned volume) { - if(_speechState == BROKEN || _speechState == NO_VOICE) + if (_speechState == BROKEN || _speechState == NO_VOICE) return; assert(volume <= 100); _voice->SetVolume(volume); @@ -306,7 +406,7 @@ void WindowsTextToSpeechManager::createVoice(void *cpVoiceToken) { int strToInt(Common::String str) { str.toUppercase(); int result = 0; - for(unsigned i = 0; i < str.size(); i++) { + for (unsigned i = 0; i < str.size(); i++) { if (str[i] < '0' || (str[i] > '9' && str[i] < 'A') || str[i] > 'F') break; int num = (str[i] <= '9') ? 
str[i] - '0' : str[i] - 55; @@ -342,7 +442,7 @@ void WindowsTextToSpeechManager::updateVoices() { while (SUCCEEDED(hr) && ulCount--) { hr = cpEnum->Next(1, &cpVoiceToken, NULL); _voice->SetVoice(cpVoiceToken); - if(SUCCEEDED(_voice->Speak(L"hi, this is test", SPF_PURGEBEFORESPEAK | SPF_ASYNC | SPF_IS_NOT_XML, 0))) + if (SUCCEEDED(_voice->Speak(L"hi, this is test", SPF_PURGEBEFORESPEAK | SPF_ASYNC | SPF_IS_NOT_XML, 0))) createVoice(cpVoiceToken); else cpVoiceToken->Release(); @@ -355,7 +455,7 @@ void WindowsTextToSpeechManager::updateVoices() { _voice->SetVolume(_ttsState->_volume); cpEnum->Release(); - if(_ttsState->_availableVoices.size() == 0) { + if (_ttsState->_availableVoices.size() == 0) { _speechState = NO_VOICE; warning("No voice is available"); } else if (_speechState == NO_VOICE) diff --git a/backends/text-to-speech/windows/windows-text-to-speech.h b/backends/text-to-speech/windows/windows-text-to-speech.h index d8968243ec..cbf3eb07df 100644 --- a/backends/text-to-speech/windows/windows-text-to-speech.h +++ b/backends/text-to-speech/windows/windows-text-to-speech.h @@ -29,6 +29,8 @@ #include "common/text-to-speech.h" #include "common/str.h" +#include "common/list.h" + class WindowsTextToSpeechManager : public Common::TextToSpeechManager { public: @@ -40,6 +42,12 @@ public: NO_VOICE }; + struct SpeechParameters { + Common::List<WCHAR *> *queue; + SpeechState *state; + HANDLE *mutex; + }; + WindowsTextToSpeechManager(); virtual ~WindowsTextToSpeechManager(); @@ -72,8 +80,13 @@ private: Common::String lcidToLocale(Common::String lcid); SpeechState _speechState; Common::String _lastSaid; + HANDLE _thread; + Common::List<WCHAR *> _speechQueue; + SpeechParameters _threadParams; + HANDLE _speechMutex; }; + #endif #endif // BACKENDS_UPDATES_WINDOWS_H -- cgit v1.2.3