diff options
-rw-r--r-- | backends/text-to-speech/windows/windows-text-to-speech.cpp | 36 |
1 files changed, 22 insertions, 14 deletions
diff --git a/backends/text-to-speech/windows/windows-text-to-speech.cpp b/backends/text-to-speech/windows/windows-text-to-speech.cpp index 575c72b41f..58389e5141 100644 --- a/backends/text-to-speech/windows/windows-text-to-speech.cpp +++ b/backends/text-to-speech/windows/windows-text-to-speech.cpp @@ -58,25 +58,25 @@ void WindowsTextToSpeechManager::init() { if (FAILED(::CoInitialize(NULL))) return; - // init voice - HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&_voice); - if (!SUCCEEDED(hr)) { - warning("Could not initialize TTS voice"); - return; - } - setLanguage("en"); - // init audio CSpStreamFormat format; format.AssignFormat(SPSF_11kHz8BitMono); ISpObjectToken *pToken; - hr = SpGetDefaultTokenFromCategoryId(SPCAT_AUDIOOUT, &pToken); + HRESULT hr = SpGetDefaultTokenFromCategoryId(SPCAT_AUDIOOUT, &pToken); if (FAILED(hr)) { warning("Could not initialize TTS audio"); return; } pToken->CreateInstance(NULL, CLSCTX_ALL, IID_ISpAudio, (void **)&_audio); _audio->SetFormat(format.FormatId(), format.WaveFormatExPtr()); + + // init voice + hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&_voice); + if (!SUCCEEDED(hr)) { + warning("Could not initialize TTS voice"); + return; + } + setLanguage("en"); _voice->SetOutput(_audio, FALSE); if(_ttsState->_availableVoices.size() > 0) @@ -211,11 +211,9 @@ int WindowsTextToSpeechManager::getVolume() { } void WindowsTextToSpeechManager::freeVoices() { - for(Common::TTSVoice *i = _ttsState->_availableVoices.begin(); i < _ttsState->_availableVoices.end(); i++) { - ISpObjectToken *voiceData = (ISpObjectToken *)i->getData(); - voiceData->Release(); - } _ttsState->_availableVoices.clear(); + // The voice data gets freed automaticly, when the reference counting inside TTSVoice + // reaches 0, so there is no point in trying to free it here } void WindowsTextToSpeechManager::setLanguage(Common::String language) { @@ -234,6 +232,11 @@ void WindowsTextToSpeechManager::createVoice(void *cpVoiceToken) { SpGetDescription(voiceToken, &descW); char *buffer = Win32::unicodeToAnsi(descW); Common::String desc = buffer; + if (desc == "Sample TTS Voice") { + // This is really bad voice, it is basicaly unusable + free(buffer); + return; + } free(buffer); // voice attributes @@ -290,7 +293,7 @@ void WindowsTextToSpeechManager::createVoice(void *cpVoiceToken) { free(buffer); CoTaskMemFree(data); - _ttsState->_availableVoices.push_back(Common::TTSVoice(gender, Common::TTSVoice::ADULT, (void *) voiceToken, desc)); + _ttsState->_availableVoices.push_back(Common::TTSVoice(gender, age, (void *) voiceToken, desc)); } int strToInt(Common::String str) { @@ -337,6 +340,11 @@ void WindowsTextToSpeechManager::updateVoices() { else cpVoiceToken->Release(); } + // stop the test speech, we don't use stop(), because we don't wan't it to set state to READY + // and we could easily be in NO_VOICE or BROKEN state here, in which the stop() wouldn't work + _audio->SetState(SPAS_STOP, 0); + _audio->SetState(SPAS_RUN, 0); + _voice->Speak(NULL, SPF_PURGEBEFORESPEAK | SPF_ASYNC | SPF_IS_NOT_XML, 0); _voice->SetVolume(_ttsState->_volume); cpEnum->Release(); |