aboutsummaryrefslogtreecommitdiff
path: root/backends/text-to-speech
diff options
context:
space:
mode:
author: Jaromir Wysoglad, 2019-07-16 22:09:05 -0700
committer: Filippos Karapetis, 2019-09-01 22:47:55 +0300
commit: 318c6d7ec6e5562d0fd3e9d70386d0fcde86cf12 (patch)
tree: d38d0048557b0fbc0a5ea09fcaf36abf3890cfdd /backends/text-to-speech
parent: d2d34a4ecaabd7d436bfab942c9003d0e478ad2a (diff)
download: scummvm-rg350-318c6d7ec6e5562d0fd3e9d70386d0fcde86cf12.tar.gz
scummvm-rg350-318c6d7ec6e5562d0fd3e9d70386d0fcde86cf12.tar.bz2
scummvm-rg350-318c6d7ec6e5562d0fd3e9d70386d0fcde86cf12.zip
TTS: Finish implementing the Windows TTS manager
Diffstat (limited to 'backends/text-to-speech')
-rw-r--r-- backends/text-to-speech/linux/linux-text-to-speech.cpp | 21
-rw-r--r-- backends/text-to-speech/linux/linux-text-to-speech.h | 2
-rw-r--r-- backends/text-to-speech/windows/windows-text-to-speech.cpp | 248
-rw-r--r-- backends/text-to-speech/windows/windows-text-to-speech.h | 9
4 files changed, 267 insertions, 13 deletions
diff --git a/backends/text-to-speech/linux/linux-text-to-speech.cpp b/backends/text-to-speech/linux/linux-text-to-speech.cpp
index 9f330bc997..1e92742845 100644
--- a/backends/text-to-speech/linux/linux-text-to-speech.cpp
+++ b/backends/text-to-speech/linux/linux-text-to-speech.cpp
@@ -230,4 +230,25 @@ void LinuxTextToSpeechManager::updateVoices() {
}
+bool LinuxTextToSpeechManager::popState() {
+	// Without a following state there is nothing to switch to; report that with true.
+	if (!_ttsState->_next)
+		return true;
+
+	// The data attached to every voice of the current state is released with free().
+	Common::TTSVoice *voice = _ttsState->_availaibleVoices.begin();
+	while (voice < _ttsState->_availaibleVoices.end()) {
+		free(voice->getData());
+		voice++;
+	}
+
+	// Switch to the following state and dispose of the one we just left.
+	Common::TTSState *discarded = _ttsState;
+	_ttsState = discarded->_next;
+	delete discarded;
+
+	// Re-apply the settings stored in the state that is current now.
+	setLanguage(_ttsState->_language);
+	setPitch(_ttsState->_pitch);
+	setVolume(_ttsState->_volume);
+	setRate(_ttsState->_rate);
+	return false;
+}
+
+
#endif
diff --git a/backends/text-to-speech/linux/linux-text-to-speech.h b/backends/text-to-speech/linux/linux-text-to-speech.h
index d08da49425..cd3fcf6cb3 100644
--- a/backends/text-to-speech/linux/linux-text-to-speech.h
+++ b/backends/text-to-speech/linux/linux-text-to-speech.h
@@ -63,6 +63,8 @@ public:
virtual void setLanguage(Common::String language);
+ virtual bool popState();
+
void updateState(SpeechState state);
private:
diff --git a/backends/text-to-speech/windows/windows-text-to-speech.cpp b/backends/text-to-speech/windows/windows-text-to-speech.cpp
index aad50c61ff..edadf5e667 100644
--- a/backends/text-to-speech/windows/windows-text-to-speech.cpp
+++ b/backends/text-to-speech/windows/windows-text-to-speech.cpp
@@ -31,6 +31,7 @@
#include <Servprov.h>
#include <sapi.h>
#include "backends/text-to-speech/windows/sphelper-scummvm.h"
+#include "backends/platform/sdl/win32/win32_wrapper.h"
#include "backends/text-to-speech/windows/windows-text-to-speech.h"
@@ -43,82 +44,307 @@
ISpVoice *_voice;
+// We need this pointer to be able to stop speech immediately.
+ISpAudio *_audio;
+
WindowsTextToSpeechManager::WindowsTextToSpeechManager()
: _speechState(BROKEN){
+	// The state starts out as BROKEN; init() replaces it only when it runs to the end.
init();
}
void WindowsTextToSpeechManager::init() {
+	// init COM
if (FAILED(::CoInitialize(NULL)))
return;
+	// init voice
HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&_voice);
if (!SUCCEEDED(hr)) {
warning("Could not initialize TTS voice");
return;
}
- updateVoices();
- _speechState = READY;
+	setLanguage("en");
+
+	// init audio
+	CSpStreamFormat format;
+	format.AssignFormat(SPSF_11kHz8BitMono);
+	ISpObjectToken *pToken = nullptr;
+	hr = SpGetDefaultTokenFromCategoryId(SPCAT_AUDIOOUT, &pToken);
+	if (FAILED(hr) || !pToken) {
+		warning("Could not initialize TTS audio");
+		// setLanguage() may have switched the state to NO_VOICE; without an
+		// audio object the manager must stay unusable.
+		_speechState = BROKEN;
+		return;
+	}
+	// The token is only needed to create the audio object, so release it
+	// whether or not the creation succeeds.
+	hr = pToken->CreateInstance(NULL, CLSCTX_ALL, IID_ISpAudio, (void **)&_audio);
+	pToken->Release();
+	if (FAILED(hr) || !_audio) {
+		warning("Could not initialize TTS audio");
+		_speechState = BROKEN;
+		return;
+	}
+	_audio->SetFormat(format.FormatId(), format.WaveFormatExPtr());
+	_voice->SetOutput(_audio, FALSE);
+
+	if (_ttsState->_availaibleVoices.size() > 0)
+		_speechState = READY;
+	else
+		_speechState = NO_VOICE;
}
WindowsTextToSpeechManager::~WindowsTextToSpeechManager() {
+	// Release the voice tokens held by the current state.
+	freeVoices();
+	// Drop the reference to the audio output obtained in init(); it was
+	// previously never released.
+	if (_audio)
+		_audio->Release();
if (_voice)
_voice->Release();
::CoUninitialize();
}
bool WindowsTextToSpeechManager::say(Common::String str) {
- return true;
+	// Returns true on failure and false when the text was handed to the voice.
+	switch (_speechState) {
+	case BROKEN:
+	case NO_VOICE:
+		warning("The tts cannot speak in this state");
+		return true;
+	default:
+		break;
+	}
+
+	if (isPaused())
+		resume();
+
+	// Stop and restart the audio output before queueing the new text.
+	_audio->SetState(SPAS_STOP, 0);
+	_audio->SetState(SPAS_RUN, 0);
+
+	// The pitch is passed as XML markup put in front of the spoken text.
+	str.replace((uint32)0, 0, Common::String::format("<pitch absmiddle=\"%d\">", _ttsState->_pitch));
+
+	WCHAR *wideText = Win32::ansiToUnicode(str.c_str());
+	const HRESULT speakResult = _voice->Speak(wideText, SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
+	free(wideText);
+
+	_speechState = SPEAKING;
+	return speakResult != S_OK;
}
bool WindowsTextToSpeechManager::stop() {
- return true;
+	// Returns true when the manager cannot be used, false otherwise.
+	const bool unusable = (_speechState == BROKEN) || (_speechState == NO_VOICE);
+	if (unusable)
+		return true;
+
+	if (isPaused())
+		resume();
+
+	// Stop and restart the audio output, then purge what is still queued on the voice.
+	_audio->SetState(SPAS_STOP, 0);
+	_audio->SetState(SPAS_RUN, 0);
+	_voice->Speak(NULL, SPF_PURGEBEFORESPEAK | SPF_ASYNC | SPF_IS_NOT_XML, 0);
+
+	_speechState = READY;
+	return false;
}
bool WindowsTextToSpeechManager::pause() {
- return true;
+	// Returns true when the manager cannot be used, false otherwise.
+	if (_speechState == BROKEN || _speechState == NO_VOICE)
+		return true;
+
+	// Pausing an already paused voice is a no-op.
+	if (!isPaused()) {
+		_voice->Pause();
+		_speechState = PAUSED;
+	}
+	return false;
}
bool WindowsTextToSpeechManager::resume() {
- return true;
+	// Returns true when the manager cannot be used, false otherwise.
+	if (_speechState == BROKEN || _speechState == NO_VOICE)
+		return true;
+
+	// Only a paused voice has to be resumed.
+	if (isPaused()) {
+		_voice->Resume();
+		// Ask the voice whether it is speaking now to pick the new state.
+		_speechState = isSpeaking() ? SPEAKING : READY;
+	}
+	return false;
}
bool WindowsTextToSpeechManager::isSpeaking() {
- return true;
+	// A broken or voiceless manager never speaks.
+	if (_speechState == BROKEN || _speechState == NO_VOICE)
+		return false;
+	SPVOICESTATUS eventStatus;
+	// Do not read the status structure when the query failed; it would be
+	// uninitialised in that case.
+	if (FAILED(_voice->GetStatus(&eventStatus, NULL)))
+		return false;
+	return eventStatus.dwRunningState == SPRS_IS_SPEAKING;
}
bool WindowsTextToSpeechManager::isPaused() {
- return true;
+	// The paused condition is tracked purely in the manager's own state.
+	const bool paused = (_speechState == PAUSED);
+	return paused;
}
bool WindowsTextToSpeechManager::isReady() {
- return true;
+	if (_speechState == BROKEN || _speechState == NO_VOICE)
+		return false;
+	// Ready means: neither paused nor speaking at the moment.
+	return _speechState != PAUSED && !isSpeaking();
}
void WindowsTextToSpeechManager::setVoice(unsigned index) {
+	const bool unusable = (_speechState == BROKEN) || (_speechState == NO_VOICE);
+	if (unusable)
+		return;
+	// The data stored with each available voice is its SAPI object token.
+	void *tokenData = _ttsState->_availaibleVoices[index].getData();
+	_voice->SetVoice((ISpObjectToken *) tokenData);
}
void WindowsTextToSpeechManager::setRate(int rate) {
+	const bool unusable = (_speechState == BROKEN) || (_speechState == NO_VOICE);
+	if (unusable)
+		return;
+	// Only rates from -10 to 10 are accepted.
+	assert(-10 <= rate && rate <= 10);
+	_voice->SetRate(rate);
+	// Remember the rate in the current state as well.
+	_ttsState->_rate = rate;
}
void WindowsTextToSpeechManager::setPitch(int pitch) {
+	const bool unusable = (_speechState == BROKEN) || (_speechState == NO_VOICE);
+	if (unusable)
+		return;
+	// The pitch is only stored here; say() reads it back from the state.
+	_ttsState->_pitch = pitch;
}
void WindowsTextToSpeechManager::setVolume(unsigned volume) {
+	const bool unusable = (_speechState == BROKEN) || (_speechState == NO_VOICE);
+	if (unusable)
+		return;
+	// Volumes above 100 are rejected.
+	assert(volume <= 100);
+	_voice->SetVolume(volume);
+	// Remember the volume in the current state as well.
+	_ttsState->_volume = volume;
}
int WindowsTextToSpeechManager::getVolume() {
- return 0;
+	// Report the volume remembered in the current state.
+	const int storedVolume = _ttsState->_volume;
+	return storedVolume;
+}
+
+void WindowsTextToSpeechManager::freeVoices() {
+	// Release the object token stored with every voice, then empty the list.
+	Common::TTSVoice *voice = _ttsState->_availaibleVoices.begin();
+	while (voice < _ttsState->_availaibleVoices.end()) {
+		ISpObjectToken *token = (ISpObjectToken *)voice->getData();
+		token->Release();
+		voice++;
+	}
+	_ttsState->_availaibleVoices.clear();
}
void WindowsTextToSpeechManager::setLanguage(Common::String language) {
+	// The "C" locale is mapped to English.
+	_ttsState->_language = (language == "C") ? Common::String("en") : language;
+	// Rebuild the voice list for the new language and select its first voice.
+	updateVoices();
+	setVoice(0);
}
-void WindowsTextToSpeechManager::createVoice(int typeNumber, Common::TTSVoice::Gender gender, char *description) {
+void WindowsTextToSpeechManager::createVoice(void *cpVoiceToken) {
+	// Takes over the caller's reference to the token: the token is either
+	// stored in the list of available voices or released before returning.
+	ISpObjectToken *voiceToken = (ISpObjectToken *) cpVoiceToken;
+
+	// description
+	WCHAR *descW = nullptr;
+	if (FAILED(SpGetDescription(voiceToken, &descW)) || !descW) {
+		voiceToken->Release();
+		warning("Could not get the description of a voice");
+		return;
+	}
+	char *buffer = Win32::unicodeToAnsi(descW);
+	Common::String desc;
+	if (buffer)
+		desc = buffer;
+	free(buffer);
+	// The description string is allocated by SAPI and has to be freed by us.
+	CoTaskMemFree(descW);
+
+	// voice attributes
+	ISpDataKey *key = nullptr;
+	HRESULT hr = voiceToken->OpenKey(L"Attributes", &key);
+	if (FAILED(hr) || !key) {
+		voiceToken->Release();
+		warning("Could not open attribute key for voice: %s", desc.c_str());
+		return;
+	}
+	LPWSTR data = nullptr;
+
+	// language
+	hr = key->GetStringValue(L"Language", &data);
+	if (FAILED(hr)) {
+		key->Release();
+		voiceToken->Release();
+		warning("Could not get the language attribute for voice: %s", desc.c_str());
+		return;
+	}
+	buffer = Win32::unicodeToAnsi(data);
+	Common::String language = lcidToLocale(buffer);
+	free(buffer);
+	CoTaskMemFree(data);
+
+	// only get the voices for the current language
+	if (language != _ttsState->_language) {
+		key->Release();
+		voiceToken->Release();
+		return;
+	}
+
+	// gender
+	hr = key->GetStringValue(L"Gender", &data);
+	// This was the last attribute to read, so the key can be released now.
+	key->Release();
+	if (FAILED(hr)) {
+		voiceToken->Release();
+		warning("Could not get the gender attribute for voice: %s", desc.c_str());
+		return;
+	}
+	buffer = Win32::unicodeToAnsi(data);
+	Common::TTSVoice::Gender gender = !strcmp(buffer, "Male") ? Common::TTSVoice::MALE : Common::TTSVoice::FEMALE;
+	free(buffer);
+	CoTaskMemFree(data);
+
+	_ttsState->_availaibleVoices.push_back(Common::TTSVoice(gender, (void *) voiceToken, desc));
+}
+
+int strToInt(Common::String str) {
+	// Interprets the leading characters of str as a hexadecimal number
+	// (case-insensitive) and stops at the first character that is not a
+	// hexadecimal digit.
+	str.toUppercase();
+	int value = 0;
+	for (unsigned pos = 0; pos < str.size(); ++pos) {
+		const char c = str[pos];
+		int digit;
+		if (c >= '0' && c <= '9')
+			digit = c - '0';
+		else if (c >= 'A' && c <= 'F')
+			digit = c - 'A' + 10;
+		else
+			break;
+		value = value * 16 + digit;
+	}
+	return value;
+}
+
+Common::String WindowsTextToSpeechManager::lcidToLocale(Common::String lcid) {
+	// Converts a hexadecimal LCID string into the ISO 639 language name of
+	// that locale. An empty string is returned when the lookup fails.
+	LCID locale = strToInt(lcid);
+	// Called with a zero-sized buffer, GetLocaleInfoW() returns the required
+	// buffer size, or 0 on failure.
+	int nchars = GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME, NULL, 0);
+	if (nchars <= 0)
+		return Common::String();
+	wchar_t *languageCode = new wchar_t[nchars];
+	if (GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME, languageCode, nchars) == 0) {
+		// Nothing was written, so the buffer must not be read as a string.
+		delete[] languageCode;
+		return Common::String();
+	}
+	char *resultTmp = Win32::unicodeToAnsi(languageCode);
+	delete[] languageCode;
+	Common::String result;
+	if (resultTmp)
+		result = resultTmp;
+	free(resultTmp);
+	return result;
}
void WindowsTextToSpeechManager::updateVoices() {
+	// Rebuilds the list of available voices for the current language.
+	freeVoices();
+	// Without a voice object (init() did not get that far) nothing can be
+	// enumerated or tested.
+	if (!_voice)
+		return;
+
+	HRESULT hr = S_OK;
+	ISpObjectToken *cpVoiceToken = nullptr;
+	IEnumSpObjectTokens *cpEnum = nullptr;
+	unsigned long ulCount = 0;
+
+	hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
+	if (SUCCEEDED(hr)) {
+		hr = cpEnum->GetCount(&ulCount);
+	}
+	// Every voice is tried out with a short utterance; mute the output meanwhile.
+	_voice->SetVolume(0);
+	while (SUCCEEDED(hr) && ulCount--) {
+		cpVoiceToken = nullptr;
+		hr = cpEnum->Next(1, &cpVoiceToken, NULL);
+		// Stop when no token was delivered instead of using a stale or null one.
+		if (hr != S_OK || !cpVoiceToken)
+			break;
+		_voice->SetVoice(cpVoiceToken);
+		if (SUCCEEDED(_voice->Speak(L"hi, this is test", SPF_PURGEBEFORESPEAK | SPF_ASYNC | SPF_IS_NOT_XML, 0)))
+			createVoice(cpVoiceToken);
+		else
+			cpVoiceToken->Release();
+	}
+	_voice->SetVolume(_ttsState->_volume);
+	// The enumerator only exists when SpEnumTokens() succeeded.
+	if (cpEnum)
+		cpEnum->Release();
+
+	if (_ttsState->_availaibleVoices.size() == 0) {
+		_speechState = NO_VOICE;
+		warning("No voice is availaible");
+	} else if (_speechState == NO_VOICE)
+		_speechState = READY;
+}
+
+bool WindowsTextToSpeechManager::popState() {
+	// Without a following state there is nothing to switch to; report that with true.
+	if (!_ttsState->_next)
+		return true;
+
+	// Release the voice tokens held by the state that is about to be deleted.
+	freeVoices();
+
+	Common::TTSState *discarded = _ttsState;
+	_ttsState = discarded->_next;
+	delete discarded;
+
+	// Re-apply the settings stored in the state that is current now.
+	setLanguage(_ttsState->_language);
+	setPitch(_ttsState->_pitch);
+	setVolume(_ttsState->_volume);
+	setRate(_ttsState->_rate);
+	setVoice(_ttsState->_activeVoice);
+	return false;
}
#endif
diff --git a/backends/text-to-speech/windows/windows-text-to-speech.h b/backends/text-to-speech/windows/windows-text-to-speech.h
index 5daf57c44c..03a1806849 100644
--- a/backends/text-to-speech/windows/windows-text-to-speech.h
+++ b/backends/text-to-speech/windows/windows-text-to-speech.h
@@ -36,7 +36,8 @@ public:
READY,
PAUSED,
SPEAKING,
- BROKEN
+ BROKEN,
+ NO_VOICE
};
WindowsTextToSpeechManager();
@@ -63,10 +64,14 @@ public:
virtual void setLanguage(Common::String language);
+ virtual bool popState();
+
private:
void init();
virtual void updateVoices();
- void createVoice(int typeNumber, Common::TTSVoice::Gender, char *description);
+ void createVoice(void *cpVoiceToken);
+ void freeVoices();
+ Common::String lcidToLocale(Common::String lcid);
SpeechState _speechState;
};