From 0434419b31cc6c27d2fdebb34abdf0b3131f6b99 Mon Sep 17 00:00:00 2001 From: Thierry Crozat Date: Sun, 28 Jul 2019 14:56:38 +0100 Subject: TTS: Implement TextToSpeechManager for macOS --- backends/module.mk | 4 + backends/platform/sdl/macosx/macosx.cpp | 6 + .../text-to-speech/macosx/macosx-text-to-speech.h | 67 +++++++ .../text-to-speech/macosx/macosx-text-to-speech.mm | 206 +++++++++++++++++++++ configure | 10 + 5 files changed, 293 insertions(+) create mode 100644 backends/text-to-speech/macosx/macosx-text-to-speech.h create mode 100644 backends/text-to-speech/macosx/macosx-text-to-speech.mm diff --git a/backends/module.mk b/backends/module.mk index 04b38bfda1..11185fcf05 100644 --- a/backends/module.mk +++ b/backends/module.mk @@ -352,6 +352,10 @@ ifdef USE_WINDOWS_TTS MODULE_OBJS += \ text-to-speech/windows/windows-text-to-speech.o endif +ifdef USE_MACOSX_TTS +MODULE_OBJS += \ + text-to-speech/macosx/macosx-text-to-speech.o +endif # Include common rules include $(srcdir)/rules.mk diff --git a/backends/platform/sdl/macosx/macosx.cpp b/backends/platform/sdl/macosx/macosx.cpp index 3628168e71..3cca69b4a5 100644 --- a/backends/platform/sdl/macosx/macosx.cpp +++ b/backends/platform/sdl/macosx/macosx.cpp @@ -32,6 +32,7 @@ #include "backends/platform/sdl/macosx/macosx.h" #include "backends/updates/macosx/macosx-updates.h" #include "backends/taskbar/macosx/macosx-taskbar.h" +#include "backends/text-to-speech/macosx/macosx-text-to-speech.h" #include "backends/dialogs/macosx/macosx-dialogs.h" #include "backends/platform/sdl/macosx/macosx_wrapper.h" #include "backends/fs/posix/posix-fs.h" @@ -86,6 +87,11 @@ void OSystem_MacOSX::initBackend() { _updateManager = new MacOSXUpdateManager(); #endif +#ifdef USE_MACOSX_TTS + // Initialize Text to Speech manager + _textToSpeechManager = new MacOSXTextToSpeechManager(); +#endif + // Invoke parent implementation of this method OSystem_POSIX::initBackend(); } diff --git a/backends/text-to-speech/macosx/macosx-text-to-speech.h 
b/backends/text-to-speech/macosx/macosx-text-to-speech.h new file mode 100644 index 0000000000..fed0c05458 --- /dev/null +++ b/backends/text-to-speech/macosx/macosx-text-to-speech.h @@ -0,0 +1,67 @@ +/* ScummVM - Graphic Adventure Engine + * + * ScummVM is the legal property of its developers, whose names + * are too numerous to list here. Please refer to the COPYRIGHT + * file distributed with this source distribution. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ *
+ */
+
+#ifndef BACKENDS_TEXT_TO_SPEECH_MACOSX_H
+#define BACKENDS_TEXT_TO_SPEECH_MACOSX_H
+
+#include "common/scummsys.h"
+
+#if defined(USE_MACOSX_TTS)
+
+#include "common/text-to-speech.h"
+#include "common/str.h"
+
+/**
+ * Text-to-speech manager for macOS.
+ *
+ * The implementation drives a single NSSpeechSynthesizer instance and
+ * exposes the system voices whose locale identifier starts with the
+ * currently selected language.
+ */
+class MacOSXTextToSpeechManager : public Common::TextToSpeechManager {
+public:
+	/** Creates the synthesizer and selects the initial language. */
+	MacOSXTextToSpeechManager();
+	/** Releases the synthesizer. */
+	virtual ~MacOSXTextToSpeechManager();
+
+	/**
+	 * Starts speaking the given string.
+	 *
+	 * @param str      Text to speak.
+	 * @param charset  IANA name of the encoding of str. When empty and
+	 *                 USE_TRANSLATION is defined, the charset of the
+	 *                 current translation is used; otherwise the text is
+	 *                 read as ASCII.
+	 * @return The value returned by the synthesizer when asked to start
+	 *         speaking.
+	 */
+	virtual bool say(Common::String str, Common::String charset = "");
+
+	/** Asks the synthesizer to stop speaking. Always returns true. */
+	virtual bool stop();
+	/** Asks the synthesizer to pause immediately. Always returns true. */
+	virtual bool pause();
+	/** Asks the synthesizer to continue speaking. Always returns true. */
+	virtual bool resume();
+
+	/** Returns whether the synthesizer reports that it is speaking. */
+	virtual bool isSpeaking();
+	/** Returns true when the synthesizer status is both busy and paused. */
+	virtual bool isPaused();
+	/** Returns true when the synthesizer status is not busy. */
+	virtual bool isReady();
+
+	/**
+	 * Selects a voice by its index in the list of available voices and
+	 * re-applies the current pitch and rate to it. Does nothing when the
+	 * list is empty; the index is asserted to be in range otherwise.
+	 */
+	virtual void setVoice(unsigned index);
+
+	/**
+	 * Sets the speech rate. The value is a modifier between -100 and +100
+	 * (0 being the voice default) applied as a 0.5 to 1.5 multiplier.
+	 */
+	virtual void setRate(int rate);
+
+	/**
+	 * Sets the speech pitch. The value is a modifier between -100 and +100
+	 * (0 being the voice default) applied as a 0.5 to 1.5 multiplier on the
+	 * base pitch.
+	 */
+	virtual void setPitch(int pitch);
+
+	/** Sets the volume; the value is divided by 100 for the synthesizer. */
+	virtual void setVolume(unsigned volume);
+
+	/** Stores the language and rebuilds the list of available voices. */
+	virtual void setLanguage(Common::String language);
+
+	/** Releases the NSString voice identifier stored as voice data. */
+	virtual void freeVoiceData(void *data);
+
+private:
+	/**
+	 * Rebuilds the list of available voices for the current language and
+	 * selects the previously active voice, the system default voice, or
+	 * the first voice, in that order of preference.
+	 */
+	virtual void updateVoices();
+};
+
+#endif
+
+#endif // BACKENDS_TEXT_TO_SPEECH_MACOSX_H
+
diff --git a/backends/text-to-speech/macosx/macosx-text-to-speech.mm b/backends/text-to-speech/macosx/macosx-text-to-speech.mm
new file mode 100644
index 0000000000..46a05e6c98
--- /dev/null
+++ b/backends/text-to-speech/macosx/macosx-text-to-speech.mm
@@ -0,0 +1,206 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+// Disable symbol overrides so that we can use system headers.
+#define FORBIDDEN_SYMBOL_ALLOW_ALL
+
+#include "backends/text-to-speech/macosx/macosx-text-to-speech.h"
+
+#if defined(USE_MACOSX_TTS)
+#include "common/translation.h"
+#include <AppKit/NSSpeechSynthesizer.h>
+#include <Foundation/NSString.h>
+#include <CoreFoundation/CFString.h>
+
+// Speech synthesizer used by every method in this file. It is allocated in
+// the constructor and released in the destructor.
+NSSpeechSynthesizer* synthesizer;
+
+/**
+ * Creates the synthesizer and selects the initial language: the current
+ * translation language when USE_TRANSLATION is defined, "en" otherwise.
+ * setLanguage() also rebuilds the voice list.
+ */
+MacOSXTextToSpeechManager::MacOSXTextToSpeechManager() : Common::TextToSpeechManager() {
+	synthesizer = [[NSSpeechSynthesizer alloc] init];
+
+#ifdef USE_TRANSLATION
+	setLanguage(TransMan.getCurrentLanguage());
+#else
+	setLanguage("en");
+#endif
+}
+
+/** Releases the synthesizer allocated by the constructor. */
+MacOSXTextToSpeechManager::~MacOSXTextToSpeechManager() {
+	[synthesizer release];
+}
+
+/**
+ * Starts speaking the given text.
+ *
+ * @param text      Text to speak.
+ * @param encoding  IANA name of the encoding of text. When empty and
+ *                  USE_TRANSLATION is defined, the charset of the current
+ *                  translation is used; otherwise the text is read as ASCII.
+ * @return The value returned by the synthesizer when asked to start
+ *         speaking, or false when the encoding name is not recognised or
+ *         the text cannot be decoded with it.
+ */
+bool MacOSXTextToSpeechManager::say(Common::String text, Common::String encoding) {
+	if (encoding.empty()) {
+#ifdef USE_TRANSLATION
+		encoding = TransMan.getCurrentCharset();
+#endif
+	}
+
+	// Get current encoding
+	CFStringEncoding stringEncoding = kCFStringEncodingASCII;
+	if (!encoding.empty()) {
+		CFStringRef encStr = CFStringCreateWithCString(NULL, encoding.c_str(), kCFStringEncodingASCII);
+		// CFRelease() must never be given NULL, so only convert and release
+		// the name when the string was actually created.
+		if (encStr == NULL)
+			return false;
+		stringEncoding = CFStringConvertIANACharSetNameToEncoding(encStr);
+		CFRelease(encStr);
+		// An unknown charset name cannot be used to decode the text.
+		// NOTE(review): false is used for failure to match the value returned
+		// below when the synthesizer fails to start; confirm this against the
+		// return convention of Common::TextToSpeechManager::say().
+		if (stringEncoding == kCFStringEncodingInvalidId)
+			return false;
+	}
+
+	CFStringRef textNSString = CFStringCreateWithCString(NULL, text.c_str(), stringEncoding);
+	// The conversion fails (NULL) when the bytes are not valid in the chosen
+	// encoding. Bail out instead of passing NULL to the synthesizer and to
+	// CFRelease().
+	if (textNSString == NULL)
+		return false;
+	bool status = [synthesizer startSpeakingString:(NSString *)textNSString];
+	CFRelease(textNSString);
+	return status;
+}
+
+/** Asks the synthesizer to stop speaking. Always returns true. */
+bool MacOSXTextToSpeechManager::stop() {
+	[synthesizer stopSpeaking];
+	return true;
+}
+
+/** Asks the synthesizer to pause immediately. Always returns true. */
+bool MacOSXTextToSpeechManager::pause() {
+	// Should we use NSSpeechWordBoundary, or even NSSpeechSentenceBoundary?
+	[synthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
+	return true;
+}
+
+/** Asks the synthesizer to continue speaking. Always returns true. */
+bool MacOSXTextToSpeechManager::resume() {
+	[synthesizer continueSpeaking];
+	return true;
+}
+
+/** Returns whether the synthesizer reports that it is speaking. */
+bool MacOSXTextToSpeechManager::isSpeaking() {
+	return [synthesizer isSpeaking];
+}
+
+/** Returns true when the synthesizer status is both busy and paused. */
+bool MacOSXTextToSpeechManager::isPaused() {
+	NSDictionary *statusDict = (NSDictionary*) [synthesizer objectForProperty:NSSpeechStatusProperty error:nil];
+	return [[statusDict objectForKey:NSSpeechStatusOutputBusy] boolValue] && [[statusDict objectForKey:NSSpeechStatusOutputPaused] boolValue];
+}
+
+/** Returns true when the synthesizer status is not busy. */
+bool MacOSXTextToSpeechManager::isReady() {
+	NSDictionary *statusDict = (NSDictionary*) [synthesizer objectForProperty:NSSpeechStatusProperty error:nil];
+	return [[statusDict objectForKey:NSSpeechStatusOutputBusy] boolValue] == NO;
+}
+
+/**
+ * Selects the voice at the given index of the available voices list.
+ * Does nothing when the list is empty; the index is asserted to be in range
+ * otherwise.
+ */
+void MacOSXTextToSpeechManager::setVoice(unsigned index) {
+	if (_ttsState->_availableVoices.empty())
+		return;
+	assert(index < _ttsState->_availableVoices.size());
+	Common::TTSVoice voice = _ttsState->_availableVoices[index];
+	_ttsState->_activeVoice = index;
+
+	[synthesizer setVoice:(NSString*)voice.getData()];
+
+	// Setting the voice resets the pitch and rate to the voice defaults.
+	// Apply back the modifiers: the stored modifiers are first reset to 0 so
+	// that setPitch() and setRate() scale from the new voice defaults.
+	int pitch = getPitch(), rate = getRate();
+	Common::TextToSpeechManager::setPitch(0);
+	Common::TextToSpeechManager::setRate(0);
+	setPitch(pitch);
+	setRate(rate);
+}
+
+/**
+ * Sets the speech rate modifier and rescales the synthesizer rate from the
+ * previous modifier to the new one.
+ */
+void MacOSXTextToSpeechManager::setRate(int rate) {
+	int oldRate = getRate();
+	Common::TextToSpeechManager::setRate(rate);
+	// The rate is a value between -100 and +100, with 0 being the default rate.
+	// Convert this to a multiplier between 0.5 and 1.5.
+	float oldRateMultiplier = 1.0f + oldRate / 200.0f;
+	float rateMultiplier = 1.0f + rate / 200.0f;
+	// Undo the previous multiplier before applying the new one.
+	synthesizer.rate = synthesizer.rate / oldRateMultiplier * rateMultiplier;
+}
+
+/**
+ * Sets the speech pitch modifier and rescales the synthesizer base pitch
+ * from the previous modifier to the new one.
+ */
+void MacOSXTextToSpeechManager::setPitch(int pitch) {
+	int oldPitch = getPitch();
+	Common::TextToSpeechManager::setPitch(pitch);
+	// The pitch is a value between -100 and +100, with 0 being the default pitch.
+	// Convert this to a multiplier between 0.5 and 1.5 on the default voice pitch.
+	float oldPitchMultiplier = 1.0f + oldPitch / 200.0f;
+	float pitchMultiplier = 1.0f + pitch / 200.0f;
+	NSNumber *basePitchNumber = [synthesizer objectForProperty:NSSpeechPitchBaseProperty error:nil];
+	// Undo the previous multiplier before applying the new one.
+	float basePitch = [basePitchNumber floatValue] / oldPitchMultiplier * pitchMultiplier;
+	[synthesizer setObject:[NSNumber numberWithFloat:basePitch] forProperty:NSSpeechPitchBaseProperty error:nil];
+}
+
+/** Stores the volume and hands it to the synthesizer divided by 100. */
+void MacOSXTextToSpeechManager::setVolume(unsigned volume) {
+	Common::TextToSpeechManager::setVolume(volume);
+	synthesizer.volume = volume / 100.0f;
+}
+
+/** Stores the language and rebuilds the list of available voices. */
+void MacOSXTextToSpeechManager::setLanguage(Common::String language) {
+	Common::TextToSpeechManager::setLanguage(language);
+	updateVoices();
+}
+
+/**
+ * Releases voice data, which updateVoices() creates as an owned NSString
+ * holding the voice identifier.
+ */
+void MacOSXTextToSpeechManager::freeVoiceData(void *data) {
+	NSString* voiceId = (NSString*)data;
+	[voiceId release];
+}
+
+/**
+ * Rebuilds the list of available voices from the system voices whose locale
+ * identifier starts with the current language, then selects a voice: the
+ * one that was active before (matched by name), else the system default
+ * voice, else the first voice in the list.
+ */
+void MacOSXTextToSpeechManager::updateVoices() {
+	// Remember the active voice by name so it can be found in the new list.
+	Common::String currentVoice;
+	if (!_ttsState->_availableVoices.empty())
+		currentVoice = _ttsState->_availableVoices[_ttsState->_activeVoice].getDescription();
+	_ttsState->_availableVoices.clear();
+	int activeVoiceIndex = -1, defaultVoiceIndex = -1;
+
+	Common::String lang = getLanguage();
+	NSArray *voices = [NSSpeechSynthesizer availableVoices];
+	NSString *defaultVoice = [NSSpeechSynthesizer defaultVoice];
+	int voiceIndex = 0;
+	for (NSString *voiceId in voices) {
+		NSDictionary *voiceAttr = [NSSpeechSynthesizer attributesForVoice:voiceId];
+		Common::String voiceLocale([[voiceAttr objectForKey:NSVoiceLocaleIdentifier] UTF8String]);
+		if (voiceLocale.hasPrefix(lang)) {
+			// Owned copy of the identifier stored as the voice data; it is
+			// released through freeVoiceData().
+			NSString *data = [[NSString alloc] initWithString:voiceId];
+			Common::String name([[voiceAttr objectForKey:NSVoiceName] UTF8String]);
+			Common::TTSVoice::Gender gender = Common::TTSVoice::UNKNOWN_GENDER;
+			NSString *voiceGender = [voiceAttr objectForKey:NSVoiceGender];
+			if (voiceGender != nil) {
+				// This can be VoiceGenderMale, VoiceGenderFemale, VoiceGenderNeuter
+				if ([voiceGender isEqualToString:@"VoiceGenderMale"])
+					gender = Common::TTSVoice::MALE;
+				else if ([voiceGender isEqualToString:@"VoiceGenderFemale"])
+					gender = Common::TTSVoice::FEMALE;
+			}
+			Common::TTSVoice::Age age = Common::TTSVoice::UNKNOWN_AGE;
+			NSNumber *voiceAge = [voiceAttr objectForKey:NSVoiceAge];
+			if (voiceAge != nil) {
+				if ([voiceAge integerValue] < 18)
+					age = Common::TTSVoice::CHILD;
+				else
+					age = Common::TTSVoice::ADULT;
+			}
+			Common::TTSVoice voice(gender, age, data, name);
+			_ttsState->_availableVoices.push_back(voice);
+			if (name == currentVoice)
+				activeVoiceIndex = voiceIndex;
+			if (defaultVoice != nil && [defaultVoice isEqualToString:voiceId])
+				defaultVoiceIndex = voiceIndex;
+			++voiceIndex;
+		}
+	}
+
+	// Fall back to the system default voice, then to the first voice.
+	// setVoice() ignores the request when no voice matched the language.
+	if (activeVoiceIndex == -1)
+		activeVoiceIndex = defaultVoiceIndex == -1 ? 0 : defaultVoiceIndex;
+	setVoice(activeVoiceIndex);
+}
+
+
+#endif
diff --git a/configure b/configure
index f1907427a8..c0ef73be7f 100755
--- a/configure
+++ b/configure
@@ -169,6 +169,7 @@ _iconv=auto
 _tts=auto
 _linux_tts=no
 _windows_tts=no
+_macosx_tts=no
 # Default option behavior yes/no
 _debug_build=auto
 _release_build=auto
@@ -4224,6 +4225,9 @@ int main(void) { return 0; }
 EOF
 			cc_check -lspeechd && _tts=yes
 			;;
+		darwin*)
+			_tts=yes
+			;;
 	esac
 fi
 echo "$_tts"
@@ -5414,6 +5418,12 @@ else
 			define_in_config_if_yes $_windows_tts 'USE_WINDOWS_TTS'
 			append_var LIBS '-lsapi -lole32'
 			;;
+		darwin*)
+			echo "osx"
+			_tts=yes
+			_macosx_tts=yes
+			define_in_config_if_yes $_macosx_tts 'USE_MACOSX_TTS'
+			;;
 		*)
 			echo "no"
 			_tts=no
-- 
cgit v1.2.3