TTS: Implement *_NO_REPEAT actions and Fix state synchronization issues on macOS

The NSSpeechSynthesizer is asynchronous and does not immediately start, pause, or stop the speech. As a result, querying the state of the NSSpeechSynthesizer does not always return the expected result (for example, isSpeaking may not yet be true just after we requested starting to speak). So instead the TextToSpeechManager on macOS keeps track of the state itself.
author: Thierry Crozat 2019-08-03 00:47:37 +0100
committer: Filippos Karapetis 2019-09-01 22:47:55 +0300
commit: 13a32fc5e91d691214c975f5e545f432bd9c37ac (patch)
tree: eafd910f2ce04117846059f8b4fe1ba5b0c78877 /backends/text-to-speech/macosx
parent: bd2757138b88796754e1080f90e146c624bc58d7 (diff)
download: scummvm-rg350-13a32fc5e91d691214c975f5e545f432bd9c37ac.tar.gz
scummvm-rg350-13a32fc5e91d691214c975f5e545f432bd9c37ac.tar.bz2
scummvm-rg350-13a32fc5e91d691214c975f5e545f432bd9c37ac.zip
2 files changed, 46 insertions, 12 deletions
diff --git a/backends/text-to-speech/macosx/macosx-text-to-speech.h b/backends/text-to-speech/macosx/macosx-text-to-speech.h
index 7c33a3b0c9..9ed418dafa 100644
--- a/backends/text-to-speech/macosx/macosx-text-to-speech.h
+++ b/backends/text-to-speech/macosx/macosx-text-to-speech.h
@@ -68,6 +68,8 @@ private:
 		SpeechText(const Common::String& txt, const Common::String& enc) : text(txt), encoding(enc) {}
 	};
 	Common::Queue<SpeechText> _messageQueue;
+	Common::String _currentSpeech;
+	bool _paused;
 };
 
 #endif
diff --git a/backends/text-to-speech/macosx/macosx-text-to-speech.mm b/backends/text-to-speech/macosx/macosx-text-to-speech.mm
index 9f668414b6..1e8a10e45b 100644
--- a/backends/text-to-speech/macosx/macosx-text-to-speech.mm
+++ b/backends/text-to-speech/macosx/macosx-text-to-speech.mm
@@ -53,7 +53,7 @@
 NSSpeechSynthesizer *synthesizer;
 MacOSXTextToSpeechManagerDelegate *synthesizerDelegate;
 
-MacOSXTextToSpeechManager::MacOSXTextToSpeechManager() : Common::TextToSpeechManager() {
+MacOSXTextToSpeechManager::MacOSXTextToSpeechManager() : Common::TextToSpeechManager(), _paused(false) {
 	synthesizer = [[NSSpeechSynthesizer alloc] init];
 	synthesizerDelegate = [[MacOSXTextToSpeechManagerDelegate alloc] initWithManager:this];
 	[synthesizer setDelegate:synthesizerDelegate];
@@ -71,13 +71,27 @@ MacOSXTextToSpeechManager::~MacOSXTextToSpeechManager() {
 }
 
 bool MacOSXTextToSpeechManager::say(Common::String text, Action action, Common::String encoding) {
-	if ([synthesizer isSpeaking]) {
+	if (isSpeaking()) {
+		// Interruptions are done on word boundaries for nice transitions.
+		// Should we interrupt immediately?
 		if (action == DROP)
 			return true;
 		else if (action == INTERRUPT) {
 			_messageQueue.clear();
-			// Should we use NSSpeechImmediateBoundary, or even NSSpeechSentenceBoundary?
 			[synthesizer stopSpeakingAtBoundary:NSSpeechWordBoundary];
+		} else if (action == INTERRUPT_NO_REPEAT) {
+			// If the new speech is the one being currently said, continue that speech but clear the queue.
+			// And otherwise both clear the queue and interrupt the current speech.
+			_messageQueue.clear();
+			if (_currentSpeech == text)
+				return true;
+			[synthesizer stopSpeakingAtBoundary:NSSpeechWordBoundary];
+		} else if (action == QUEUE_NO_REPEAT) {
+			if (!_messageQueue.empty()) {
+				if (_messageQueue.back().text == text)
+					return true;
+			} else if (_currentSpeech == text)
+				return true;
 		}
 	}
 
@@ -88,12 +102,13 @@ bool MacOSXTextToSpeechManager::say(Common::String text, Action action, Common::
 	}
 
 	_messageQueue.push(SpeechText(text, encoding));
-	if (![synthesizer isSpeaking])
+	if (!isSpeaking())
 		startNextSpeech();
 	return true;
 }
 
 bool MacOSXTextToSpeechManager::startNextSpeech() {
+	_currentSpeech.clear();
 	if (_messageQueue.empty())
 		return false;
 	SpeechText text = _messageQueue.pop();
@@ -108,39 +123,56 @@ bool MacOSXTextToSpeechManager::startNextSpeech() {
 	CFStringRef textNSString = CFStringCreateWithCString(NULL, text.text.c_str(), stringEncoding);
 	bool status = [synthesizer startSpeakingString:(NSString *)textNSString];
 	CFRelease(textNSString);
+	if (status)
+		_currentSpeech = text.text;
+
 	return status;
 }
 
 bool MacOSXTextToSpeechManager::stop() {
 	_messageQueue.clear();
-	// Should we use NSSpeechImmediateBoundary, or even NSSpeechSentenceBoundary?
-	[synthesizer stopSpeakingAtBoundary:NSSpeechWordBoundary];
+	_currentSpeech.clear(); // so that it immediately reports that it is no longer speeking
+	// Stop as soon as possible
+	[synthesizer stopSpeakingAtBoundary:NSSpeechImmediateBoundary];
 	return true;
 }
 
 bool MacOSXTextToSpeechManager::pause() {
-	// Should we use NSSpeechImmediateBoundary, or even NSSpeechSentenceBoundary?
+	// Pause on a word boundary as pausing/resuming in a middle of words is strange.
 	[synthesizer pauseSpeakingAtBoundary:NSSpeechWordBoundary];
+	_paused = true;
 	return true;
 }
 
 bool MacOSXTextToSpeechManager::resume() {
+	_paused = false;
 	[synthesizer continueSpeaking];
 	return true;
 }
 
 bool MacOSXTextToSpeechManager::isSpeaking() {
-	return [synthesizer isSpeaking];
+	// Because the NSSpeechSynthesizer is asynchronous, it doesn't start speeking immediately
+	// and thus using [synthesizer isSpeaking] just after [synthesizer startSpeakingString:]] is
+	// likely to return NO. So instead we check the _currentSpeech string (set when calling
+	// startSpeakingString, and cleared when we receive the didFinishSpeaking message).
+	//return [synthesizer isSpeaking];
+	return !_currentSpeech.empty();
 }
 
 bool MacOSXTextToSpeechManager::isPaused() {
-	NSDictionary *statusDict = (NSDictionary*) [synthesizer objectForProperty:NSSpeechStatusProperty error:nil];
-	return [[statusDict objectForKey:NSSpeechStatusOutputBusy] boolValue] && [[statusDict objectForKey:NSSpeechStatusOutputPaused] boolValue];
+	// Because the NSSpeechSynthesizer is asynchronous, and because we pause at the end of a word
+	// and not immediately, we cannot check the speech status as it is likely to not be paused yet
+	// immediately after we requested the pause. So we keep our own flag.
+	//NSDictionary *statusDict = (NSDictionary*) [synthesizer objectForProperty:NSSpeechStatusProperty error:nil];
+	//return [[statusDict objectForKey:NSSpeechStatusOutputBusy] boolValue] && [[statusDict objectForKey:NSSpeechStatusOutputPaused] boolValue];
+	return _paused;
 }
 
 bool MacOSXTextToSpeechManager::isReady() {
-	NSDictionary *statusDict = (NSDictionary*) [synthesizer objectForProperty:NSSpeechStatusProperty error:nil];
-	return [[statusDict objectForKey:NSSpeechStatusOutputBusy] boolValue] == NO;
+	// See comments in isSpeaking() and isPaused()
+	//NSDictionary *statusDict = (NSDictionary*) [synthesizer objectForProperty:NSSpeechStatusProperty error:nil];
+	//return [[statusDict objectForKey:NSSpeechStatusOutputBusy] boolValue] == NO;
+	return _currentSpeech.empty() && !_paused;
 }
 
 void MacOSXTextToSpeechManager::setVoice(unsigned index) {
author	Thierry Crozat	2019-08-03 00:47:37 +0100
committer	Filippos Karapetis	2019-09-01 22:47:55 +0300
commit	13a32fc5e91d691214c975f5e545f432bd9c37ac (patch)
tree	eafd910f2ce04117846059f8b4fe1ba5b0c78877 /backends/text-to-speech/macosx
parent	bd2757138b88796754e1080f90e146c624bc58d7 (diff)
download	scummvm-rg350-13a32fc5e91d691214c975f5e545f432bd9c37ac.tar.gz scummvm-rg350-13a32fc5e91d691214c975f5e545f432bd9c37ac.tar.bz2 scummvm-rg350-13a32fc5e91d691214c975f5e545f432bd9c37ac.zip