From 87a51b391ca68fa507a81970c10ab88ab89d1683 Mon Sep 17 00:00:00 2001 From: Fabio Manganiello Date: Mon, 24 Feb 2020 20:22:45 +0100 Subject: [PATCH] Support for custom TTS engine for rendering assistant response (see #86) --- platypush/backend/assistant/__init__.py | 19 ++++++- platypush/backend/assistant/google.py | 23 ++++++-- .../backend/assistant/snowboy/__init__.py | 7 ++- platypush/plugins/assistant/__init__.py | 2 +- .../plugins/assistant/google/__init__.py | 25 ++++++++- .../plugins/assistant/google/lib/__init__.py | 13 +++-- .../plugins/assistant/google/pushtotalk.py | 52 ++++++++++++++++--- 7 files changed, 120 insertions(+), 21 deletions(-) diff --git a/platypush/backend/assistant/__init__.py b/platypush/backend/assistant/__init__.py index 6ac54b95..0d7aa308 100644 --- a/platypush/backend/assistant/__init__.py +++ b/platypush/backend/assistant/__init__.py @@ -1,12 +1,26 @@ import threading +from typing import Optional, Dict, Any, Tuple from platypush.backend import Backend +from platypush.context import get_plugin +from platypush.plugins.tts import TtsPlugin class AssistantBackend(Backend): - def __init__(self, **kwargs): + def __init__(self, tts_plugin: Optional[str] = None, tts_args: Optional[Dict[str, Any]] = None, **kwargs): + """ + Default assistant backend constructor. + + :param tts_plugin: If set, and if the assistant returns the processed response as text, then the processed + response will be played through the selected text-to-speech plugin (can be e.g. "``tts``", + "``tts.google``" or any other implementation of :class:`platypush.plugins.tts.TtsPlugin`). + :param tts_args: Extra parameters to pass to the ``say`` method of the selected TTS plugin (e.g. + language, voice or gender). + """ super().__init__(**kwargs) self._detection_paused = threading.Event() + self.tts_plugin = tts_plugin + self.tts_args = tts_args or {} def pause_detection(self): self._detection_paused.set() @@ -17,5 +31,8 @@ class AssistantBackend(Backend): def is_detecting(self): return not self._detection_paused.is_set() + def _get_tts_plugin(self) -> Tuple[Optional[TtsPlugin], Dict[str, Any]]: + return get_plugin(self.tts_plugin) if self.tts_plugin else None, self.tts_args + # vim:sw=4:ts=4:et: diff --git a/platypush/backend/assistant/google.py b/platypush/backend/assistant/google.py index 3f6d3b6d..04023de5 100644 --- a/platypush/backend/assistant/google.py +++ b/platypush/backend/assistant/google.py @@ -102,6 +102,11 @@ class AssistantGoogleBackend(AssistantBackend): elif hasattr(EventType, 'ON_RENDER_RESPONSE') and \ event.type == EventType.ON_RENDER_RESPONSE: self.bus.post(ResponseEvent(assistant=self, response_text=event.args.get('text'))) + tts, args = self._get_tts_plugin() + + if tts and 'text' in event.args: + self.stop_conversation() + tts.say(text=event.args['text'], **args) elif hasattr(EventType, 'ON_RESPONDING_STARTED') and \ event.type == EventType.ON_RESPONDING_STARTED and \ event.args.get('is_error_response', False) is True: @@ -141,6 +146,20 @@ class AssistantGoogleBackend(AssistantBackend): if self.assistant: self.assistant.stop_conversation() + def set_mic_mute(self, muted): + if not self.assistant: + self.logger.warning('Assistant not running') + return + + self.assistant.set_mic_mute(muted) + + def send_text_query(self, query): + if not self.assistant: + self.logger.warning('Assistant not running') + return + + self.assistant.send_text_query(query) + def run(self): import google.oauth2.credentials from google.assistant.library import Assistant @@ -148,9 +167,7 @@ class AssistantGoogleBackend(AssistantBackend): super().run() with open(self.credentials_file, 'r') as f: - self.credentials = google.oauth2.credentials.Credentials( - token=None, - **json.load(f)) + self.credentials = google.oauth2.credentials.Credentials(token=None, **json.load(f)) while not self.should_stop(): self._has_error = False diff --git a/platypush/backend/assistant/snowboy/__init__.py b/platypush/backend/assistant/snowboy/__init__.py index 991f6f9d..6a1800ae 100644 --- a/platypush/backend/assistant/snowboy/__init__.py +++ b/platypush/backend/assistant/snowboy/__init__.py @@ -125,6 +125,8 @@ class AssistantSnowboyBackend(AssistantBackend): 'detect_sound': detect_sound, 'assistant_plugin': get_plugin(assistant_plugin_name) if assistant_plugin_name else None, 'assistant_language': conf.get('assistant_language'), + 'tts_plugin': conf.get('tts_plugin'), + 'tts_args': conf.get('tts_args', {}), } def hotword_detected(self, hotword): @@ -150,12 +152,15 @@ class AssistantSnowboyBackend(AssistantBackend): detect_sound = model.get('detect_sound') assistant_plugin = model.get('assistant_plugin') assistant_language = model.get('assistant_language') + tts_plugin = model.get('tts_plugin') + tts_args = model.get('tts_args') if detect_sound: threading.Thread(target=sound_thread, args=(detect_sound,)).start() if assistant_plugin: - assistant_plugin.start_conversation(language=assistant_language) + assistant_plugin.start_conversation(language=assistant_language, tts_plugin=tts_plugin, + tts_args=tts_args) return callback diff --git a/platypush/plugins/assistant/__init__.py b/platypush/plugins/assistant/__init__.py index 8c60ff1b..08b25e8c 100644 --- a/platypush/plugins/assistant/__init__.py +++ b/platypush/plugins/assistant/__init__.py @@ -10,7 +10,7 @@ class AssistantPlugin(ABC, Plugin): """ @abstractmethod - def start_conversation(self, *args, language=None, **kwargs): + def start_conversation(self, *args, language=None, tts_plugin=None, tts_args=None, **kwargs): """ Start a conversation. """ diff --git a/platypush/plugins/assistant/google/__init__.py b/platypush/plugins/assistant/google/__init__.py index 941052f4..8ad69b2d 100644 --- a/platypush/plugins/assistant/google/__init__.py +++ b/platypush/plugins/assistant/google/__init__.py @@ -2,6 +2,7 @@ .. moduleauthor:: Fabio Manganiello """ +from platypush.backend.assistant.google import AssistantGoogleBackend from platypush.context import get_backend from platypush.plugins import action from platypush.plugins.assistant import AssistantPlugin @@ -17,11 +18,11 @@ class AssistantGooglePlugin(AssistantPlugin): def __init__(self, **kwargs): super().__init__(**kwargs) - def _get_assistant(self): + def _get_assistant(self) -> AssistantGoogleBackend: return get_backend('assistant.google') @action - def start_conversation(self): + def start_conversation(self, **kwargs): """ Programmatically start a conversation with the assistant """ @@ -36,5 +37,25 @@ class AssistantGooglePlugin(AssistantPlugin): assistant = self._get_assistant() assistant.stop_conversation() + @action + def set_mic_mute(self, muted: bool = True): + """ + Programmatically mute/unmute the microphone. + + :param muted: Set to True or False. + """ + assistant = self._get_assistant() + assistant.set_mic_mute(muted) + + @action + def send_text_query(self, query: str): + """ + Send a text query to the assistant. + + :param query: Query to be sent. + """ + assistant = self._get_assistant() + assistant.send_text_query(query) + # vim:sw=4:ts=4:et: diff --git a/platypush/plugins/assistant/google/lib/__init__.py b/platypush/plugins/assistant/google/lib/__init__.py index ea5b1c5b..9953b93b 100644 --- a/platypush/plugins/assistant/google/lib/__init__.py +++ b/platypush/plugins/assistant/google/lib/__init__.py @@ -72,9 +72,7 @@ class SampleAssistant(object): self.is_new_conversation = True # Create Google Assistant API gRPC client. - self.assistant = embedded_assistant_pb2_grpc.EmbeddedAssistantStub( - channel - ) + self.assistant = embedded_assistant_pb2_grpc.EmbeddedAssistantStub(channel) self.deadline = deadline_sec self.device_handler = device_handler @@ -126,8 +124,7 @@ class SampleAssistant(object): # This generator yields AssistResponse proto messages # received from the gRPC Google Assistant API. - for resp in self.assistant.Assist(iter_log_assist_requests(), - self.deadline): + for resp in self.assistant.Assist(iter_log_assist_requests(), self.deadline): assistant_helpers.log_assist_response_without_audio(resp) if resp.event_type == END_OF_UTTERANCE: logging.info('End of audio request detected.') @@ -143,6 +140,7 @@ class SampleAssistant(object): if len(r.transcript.strip())).strip() logging.info('Transcript of user request: "%s".', self.detected_speech) + if len(resp.audio_out.audio_data) > 0: if not self.conversation_stream.playing: self.conversation_stream.stop_recording() @@ -155,10 +153,12 @@ class SampleAssistant(object): self.conversation_stream.write(resp.audio_out.audio_data) elif self.conversation_stream.playing: self.conversation_stream.stop_playback() + if resp.dialog_state_out.conversation_state: conversation_state = resp.dialog_state_out.conversation_state logging.debug('Updating conversation state.') self.conversation_state = conversation_state + if resp.dialog_state_out.volume_percentage != 0: volume_percentage = resp.dialog_state_out.volume_percentage logging.info('Setting volume to %s%%', volume_percentage) @@ -166,11 +166,13 @@ class SampleAssistant(object): if self.on_volume_changed: self.on_volume_changed(volume_percentage) + if resp.dialog_state_out.microphone_mode == DIALOG_FOLLOW_ON: continue_conversation = True logging.info('Expecting follow-on query from user.') elif resp.dialog_state_out.microphone_mode == CLOSE_MICROPHONE: continue_conversation = False + if resp.device_action.device_request_json: device_request = json.loads( resp.device_action.device_request_json @@ -178,6 +180,7 @@ class SampleAssistant(object): fs = self.device_handler(device_request) if fs: device_actions_futures.extend(fs) + if self.display and resp.screen_out.data: system_browser = browser_helpers.system_browser system_browser.display(resp.screen_out.data) diff --git a/platypush/plugins/assistant/google/pushtotalk.py b/platypush/plugins/assistant/google/pushtotalk.py index 3b46ce19..4ec5a3c5 100644 --- a/platypush/plugins/assistant/google/pushtotalk.py +++ b/platypush/plugins/assistant/google/pushtotalk.py @@ -4,8 +4,9 @@ import json import os +from typing import Optional, Dict, Any -from platypush.context import get_bus +from platypush.context import get_bus, get_plugin from platypush.message.event.assistant import ConversationStartEvent, \ ConversationEndEvent, SpeechRecognizedEvent, VolumeChangedEvent, \ ResponseEvent @@ -48,6 +49,8 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin): 'device_config.json'), language='en-US', play_response=True, + tts_plugin=None, + tts_args=None, **kwargs): """ :param credentials_file: Path to the Google OAuth credentials file @@ -68,6 +71,12 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin): :param play_response: If True (default) then the plugin will play the assistant response upon processed response. Otherwise nothing will be played - but you may want to handle the ``ResponseEvent`` manually. :type play_response: bool + + :param tts_plugin: Optional text-to-speech plugin to be used to process response text. + :type tts_plugin: str + + :param tts_args: Optional arguments for the TTS plugin ``say`` method. + :type tts_args: dict """ import googlesamples.assistant.grpc.audio_helpers as audio_helpers @@ -83,6 +92,8 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin): self.credentials_file = credentials_file self.device_config = device_config self.play_response = play_response + self.tts_plugin = tts_plugin + self.tts_args = tts_args or {} self.assistant = None self.interactions = [] @@ -188,18 +199,26 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin): else: self.interactions[-1]['response'] = response + if self.tts_plugin: + tts = get_plugin(self.tts_plugin) + tts.say(response, **self.tts_args) + return handler @action - def start_conversation(self, *args, language=None, **kwargs): + def start_conversation(self, *args, language: Optional[str] = None, tts_plugin: Optional[str] = None, + tts_args: Optional[Dict[str, Any]] = None, **kwargs): """ Start a conversation - :param language: Language code override (default: default configured language) - :type language: str + :param language: Language code override (default: default configured language). + :param tts_plugin: Optional text-to-speech plugin to be used for rendering text. + :param tts_args: Optional arguments for the TTS plugin say method. :returns: A list of the interactions that happen within the conversation. + ..code-block:: json + [ { "request": "request 1", @@ -212,15 +231,16 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin): "response": "response 2" } - ] """ from platypush.plugins.assistant.google.lib import SampleAssistant - if not language: - language = self.language + self.tts_plugin = tts_plugin + self.tts_args = tts_args + language = language or self.language + play_response = False if self.tts_plugin else self.play_response self._init_assistant() self.on_conversation_start() @@ -232,7 +252,7 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin): display=None, channel=self.grpc_channel, deadline_sec=self.grpc_deadline, - play_response=self.play_response, + play_response=play_response, device_handler=self.device_handler, on_conversation_start=self.on_conversation_start(), on_conversation_end=self.on_conversation_end(), @@ -262,6 +282,22 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin): get_bus().post(ConversationEndEvent(assistant=self)) + @action + def set_mic_mute(self, muted: bool = True): + """ + Programmatically mute/unmute the microphone. + + :param muted: Set to True or False. + """ + if not self.conversation_stream: + self.logger.warning('The assistant is not running') + return + + if muted: + self.conversation_stream.stop_recording() + else: + self.conversation_stream.start_recording() + def _install_device_handlers(self): import googlesamples.assistant.grpc.device_helpers as device_helpers self.device_handler = device_helpers.DeviceRequestHandler(self.device_id)