Support for custom TTS engine for rendering assistant response (see #86)

2020-02-24 20:22:45 +01:00 · 2020-02-24 20:22:45 +01:00 · 87a51b391c
parent 40a29a8214
commit 87a51b391c
7 changed files with 120 additions and 21 deletions
--- a/platypush/backend/assistant/init.py
+++ b/platypush/backend/assistant/init.py
@ -1,12 +1,26 @@
 import threading
+from typing import Optional, Dict, Any, Tuple

 from platypush.backend import Backend
+from platypush.context import get_plugin
+from platypush.plugins.tts import TtsPlugin


 class AssistantBackend(Backend):
-    def __init__(self, **kwargs):
+    def __init__(self, tts_plugin: Optional[str] = None, tts_args: Optional[Dict[str, Any]] = None, **kwargs):
+        """
+        Default assistant backend constructor.
+
+        :param tts_plugin: If set, and if the assistant returns the processed response as text, then the processed
+            response will be played through the selected text-to-speech plugin (can be e.g. "``tts``",
+            "``tts.google``" or any other implementation of :class:`platypush.plugins.tts.TtsPlugin`).
+        :param tts_args: Extra parameters to pass to the ``say`` method of the selected TTS plugin (e.g.
+            language, voice or gender).
+        :param kwargs: Remaining keyword arguments, forwarded unchanged to the parent
+            :class:`platypush.backend.Backend` constructor.
+        """
        super().__init__(**kwargs)
+        # Set by pause_detection(); is_detecting() reports True while this event is NOT set.
        self._detection_paused = threading.Event()
+        # Only the plugin name is stored here; _get_tts_plugin() resolves it through get_plugin() on demand.
+        self.tts_plugin = tts_plugin
+        # A missing/None value is normalized to an empty dict so that it can always be ``**``-unpacked.
+        self.tts_args = tts_args or {}

    def pause_detection(self):
        self._detection_paused.set()
@ -17,5 +31,8 @@ class AssistantBackend(Backend):
    def is_detecting(self):
        return not self._detection_paused.is_set()

+    def _get_tts_plugin(self) -> Tuple[Optional[TtsPlugin], Dict[str, Any]]:
+        """
+        Resolve the configured text-to-speech plugin.
+
+        :return: A ``(plugin, args)`` pair. ``plugin`` is the result of ``get_plugin(self.tts_plugin)``, or
+            ``None`` when no TTS plugin name is configured; ``args`` is ``self.tts_args``.
+        """
+        if self.tts_plugin:
+            tts = get_plugin(self.tts_plugin)
+        else:
+            tts = None
+
+        return tts, self.tts_args
+

 # vim:sw=4:ts=4:et:
--- a/platypush/backend/assistant/google.py
+++ b/platypush/backend/assistant/google.py
@ -102,6 +102,11 @@ class AssistantGoogleBackend(AssistantBackend):
        elif hasattr(EventType, 'ON_RENDER_RESPONSE') and \
                event.type == EventType.ON_RENDER_RESPONSE:
            self.bus.post(ResponseEvent(assistant=self, response_text=event.args.get('text')))
+            tts, args = self._get_tts_plugin()
+
+            if tts and 'text' in event.args:
+                self.stop_conversation()
+                tts.say(text=event.args['text'], **args)
        elif hasattr(EventType, 'ON_RESPONDING_STARTED') and \
                event.type == EventType.ON_RESPONDING_STARTED and \
                event.args.get('is_error_response', False) is True:
@ -141,6 +146,20 @@ class AssistantGoogleBackend(AssistantBackend):
        if self.assistant:
            self.assistant.stop_conversation()

+    def set_mic_mute(self, muted):
+        """
+        Forward a microphone mute/unmute request to the running assistant.
+
+        If no assistant instance is set, a warning is logged and nothing else happens.
+
+        :param muted: Value passed through to the assistant's ``set_mic_mute`` method.
+        """
+        if self.assistant:
+            self.assistant.set_mic_mute(muted)
+        else:
+            self.logger.warning('Assistant not running')
+
+    def send_text_query(self, query):
+        """
+        Forward a text query to the running assistant.
+
+        If no assistant instance is set, a warning is logged and nothing else happens.
+
+        :param query: Value passed through to the assistant's ``send_text_query`` method.
+        """
+        if self.assistant:
+            self.assistant.send_text_query(query)
+        else:
+            self.logger.warning('Assistant not running')
+
    def run(self):
        import google.oauth2.credentials
        from google.assistant.library import Assistant
@ -148,9 +167,7 @@ class AssistantGoogleBackend(AssistantBackend):
        super().run()

        with open(self.credentials_file, 'r') as f:
-            self.credentials = google.oauth2.credentials.Credentials(
-                token=None,
-                **json.load(f))
+            self.credentials = google.oauth2.credentials.Credentials(token=None, **json.load(f))

        while not self.should_stop():
            self._has_error = False
--- a/platypush/backend/assistant/snowboy/init.py
+++ b/platypush/backend/assistant/snowboy/init.py
@ -125,6 +125,8 @@ class AssistantSnowboyBackend(AssistantBackend):
                'detect_sound': detect_sound,
                'assistant_plugin': get_plugin(assistant_plugin_name) if assistant_plugin_name else None,
                'assistant_language': conf.get('assistant_language'),
+                'tts_plugin': conf.get('tts_plugin'),
+                'tts_args': conf.get('tts_args', {}),
            }

    def hotword_detected(self, hotword):
@ -150,12 +152,15 @@ class AssistantSnowboyBackend(AssistantBackend):
            detect_sound = model.get('detect_sound')
            assistant_plugin = model.get('assistant_plugin')
            assistant_language = model.get('assistant_language')
+            tts_plugin = model.get('tts_plugin')
+            tts_args = model.get('tts_args')

            if detect_sound:
                threading.Thread(target=sound_thread, args=(detect_sound,)).start()

            if assistant_plugin:
-                assistant_plugin.start_conversation(language=assistant_language)
+                assistant_plugin.start_conversation(language=assistant_language, tts_plugin=tts_plugin,
+                                                    tts_args=tts_args)

        return callback

--- a/platypush/plugins/assistant/init.py
+++ b/platypush/plugins/assistant/init.py
@ -10,7 +10,7 @@ class AssistantPlugin(ABC, Plugin):
    """

    @abstractmethod
-    def start_conversation(self, *args, language=None, **kwargs):
+    def start_conversation(self, *args, language=None, tts_plugin=None, tts_args=None, **kwargs):
        """
        Start a conversation.
        """
--- a/platypush/plugins/assistant/google/init.py
+++ b/platypush/plugins/assistant/google/init.py
@ -2,6 +2,7 @@
 .. moduleauthor:: Fabio Manganiello <blacklight86@gmail.com>
 """

+from platypush.backend.assistant.google import AssistantGoogleBackend
 from platypush.context import get_backend
 from platypush.plugins import action
 from platypush.plugins.assistant import AssistantPlugin
@ -17,11 +18,11 @@ class AssistantGooglePlugin(AssistantPlugin):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

-    def _get_assistant(self):
+    def _get_assistant(self) -> AssistantGoogleBackend:
+        """
+        Look up the ``assistant.google`` backend through :func:`platypush.context.get_backend`.
+
+        NOTE(review): the return annotation assumes the backend is registered; what ``get_backend`` yields
+        when it is not configured is not visible here - confirm before relying on it.
+        """
        return get_backend('assistant.google')

    @action
-    def start_conversation(self):
+    def start_conversation(self, **kwargs):
        """
        Programmatically start a conversation with the assistant
        """
@ -36,5 +37,25 @@ class AssistantGooglePlugin(AssistantPlugin):
        assistant = self._get_assistant()
        assistant.stop_conversation()

+    @action
+    def set_mic_mute(self, muted: bool = True):
+        """
+        Mute or unmute the microphone through the ``assistant.google`` backend.
+
+        :param muted: Mute flag forwarded to the backend's ``set_mic_mute`` (default: True).
+        """
+        self._get_assistant().set_mic_mute(muted)
+
+    @action
+    def send_text_query(self, query: str):
+        """
+        Submit a text query to the assistant through the ``assistant.google`` backend.
+
+        :param query: Query text forwarded to the backend's ``send_text_query``.
+        """
+        self._get_assistant().send_text_query(query)
+

 # vim:sw=4:ts=4:et:
--- a/platypush/plugins/assistant/google/lib/init.py
+++ b/platypush/plugins/assistant/google/lib/init.py
@ -72,9 +72,7 @@ class SampleAssistant(object):
        self.is_new_conversation = True

        # Create Google Assistant API gRPC client.
-        self.assistant = embedded_assistant_pb2_grpc.EmbeddedAssistantStub(
-            channel
-        )
+        self.assistant = embedded_assistant_pb2_grpc.EmbeddedAssistantStub(channel)
        self.deadline = deadline_sec

        self.device_handler = device_handler
@ -126,8 +124,7 @@ class SampleAssistant(object):

        # This generator yields AssistResponse proto messages
        # received from the gRPC Google Assistant API.
-        for resp in self.assistant.Assist(iter_log_assist_requests(),
-                                          self.deadline):
+        for resp in self.assistant.Assist(iter_log_assist_requests(), self.deadline):
            assistant_helpers.log_assist_response_without_audio(resp)
            if resp.event_type == END_OF_UTTERANCE:
                logging.info('End of audio request detected.')
@ -143,6 +140,7 @@ class SampleAssistant(object):
                    if len(r.transcript.strip())).strip()

                logging.info('Transcript of user request: "%s".', self.detected_speech)
+
            if len(resp.audio_out.audio_data) > 0:
                if not self.conversation_stream.playing:
                    self.conversation_stream.stop_recording()
@ -155,10 +153,12 @@ class SampleAssistant(object):
                    self.conversation_stream.write(resp.audio_out.audio_data)
                elif self.conversation_stream.playing:
                    self.conversation_stream.stop_playback()
+
            if resp.dialog_state_out.conversation_state:
                conversation_state = resp.dialog_state_out.conversation_state
                logging.debug('Updating conversation state.')
                self.conversation_state = conversation_state
+
            if resp.dialog_state_out.volume_percentage != 0:
                volume_percentage = resp.dialog_state_out.volume_percentage
                logging.info('Setting volume to %s%%', volume_percentage)
@ -166,11 +166,13 @@ class SampleAssistant(object):

                if self.on_volume_changed:
                    self.on_volume_changed(volume_percentage)
+
            if resp.dialog_state_out.microphone_mode == DIALOG_FOLLOW_ON:
                continue_conversation = True
                logging.info('Expecting follow-on query from user.')
            elif resp.dialog_state_out.microphone_mode == CLOSE_MICROPHONE:
                continue_conversation = False
+
            if resp.device_action.device_request_json:
                device_request = json.loads(
                    resp.device_action.device_request_json
@ -178,6 +180,7 @@ class SampleAssistant(object):
                fs = self.device_handler(device_request)
                if fs:
                    device_actions_futures.extend(fs)
+
            if self.display and resp.screen_out.data:
                system_browser = browser_helpers.system_browser
                system_browser.display(resp.screen_out.data)
--- a/platypush/plugins/assistant/google/pushtotalk.py
+++ b/platypush/plugins/assistant/google/pushtotalk.py
@ -4,8 +4,9 @@

 import json
 import os
+from typing import Optional, Dict, Any

-from platypush.context import get_bus
+from platypush.context import get_bus, get_plugin
 from platypush.message.event.assistant import ConversationStartEvent, \
    ConversationEndEvent, SpeechRecognizedEvent, VolumeChangedEvent, \
    ResponseEvent
@ -48,6 +49,8 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin):
                     'device_config.json'),
                 language='en-US',
                 play_response=True,
+                 tts_plugin=None,
+                 tts_args=None,
                 **kwargs):
        """
        :param credentials_file: Path to the Google OAuth credentials file
@ -68,6 +71,12 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin):
        :param play_response: If True (default) then the plugin will play the assistant response upon processed
            response. Otherwise nothing will be played - but you may want to handle the ``ResponseEvent`` manually.
        :type play_response: bool
+
+        :param tts_plugin: Optional text-to-speech plugin to be used to process response text.
+        :type tts_plugin: str
+
+        :param tts_args: Optional arguments for the TTS plugin ``say`` method.
+        :type tts_args: dict
        """

        import googlesamples.assistant.grpc.audio_helpers as audio_helpers
@ -83,6 +92,8 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin):
        self.credentials_file = credentials_file
        self.device_config = device_config
        self.play_response = play_response
+        self.tts_plugin = tts_plugin
+        self.tts_args = tts_args or {}
        self.assistant = None
        self.interactions = []

@ -188,18 +199,26 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin):
            else:
                self.interactions[-1]['response'] = response

+            if self.tts_plugin:
+                tts = get_plugin(self.tts_plugin)
+                tts.say(response, **self.tts_args)
+
        return handler

    @action
-    def start_conversation(self, *args, language=None, **kwargs):
+    def start_conversation(self, *args, language: Optional[str] = None, tts_plugin: Optional[str] = None,
+                           tts_args: Optional[Dict[str, Any]] = None, **kwargs):
        """
        Start a conversation

-        :param language: Language code override (default: default configured language)
-        :type language: str
+        :param language: Language code override (default: default configured language).
+        :param tts_plugin: Optional text-to-speech plugin to be used for rendering text.
+        :param tts_args: Optional arguments for the TTS plugin say method.

        :returns: A list of the interactions that happen within the conversation.

+        .. code-block:: json
+
            [
                {
                    "request": "request 1",
@ -212,15 +231,16 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin):
                    "response": "response 2"

                }
-
            ]

        """

        from platypush.plugins.assistant.google.lib import SampleAssistant

-        if not language:
-            language = self.language
+        self.tts_plugin = tts_plugin
+        self.tts_args = tts_args
+        language = language or self.language
+        play_response = False if self.tts_plugin else self.play_response

        self._init_assistant()
        self.on_conversation_start()
@ -232,7 +252,7 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin):
                             display=None,
                             channel=self.grpc_channel,
                             deadline_sec=self.grpc_deadline,
-                             play_response=self.play_response,
+                             play_response=play_response,
                             device_handler=self.device_handler,
                             on_conversation_start=self.on_conversation_start(),
                             on_conversation_end=self.on_conversation_end(),
@ -262,6 +282,22 @@ class AssistantGooglePushtotalkPlugin(AssistantPlugin):

            get_bus().post(ConversationEndEvent(assistant=self))

+    @action
+    def set_mic_mute(self, muted: bool = True):
+        """
+        Mute or unmute the microphone by stopping or starting recording on the conversation stream.
+
+        When there is no conversation stream a warning is logged and nothing else happens.
+
+        :param muted: If true (default) recording is stopped, otherwise recording is started.
+        """
+        if self.conversation_stream:
+            if muted:
+                self.conversation_stream.stop_recording()
+            else:
+                self.conversation_stream.start_recording()
+
+            return
+
+        self.logger.warning('The assistant is not running')
+
    def _install_device_handlers(self):
        import googlesamples.assistant.grpc.device_helpers as device_helpers
        self.device_handler = device_helpers.DeviceRequestHandler(self.device_id)