From 5766367402b27cd5df1e497a2cdec464003233a3 Mon Sep 17 00:00:00 2001 From: Fabio Manganiello Date: Thu, 11 Jul 2019 22:54:33 +0200 Subject: [PATCH] Rewritten snowboy and Google push-to-talk plugins to make them a better replacement for the deprecated google-assistant-library --- .../{google/__init__.py => google.py} | 7 + .../backend/assistant/google/pushtotalk.py | 385 ------------------ .../backend/assistant/snowboy/__init__.py | 49 ++- .../js/plugins/assistant.google/index.js | 16 +- platypush/message/event/assistant/__init__.py | 9 + platypush/plugins/assistant/__init__.py | 19 + .../plugins/assistant/google/__init__.py | 1 - .../plugins/assistant/google/lib/__init__.py | 225 ++++++++++ .../plugins/assistant/google/pushtotalk.py | 244 ++++++++++- requirements.txt | 4 +- 10 files changed, 547 insertions(+), 412 deletions(-) rename platypush/backend/assistant/{google/__init__.py => google.py} (93%) delete mode 100644 platypush/backend/assistant/google/pushtotalk.py create mode 100644 platypush/plugins/assistant/google/lib/__init__.py diff --git a/platypush/backend/assistant/google/__init__.py b/platypush/backend/assistant/google.py similarity index 93% rename from platypush/backend/assistant/google/__init__.py rename to platypush/backend/assistant/google.py index fd43af41b..a27dd471f 100644 --- a/platypush/backend/assistant/google/__init__.py +++ b/platypush/backend/assistant/google.py @@ -28,6 +28,13 @@ class AssistantGoogleBackend(Backend): It listens for voice commands and post conversation events on the bus. + **WARNING**: This backend is deprecated, as the underlying Google Assistant + library has been deprecated too: https://developers.google.com/assistant/sdk/reference/library/python/ + The old library might still work on some systems but its proper functioning + is not guaranteed. + Please use the Snowboy backend for hotword detection and the Google Assistant + push-to-talk plugin for assistant interaction instead. + Triggers: * :class:`platypush.message.event.assistant.ConversationStartEvent` \ diff --git a/platypush/backend/assistant/google/pushtotalk.py b/platypush/backend/assistant/google/pushtotalk.py deleted file mode 100644 index 74d298630..000000000 --- a/platypush/backend/assistant/google/pushtotalk.py +++ /dev/null @@ -1,385 +0,0 @@ -""" -.. moduleauthor:: Fabio Manganiello -.. license: MIT -""" - -import concurrent -import json -import logging -import os - -import grpc -import google.auth.transport.grpc -import google.auth.transport.requests -import google.oauth2.credentials -from google.assistant.embedded.v1alpha2 import embedded_assistant_pb2, \ - embedded_assistant_pb2_grpc - -import googlesamples.assistant.grpc.audio_helpers as audio_helpers -import googlesamples.assistant.grpc.device_helpers as device_helpers -import googlesamples.assistant.grpc.assistant_helpers as assistant_helpers - -from tenacity import retry, stop_after_attempt, retry_if_exception - - -from platypush.backend import Backend -from platypush.message.event.assistant import \ - ConversationStartEvent, ConversationEndEvent, SpeechRecognizedEvent - - -class AssistantGooglePushtotalkBackend(Backend): - """ - Google Assistant pushtotalk backend. Instead of listening for the "OK - Google" hotword like the assistant.google backend, this implementation - programmatically starts a conversation upon start_conversation() method - call. Use this backend on devices that don't have an Assistant SDK package - (e.g. arm6 devices like the RaspberryPi Zero or the RaspberryPi 1). 
- - Triggers: - - * :class:`platypush.message.event.assistant.ConversationStartEvent` \ - when a new conversation starts - * :class:`platypush.message.event.assistant.SpeechRecognizedEvent` \ - when a new voice command is recognized - * :class:`platypush.message.event.assistant.ConversationEndEvent` \ - when a new conversation ends - - Requires: - - * **tenacity** (``pip install tenacity``) - * **grpc** (``pip install grpc``) - * **google-assistant-grpc** (``pip install google-assistant-grpc``) - """ - - api_endpoint = 'embeddedassistant.googleapis.com' - audio_sample_rate = audio_helpers.DEFAULT_AUDIO_SAMPLE_RATE - audio_sample_width = audio_helpers.DEFAULT_AUDIO_SAMPLE_WIDTH - audio_iter_size = audio_helpers.DEFAULT_AUDIO_ITER_SIZE - audio_block_size = audio_helpers.DEFAULT_AUDIO_DEVICE_BLOCK_SIZE - audio_flush_size = audio_helpers.DEFAULT_AUDIO_DEVICE_FLUSH_SIZE - grpc_deadline = 60 * 3 + 5 - - def __init__(self, *args, - credentials_file=os.path.join( - os.path.expanduser('~'), '.config', - 'google-oauthlib-tool', 'credentials.json'), - device_config=os.path.join( - os.path.expanduser('~'), '.config', 'googlesamples-assistant', - 'device_config.json'), - lang='en-US', - conversation_start_fifo=os.path.join(os.path.sep, 'tmp', 'pushtotalk.fifo'), - **kwargs): - """ - :param credentials_file: Path to the Google OAuth credentials file \ - (default: ~/.config/google-oauthlib-tool/credentials.json). \ - See https://developers.google.com/assistant/sdk/guides/library/python/embed/install-sample#generate_credentials \ - for instructions to get your own credentials file. - :type credentials_file: str - - :param device_config: Path to device_config.json. Register your device \ - (see https://developers.google.com/assistant/sdk/guides/library/python/embed/register-device) \ - and create a project, then run the pushtotalk.py script from \ - googlesamples to create your device_config.json - :type device_config: str - - :param lang: Assistant language (default: en-US) - :type lang: str - """ - - super().__init__(*args, **kwargs) - - self.lang = lang - self.credentials_file = credentials_file - self.device_config = device_config - self.conversation_start_fifo = conversation_start_fifo - self.assistant = None - - try: - os.mkfifo(self.conversation_start_fifo) - except FileExistsError: - pass - - with open(self.device_config) as f: - device = json.load(f) - self.device_id = device['id'] - self.device_model_id = device['model_id'] - - # Load OAuth 2.0 credentials. - try: - with open(self.credentials_file, 'r') as f: - credentials = google.oauth2.credentials.Credentials(token=None, - **json.load(f)) - http_request = google.auth.transport.requests.Request() - credentials.refresh(http_request) - except Exception as ex: - self.logger.error('Error loading credentials: %s', str(ex)) - self.logger.error('Run google-oauthlib-tool to initialize ' - 'new OAuth 2.0 credentials.') - raise - - # Create an authorized gRPC channel. - self.grpc_channel = google.auth.transport.grpc.secure_authorized_channel( - credentials, http_request, self.api_endpoint) - self.logger.info('Connecting to %s', self.api_endpoint) - - # Configure audio source and sink. 
- audio_device = None - audio_source = audio_device = ( - audio_device or audio_helpers.SoundDeviceStream( - sample_rate=self.audio_sample_rate, - sample_width=self.audio_sample_width, - block_size=self.audio_block_size, - flush_size=self.audio_flush_size - ) - ) - - audio_sink = audio_device = ( - audio_device or audio_helpers.SoundDeviceStream( - sample_rate=self.audio_sample_rate, - sample_width=self.audio_sample_width, - block_size=self.audio_block_size, - flush_size=self.audio_flush_size - ) - ) - - # Create conversation stream with the given audio source and sink. - self.conversation_stream = audio_helpers.ConversationStream( - source=audio_source, - sink=audio_sink, - iter_size=self.audio_iter_size, - sample_width=self.audio_sample_width, - ) - - self.device_handler = device_helpers.DeviceRequestHandler(self.device_id) - - def start_conversation(self): - """ Start a conversation """ - if self.assistant: - with open(self.conversation_start_fifo, 'w') as f: - f.write('1') - - def stop_conversation(self): - """ Stop a conversation """ - if self.assistant: - self.conversation_stream.stop_playback() - self.bus.post(ConversationEndEvent()) - - def on_conversation_start(self): - """ Conversation start handler """ - self.bus.post(ConversationStartEvent()) - - def on_conversation_end(self): - """ Conversation end handler """ - self.bus.post(ConversationEndEvent()) - - def on_speech_recognized(self, speech): - """ Speech recognized handler """ - self.bus.post(SpeechRecognizedEvent(phrase=speech)) - - def run(self): - """ Backend executor """ - super().run() - - with SampleAssistant(self.lang, self.device_model_id, self.device_id, - self.conversation_stream, - self.grpc_channel, self.grpc_deadline, - self.device_handler, - on_conversation_start=self.on_conversation_start, - on_conversation_end=self.on_conversation_end, - on_speech_recognized=self.on_speech_recognized) as self.assistant: - while not self.should_stop(): - with open(self.conversation_start_fifo, 'r') as f: - f.read() - - self.logger.info('Received conversation start event') - continue_conversation = True - - while continue_conversation: - (user_request, continue_conversation) = self.assistant.assist() - self.logger('User request: {}'.format(user_request)) - - self.on_conversation_end() - - -class SampleAssistant: - """Sample Assistant that supports conversations and device actions. - - Args: - device_model_id: identifier of the device model. - device_id: identifier of the registered device instance. - conversation_stream(ConversationStream): audio stream for recording \ - query and playing back assistant answer. - channel: authorized gRPC channel for connection to the Google Assistant API. - deadline_sec: gRPC deadline in seconds for Google Assistant API call. - device_handler: callback for device actions. 
- """ - - END_OF_UTTERANCE = embedded_assistant_pb2.AssistResponse.END_OF_UTTERANCE - DIALOG_FOLLOW_ON = embedded_assistant_pb2.DialogStateOut.DIALOG_FOLLOW_ON - CLOSE_MICROPHONE = embedded_assistant_pb2.DialogStateOut.CLOSE_MICROPHONE - - def __init__(self, language_code, device_model_id, device_id, - conversation_stream, - channel, deadline_sec, device_handler, - on_conversation_start=None, - on_conversation_end=None, - on_speech_recognized=None): - self.language_code = language_code - self.device_model_id = device_model_id - self.device_id = device_id - self.conversation_stream = conversation_stream - self.logger = logging.getLogger(__name__) - - self.on_conversation_start = on_conversation_start - self.on_conversation_end = on_conversation_end - self.on_speech_recognized = on_speech_recognized - - # Opaque blob provided in AssistResponse that, - # when provided in a follow-up AssistRequest, - # gives the Assistant a context marker within the current state - # of the multi-Assist()-RPC "conversation". - # This value, along with MicrophoneMode, supports a more natural - # "conversation" with the Assistant. - self.conversation_state = None - - # Create Google Assistant API gRPC client. - self.assistant = embedded_assistant_pb2_grpc.EmbeddedAssistantStub( - channel - ) - self.deadline = deadline_sec - - self.device_handler = device_handler - - def __enter__(self): - return self - - def __exit__(self, etype, e, traceback): - if e: - return False - self.conversation_stream.close() - return True - - @staticmethod - def is_grpc_error_unavailable(e): - """ Returns True if the gRPC is not available """ - is_grpc_error = isinstance(e, grpc.RpcError) - if is_grpc_error and (e.code() == grpc.StatusCode.UNAVAILABLE): - print('grpc unavailable error: {}'.format(e)) - return True - return False - - @retry(reraise=True, stop=stop_after_attempt(3), - retry=retry_if_exception(is_grpc_error_unavailable)) - def assist(self): - """Send a voice request to the Assistant and playback the response. - - Returns: True if conversation should continue. - """ - continue_conversation = False - device_actions_futures = [] - - self.conversation_stream.start_recording() - self.logger.info('Recording audio request.') - - if self.on_conversation_start: - self.on_conversation_start() - - def iter_assist_requests(): - for c in self.gen_assist_requests(): - assistant_helpers.log_assist_request_without_audio(c) - yield c - self.conversation_stream.start_playback() - - user_request = None - - # This generator yields AssistResponse proto messages - # received from the gRPC Google Assistant API. 
- for resp in self.assistant.Assist(iter_assist_requests(), - self.deadline): - assistant_helpers.log_assist_response_without_audio(resp) - if resp.event_type == self.END_OF_UTTERANCE: - self.logger.info('End of audio request detected') - self.conversation_stream.stop_recording() - if resp.speech_results: - user_request = ' '.join( - r.transcript for r in resp.speech_results) - - self.logger.info('Transcript of user request: "%s".', user_request) - self.logger.info('Playing assistant response.') - if resp.audio_out.audio_data: - self.conversation_stream.write(resp.audio_out.audio_data) - if resp.dialog_state_out.conversation_state: - conversation_state = resp.dialog_state_out.conversation_state - self.logger.debug('Updating conversation state.') - self.conversation_state = conversation_state - if resp.dialog_state_out.volume_percentage != 0: - volume_percentage = resp.dialog_state_out.volume_percentage - self.logger.info('Setting volume to %s%%', volume_percentage) - self.conversation_stream.volume_percentage = volume_percentage - if resp.dialog_state_out.microphone_mode == self.DIALOG_FOLLOW_ON: - continue_conversation = True - self.logger.info('Expecting follow-on query from user.') - elif resp.dialog_state_out.microphone_mode == self.CLOSE_MICROPHONE: - continue_conversation = False - if resp.device_action.device_request_json: - device_request = json.loads( - resp.device_action.device_request_json - ) - fs = self.device_handler(device_request) - if fs: - device_actions_futures.extend(fs) - - if device_actions_futures: - self.logger.info('Waiting for device executions to complete.') - concurrent.futures.wait(device_actions_futures) - - self.logger.info('Finished playing assistant response.') - - try: - self.conversation_stream.stop_playback() - except Exception: - pass - - if user_request and self.on_speech_recognized: - self.on_speech_recognized(user_request) - - return (user_request, continue_conversation) - - def gen_assist_requests(self): - """Yields: AssistRequest messages to send to the API.""" - - dialog_state_in = embedded_assistant_pb2.DialogStateIn( - language_code=self.language_code, - conversation_state=b'' - ) - - if self.conversation_state: - self.logger.debug('Sending conversation state.') - dialog_state_in.conversation_state = self.conversation_state - - config = embedded_assistant_pb2.AssistConfig( - audio_in_config=embedded_assistant_pb2.AudioInConfig( - encoding='LINEAR16', - sample_rate_hertz=self.conversation_stream.sample_rate, - ), - audio_out_config=embedded_assistant_pb2.AudioOutConfig( - encoding='LINEAR16', - sample_rate_hertz=self.conversation_stream.sample_rate, - volume_percentage=self.conversation_stream.volume_percentage, - ), - dialog_state_in=dialog_state_in, - device_config=embedded_assistant_pb2.DeviceConfig( - device_id=self.device_id, - device_model_id=self.device_model_id, - ) - ) - - # The first AssistRequest must contain the AssistConfig - # and no audio data. - yield embedded_assistant_pb2.AssistRequest(config=config) - for data in self.conversation_stream: - # Subsequent requests need audio data, but not config. 
-            yield embedded_assistant_pb2.AssistRequest(audio_in=data)
-
-
-# vim:sw=4:ts=4:et:
diff --git a/platypush/backend/assistant/snowboy/__init__.py b/platypush/backend/assistant/snowboy/__init__.py
index 6d63e2265..209f9e6e4 100644
--- a/platypush/backend/assistant/snowboy/__init__.py
+++ b/platypush/backend/assistant/snowboy/__init__.py
@@ -9,10 +9,12 @@ import subprocess
 import time
 
 from platypush.backend import Backend
+from platypush.context import get_plugin
 from platypush.message.event.assistant import \
     ConversationStartEvent, ConversationEndEvent, \
     SpeechRecognizedEvent, HotwordDetectedEvent
 
+
 class AssistantSnowboyBackend(Backend):
     """
     Backend for detecting custom voice hotwords through Snowboy. The purpose of
@@ -30,10 +32,26 @@ class AssistantSnowboyBackend(Backend):
     Requires:
 
         * **snowboy** (``pip install snowboy``)
+
+    If the command above fails, you can install Snowboy and its Python
+    bindings manually:
+
+        $ [sudo] apt-get install libatlas-base-dev swig
+        $ [sudo] pip install pyaudio
+        $ git clone https://github.com/Kitt-AI/snowboy
+        $ cd snowboy/swig/Python3
+        $ make
+        $ cd ../..
+        $ python3 setup.py build
+        $ [sudo] python3 setup.py install
+
+    You will also need a voice model for the hotword detection. You can find
+    some under the ``resources/models`` directory of the Snowboy repository,
+    or train/download other models from https://snowboy.kitt.ai.
     """
 
     def __init__(self, voice_model_file, hotword=None, sensitivity=0.5,
-                 audio_gain=1.0, **kwargs):
+                 audio_gain=1.0, assistant_plugin=None, **kwargs):
         """
         :param voice_model_file: Snowboy voice model file - \
             see https://snowboy.kitt.ai/
@@ -42,20 +60,33 @@ class AssistantSnowboyBackend(Backend):
         :param hotword: Name of the hotword
         :type hotword: str
 
-        :param sensitivity: Hotword recognition sensitivity, between 0 and 1
+        :param sensitivity: Hotword recognition sensitivity, between 0 and 1.
+            Default: 0.5.
         :type sensitivity: float
 
         :param audio_gain: Audio gain, between 0 and 1
         :type audio_gain: float
+
+        :param assistant_plugin: By default Snowboy fires a
+            :class:`platypush.message.event.assistant.HotwordDetectedEvent` event
+            whenever the hotword is detected. You can also pass the plugin name of
+            a :class:`platypush.plugins.assistant.AssistantPlugin` instance
+            (for example ``assistant.google.pushtotalk``). If set, then the
+            assistant plugin will be invoked to start a conversation.
+ :type assistant_plugin: str """ - from snowboy import snowboydecoder + try: + import snowboydecoder + except ImportError: + import snowboy.snowboydecoder as snowboydecoder super().__init__(**kwargs) - self.voice_model_file = voice_model_file + self.voice_model_file = os.path.abspath(os.path.expanduser(voice_model_file)) self.hotword = hotword self.sensitivity = sensitivity self.audio_gain = audio_gain + self.assistant_plugin = assistant_plugin self.detector = snowboydecoder.HotwordDetector( self.voice_model_file, sensitivity=self.sensitivity, @@ -70,8 +101,18 @@ class AssistantSnowboyBackend(Backend): def callback(): self.bus.post(HotwordDetectedEvent(hotword=self.hotword)) + + if self.assistant_plugin: + # Trigger assistant conversation + get_plugin(self.assistant_plugin).start_conversation() + return callback + def on_stop(self): + if self.detector: + self.detector.terminate() + self.detector = None + def run(self): super().run() self.detector.start(self.hotword_detected()) diff --git a/platypush/backend/http/static/js/plugins/assistant.google/index.js b/platypush/backend/http/static/js/plugins/assistant.google/index.js index 4ed37e15b..24e39d9d9 100644 --- a/platypush/backend/http/static/js/plugins/assistant.google/index.js +++ b/platypush/backend/http/static/js/plugins/assistant.google/index.js @@ -27,6 +27,7 @@ const Assistant = Vue.extend({ responseText: '', phrase: '', visible: false, + hideTimeout: undefined, state: { listening: false, @@ -51,11 +52,21 @@ const Assistant = Vue.extend({ this.reset(); this.state.listening = true; this.visible = true; + + if (this.hideTimeout) { + clearTimeout(this.hideTimeout); + this.hideTimeout = undefined; + } }, conversationEnd: function() { - this.reset(); - this.visible = false; + const self = this; + + this.hideTimeout = setTimeout(() => { + this.reset(); + self.visible = false; + self.hideTimeout = undefined; + }, 4000); }, speechRecognized: function(event) { @@ -86,7 +97,6 @@ const Assistant = Vue.extend({ registerHandlers: function() { registerEventHandler(this.conversationStart, 'platypush.message.event.assistant.ConversationStartEvent'); - registerEventHandler(this.conversationStart, 'platypush.message.event.assistant.HotwordDetectedEvent'); registerEventHandler(this.alertOn, 'platypush.message.event.assistant.AlertStartedEvent'); registerEventHandler(this.alertOff, 'platypush.message.event.assistant.AlertEndEvent'); registerEventHandler(this.speechRecognized, 'platypush.message.event.assistant.SpeechRecognizedEvent'); diff --git a/platypush/message/event/assistant/__init__.py b/platypush/message/event/assistant/__init__.py index 5b2e689ff..22a3e0005 100644 --- a/platypush/message/event/assistant/__init__.py +++ b/platypush/message/event/assistant/__init__.py @@ -118,6 +118,15 @@ class HotwordDetectedEvent(AssistantEvent): super().__init__(*args, hotword=hotword, **kwargs) +class VolumeChangedEvent(AssistantEvent): + """ + Event triggered when the volume of the assistant changes + """ + + def __init__(self, volume, *args, **kwargs): + super().__init__(*args, volume=volume, **kwargs) + + class AlertStartedEvent(AssistantEvent): """ Event triggered when an alert starts on the assistant diff --git a/platypush/plugins/assistant/__init__.py b/platypush/plugins/assistant/__init__.py index e69de29bb..e467d59f3 100644 --- a/platypush/plugins/assistant/__init__.py +++ b/platypush/plugins/assistant/__init__.py @@ -0,0 +1,19 @@ +from abc import ABC, abstractmethod + +from platypush.plugins import Plugin + +class AssistantPlugin(ABC, Plugin): + """ 
+ Base class for assistant plugins + """ + + @abstractmethod + def start_conversation(self, *args, **kwargs): + raise NotImplementedError + + @abstractmethod + def stop_conversation(self, *args, **kwargs): + raise NotImplementedError + + +# vim:sw=4:ts=4:et: diff --git a/platypush/plugins/assistant/google/__init__.py b/platypush/plugins/assistant/google/__init__.py index 5fa9e43ae..296f5fae7 100644 --- a/platypush/plugins/assistant/google/__init__.py +++ b/platypush/plugins/assistant/google/__init__.py @@ -33,4 +33,3 @@ class AssistantGooglePlugin(Plugin): # vim:sw=4:ts=4:et: - diff --git a/platypush/plugins/assistant/google/lib/__init__.py b/platypush/plugins/assistant/google/lib/__init__.py new file mode 100644 index 000000000..053290ab5 --- /dev/null +++ b/platypush/plugins/assistant/google/lib/__init__.py @@ -0,0 +1,225 @@ +"""Based on Google pushtotalk.py sample.""" + +import concurrent.futures +import json +import logging + +import grpc + +from google.assistant.embedded.v1alpha2 import ( + embedded_assistant_pb2, + embedded_assistant_pb2_grpc +) +from tenacity import retry, stop_after_attempt, retry_if_exception + +try: + from googlesamples.assistant.grpc import ( + assistant_helpers, + audio_helpers, + browser_helpers, + device_helpers + ) +except (SystemError, ImportError): + import assistant_helpers + import audio_helpers + import browser_helpers + import device_helpers + + +ASSISTANT_API_ENDPOINT = 'embeddedassistant.googleapis.com' +END_OF_UTTERANCE = embedded_assistant_pb2.AssistResponse.END_OF_UTTERANCE +DIALOG_FOLLOW_ON = embedded_assistant_pb2.DialogStateOut.DIALOG_FOLLOW_ON +CLOSE_MICROPHONE = embedded_assistant_pb2.DialogStateOut.CLOSE_MICROPHONE +PLAYING = embedded_assistant_pb2.ScreenOutConfig.PLAYING +DEFAULT_GRPC_DEADLINE = 60 * 3 + 5 + + +class SampleAssistant(object): + """Sample Assistant that supports conversations and device actions. + + Args: + device_model_id: identifier of the device model. + device_id: identifier of the registered device instance. + conversation_stream(ConversationStream): audio stream + for recording query and playing back assistant answer. + channel: authorized gRPC channel for connection to the + Google Assistant API. + deadline_sec: gRPC deadline in seconds for Google Assistant API call. + device_handler: callback for device actions. + """ + + def __init__(self, language_code, device_model_id, device_id, + conversation_stream, display, + channel, deadline_sec, device_handler, + on_conversation_start=None, on_conversation_end=None, + on_speech_recognized=None, on_volume_changed=None, + on_response=None): + self.language_code = language_code + self.device_model_id = device_model_id + self.device_id = device_id + self.conversation_stream = conversation_stream + self.display = display + + # Opaque blob provided in AssistResponse that, + # when provided in a follow-up AssistRequest, + # gives the Assistant a context marker within the current state + # of the multi-Assist()-RPC "conversation". + # This value, along with MicrophoneMode, supports a more natural + # "conversation" with the Assistant. + self.conversation_state = None + # Force reset of first conversation. + self.is_new_conversation = True + + # Create Google Assistant API gRPC client. 
+        self.assistant = embedded_assistant_pb2_grpc.EmbeddedAssistantStub(
+            channel
+        )
+        self.deadline = deadline_sec
+
+        self.device_handler = device_handler
+        self.detected_speech = None
+
+        self.on_conversation_start = on_conversation_start
+        self.on_conversation_end = on_conversation_end
+        self.on_speech_recognized = on_speech_recognized
+        self.on_volume_changed = on_volume_changed
+        self.on_response = on_response
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, etype, e, traceback):
+        if e:
+            return False
+        self.conversation_stream.close()
+
+    def is_grpc_error_unavailable(e):
+        is_grpc_error = isinstance(e, grpc.RpcError)
+        if is_grpc_error and (e.code() == grpc.StatusCode.UNAVAILABLE):
+            logging.error('grpc unavailable error: %s', e)
+            return True
+        return False
+
+    @retry(reraise=True, stop=stop_after_attempt(3),
+           retry=retry_if_exception(is_grpc_error_unavailable))
+    def assist(self):
+        """Send a voice request to the Assistant and playback the response.
+
+        Returns: True if conversation should continue.
+        """
+        continue_conversation = False
+        device_actions_futures = []
+
+        self.conversation_stream.start_recording()
+        if self.on_conversation_start:
+            self.on_conversation_start()
+
+        logging.info('Recording audio request.')
+
+        def iter_log_assist_requests():
+            for c in self.gen_assist_requests():
+                assistant_helpers.log_assist_request_without_audio(c)
+                yield c
+            logging.debug('Reached end of AssistRequest iteration.')
+
+        # This generator yields AssistResponse proto messages
+        # received from the gRPC Google Assistant API.
+        for resp in self.assistant.Assist(iter_log_assist_requests(),
+                                          self.deadline):
+            assistant_helpers.log_assist_response_without_audio(resp)
+            if resp.event_type == END_OF_UTTERANCE:
+                logging.info('End of audio request detected.')
+                logging.info('Stopping recording.')
+                self.conversation_stream.stop_recording()
+
+                if self.detected_speech and self.on_speech_recognized:
+                    self.on_speech_recognized(self.detected_speech)
+
+            if resp.speech_results:
+                self.detected_speech = ' '.join(
+                    r.transcript.strip() for r in resp.speech_results
+                    if r.transcript.strip()).strip()
+
+                logging.info('Transcript of user request: "%s".', self.detected_speech)
+            if len(resp.audio_out.audio_data) > 0:
+                if not self.conversation_stream.playing:
+                    self.conversation_stream.stop_recording()
+                    self.conversation_stream.start_playback()
+                    logging.info('Playing assistant response.')
+                self.conversation_stream.write(resp.audio_out.audio_data)
+            if resp.dialog_state_out.conversation_state:
+                conversation_state = resp.dialog_state_out.conversation_state
+                logging.debug('Updating conversation state.')
+                self.conversation_state = conversation_state
+            if resp.dialog_state_out.volume_percentage != 0:
+                volume_percentage = resp.dialog_state_out.volume_percentage
+                logging.info('Setting volume to %s%%', volume_percentage)
+                self.conversation_stream.volume_percentage = volume_percentage
+
+                if self.on_volume_changed:
+                    self.on_volume_changed(volume_percentage)
+            if resp.dialog_state_out.microphone_mode == DIALOG_FOLLOW_ON:
+                continue_conversation = True
+                logging.info('Expecting follow-on query from user.')
+            elif resp.dialog_state_out.microphone_mode == CLOSE_MICROPHONE:
+                continue_conversation = False
+            if resp.device_action.device_request_json:
+                device_request = json.loads(
+                    resp.device_action.device_request_json
+                )
+                fs = self.device_handler(device_request)
+                if fs:
+                    device_actions_futures.extend(fs)
+            if self.display and resp.screen_out.data:
+                system_browser = browser_helpers.system_browser
+                system_browser.display(resp.screen_out.data)
+
+            if resp.dialog_state_out.supplemental_display_text and self.on_response:
+                self.on_response(resp.dialog_state_out.supplemental_display_text)
+
+        if len(device_actions_futures):
+            logging.info('Waiting for device executions to complete.')
+            concurrent.futures.wait(device_actions_futures)
+
+        logging.info('Finished playing assistant response.')
+        self.conversation_stream.stop_playback()
+
+        if self.on_conversation_end:
+            self.on_conversation_end(continue_conversation)
+
+        return continue_conversation
+
+    def gen_assist_requests(self):
+        """Yields: AssistRequest messages to send to the API."""
+
+        config = embedded_assistant_pb2.AssistConfig(
+            audio_in_config=embedded_assistant_pb2.AudioInConfig(
+                encoding='LINEAR16',
+                sample_rate_hertz=self.conversation_stream.sample_rate,
+            ),
+            audio_out_config=embedded_assistant_pb2.AudioOutConfig(
+                encoding='LINEAR16',
+                sample_rate_hertz=self.conversation_stream.sample_rate,
+                volume_percentage=self.conversation_stream.volume_percentage,
+            ),
+            dialog_state_in=embedded_assistant_pb2.DialogStateIn(
+                language_code=self.language_code,
+                conversation_state=self.conversation_state,
+                is_new_conversation=self.is_new_conversation,
+            ),
+            device_config=embedded_assistant_pb2.DeviceConfig(
+                device_id=self.device_id,
+                device_model_id=self.device_model_id,
+            )
+        )
+        if self.display:
+            config.screen_out_config.screen_mode = PLAYING
+        # Continue current conversation with later requests.
+        self.is_new_conversation = False
+        # The first AssistRequest must contain the AssistConfig
+        # and no audio data.
+        yield embedded_assistant_pb2.AssistRequest(config=config)
+        for data in self.conversation_stream:
+            # Subsequent requests need audio data, but not config.
+            yield embedded_assistant_pb2.AssistRequest(audio_in=data)
+
diff --git a/platypush/plugins/assistant/google/pushtotalk.py b/platypush/plugins/assistant/google/pushtotalk.py
index 08fa3dd7a..3f97a998c 100644
--- a/platypush/plugins/assistant/google/pushtotalk.py
+++ b/platypush/plugins/assistant/google/pushtotalk.py
@@ -2,34 +2,242 @@
 .. moduleauthor:: Fabio Manganiello
 """
 
-from platypush.context import get_backend
-from platypush.plugins import Plugin, action
+import json
+import logging
+import os
 
-class AssistantGooglePushtotalkPlugin(Plugin):
+import google.auth.transport.grpc
+import google.auth.transport.requests
+import google.oauth2.credentials
+
+import googlesamples.assistant.grpc.audio_helpers as audio_helpers
+import googlesamples.assistant.grpc.device_helpers as device_helpers
+
+from platypush.context import get_bus
+from platypush.message.event.assistant import ConversationStartEvent, \
+    ConversationEndEvent, SpeechRecognizedEvent, VolumeChangedEvent, \
+    ResponseEvent
+
+from platypush.plugins import action
+from platypush.plugins.assistant import AssistantPlugin
+from platypush.plugins.assistant.google.lib import SampleAssistant
+
+class AssistantGooglePushtotalkPlugin(AssistantPlugin):
     """
-    Plugin for the Google assistant pushtotalk API. It acts as a wrapper to
-    programmatically control a
-    :mod:`platypush.backend.assistant.google.pushtotalk` backend.
+    Plugin for the Google Assistant push-to-talk API.
+ + Triggers: + + * :class:`platypush.message.event.assistant.ConversationStartEvent` \ + when a new conversation starts + * :class:`platypush.message.event.assistant.SpeechRecognizedEvent` \ + when a new voice command is recognized + * :class:`platypush.message.event.assistant.ConversationEndEvent` \ + when a new conversation ends + + Requires: + + * **tenacity** (``pip install tenacity``) + * **google-assistant-sdk** (``pip install google-assistant-sdk[samples]``) """ - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + api_endpoint = 'embeddedassistant.googleapis.com' + audio_sample_rate = audio_helpers.DEFAULT_AUDIO_SAMPLE_RATE + audio_sample_width = audio_helpers.DEFAULT_AUDIO_SAMPLE_WIDTH + audio_iter_size = audio_helpers.DEFAULT_AUDIO_ITER_SIZE + audio_block_size = audio_helpers.DEFAULT_AUDIO_DEVICE_BLOCK_SIZE + audio_flush_size = audio_helpers.DEFAULT_AUDIO_DEVICE_FLUSH_SIZE + grpc_deadline = 60 * 3 + 5 + + def __init__(self, + credentials_file=os.path.join( + os.path.expanduser('~'), '.config', + 'google-oauthlib-tool', 'credentials.json'), + device_config=os.path.join( + os.path.expanduser('~'), '.config', 'googlesamples-assistant', + 'device_config.json'), + language='en-US', **kwargs): + """ + :param credentials_file: Path to the Google OAuth credentials file \ + (default: ~/.config/google-oauthlib-tool/credentials.json). \ + See https://developers.google.com/assistant/sdk/guides/library/python/embed/install-sample#generate_credentials \ + for instructions to get your own credentials file. + :type credentials_file: str + + :param device_config: Path to device_config.json. Register your device \ + (see https://developers.google.com/assistant/sdk/guides/library/python/embed/register-device) \ + and create a project, then run the pushtotalk.py script from \ + googlesamples to create your device_config.json + :type device_config: str + + :param language: Assistant language (default: en-US) + :type language: str + """ + + super().__init__(**kwargs) + + self.language = language + self.credentials_file = credentials_file + self.device_config = device_config + self.assistant = None + self.interactions = [] + + with open(self.device_config) as f: + device = json.load(f) + self.device_id = device['id'] + self.device_model_id = device['model_id'] + + # Load OAuth 2.0 credentials. + try: + with open(self.credentials_file, 'r') as f: + self.credentials = google.oauth2.credentials.Credentials(token=None, + **json.load(f)) + self.http_request = google.auth.transport.requests.Request() + self.credentials.refresh(self.http_request) + except Exception as ex: + self.logger.error('Error loading credentials: %s', str(ex)) + self.logger.error('Run google-oauthlib-tool to initialize ' + 'new OAuth 2.0 credentials.') + raise + + self.grpc_channel = None + self.conversation_stream = None + self.device_handler = None + + def _init_assistant(self): + self.interactions = [] + + # Create an authorized gRPC channel. + self.grpc_channel = google.auth.transport.grpc.secure_authorized_channel( + self.credentials, self.http_request, self.api_endpoint) + + self.logger.info('Connecting to {}'.format(self.api_endpoint)) + + # Configure audio source and sink. 
+        audio_device = None
+        audio_source = audio_device = (
+            audio_device or audio_helpers.SoundDeviceStream(
+                sample_rate=self.audio_sample_rate,
+                sample_width=self.audio_sample_width,
+                block_size=self.audio_block_size,
+                flush_size=self.audio_flush_size
+            )
+        )
+
+        audio_sink = audio_device = (
+            audio_device or audio_helpers.SoundDeviceStream(
+                sample_rate=self.audio_sample_rate,
+                sample_width=self.audio_sample_width,
+                block_size=self.audio_block_size,
+                flush_size=self.audio_flush_size
+            )
+        )
+
+        # Create conversation stream with the given audio source and sink.
+        self.conversation_stream = audio_helpers.ConversationStream(
+            source=audio_source,
+            sink=audio_sink,
+            iter_size=self.audio_iter_size,
+            sample_width=self.audio_sample_width,
+        )
+
+        self.device_handler = device_helpers.DeviceRequestHandler(self.device_id)
+
+    def on_conversation_start(self):
+        """ Conversation start handler """
+        def handler():
+            get_bus().post(ConversationStartEvent())
+
+        return handler
+
+    def on_conversation_end(self):
+        """ Conversation end handler """
+        def handler(with_follow_on_turn):
+            get_bus().post(ConversationEndEvent(with_follow_on_turn=with_follow_on_turn))
+
+        return handler
+
+    def on_speech_recognized(self):
+        """ Speech recognized handler """
+        def handler(phrase):
+            get_bus().post(SpeechRecognizedEvent(phrase=phrase))
+            self.interactions.append({'request': phrase})
+
+        return handler
+
+    def on_volume_changed(self):
+        """ Volume changed handler """
+        def handler(volume):
+            get_bus().post(VolumeChangedEvent(volume=volume))
+
+        return handler
+
+    def on_response(self):
+        """ Response handler """
+        def handler(response):
+            get_bus().post(ResponseEvent(response_text=response))
+
+            if not self.interactions:
+                self.interactions.append({'response': response})
+            else:
+                self.interactions[-1]['response'] = response
+
+        return handler
 
     @action
-    def start_conversation(self):
+    def start_conversation(self, language=None):
         """
-        Programmatically start a conversation with the assistant
+        Start a conversation
+
+        :param language: Language code override (default: the configured
+            language)
+        :type language: str
+
+        :returns: A list of the interactions that happen within the conversation::
+
+            [
+                {
+                    "request": "request 1",
+                    "response": "response 1"
+                },
+                {
+                    "request": "request 2",
+                    "response": "response 2"
+                },
+            ]
         """
-        assistant = get_backend('assistant.google.pushtotalk')
-        assistant.start_conversation()
+
+        if not language:
+            language = self.language
+
+        self._init_assistant()
+
+        with SampleAssistant(language_code=language,
+                             device_model_id=self.device_model_id,
+                             device_id=self.device_id,
+                             conversation_stream=self.conversation_stream,
+                             display=None,
+                             channel=self.grpc_channel,
+                             deadline_sec=self.grpc_deadline,
+                             device_handler=self.device_handler,
+                             on_conversation_start=self.on_conversation_start(),
+                             on_conversation_end=self.on_conversation_end(),
+                             on_volume_changed=self.on_volume_changed(),
+                             on_response=self.on_response(),
+                             on_speech_recognized=self.on_speech_recognized()) as self.assistant:
+            continue_conversation = True
+
+            while continue_conversation:
+                continue_conversation = self.assistant.assist()
+
+            return self.interactions
 
     @action
     def stop_conversation(self):
-        """
-        Programmatically stop a running conversation with the assistant
-        """
-        assistant = get_backend('assistant.google.pushtotalk')
-        assistant.stop_conversation()
+        """ Stop a conversation """
+        if self.assistant:
+            self.conversation_stream.stop_playback()
+            get_bus().post(ConversationEndEvent())
 
 # vim:sw=4:ts=4:et:
-
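Usage note (illustrative, not part of the patch): once the plugin above is configured, a conversation can be triggered either through the snowboy backend's new ``assistant_plugin`` option or programmatically. A minimal sketch, assuming a running platypush instance plus the ``credentials.json`` and ``device_config.json`` files described in the plugin docstring; the defensive unwrapping accounts for the ``@action`` decorator possibly wrapping the return value in a response object:

    # Minimal sketch: trigger a push-to-talk conversation from custom
    # Python logic running inside platypush (e.g. an event hook).
    from platypush.context import get_plugin

    assistant = get_plugin('assistant.google.pushtotalk')

    # start_conversation() blocks until the conversation, including any
    # follow-on turns, is over.
    result = assistant.start_conversation(language='en-US')

    # Unwrap the interaction list documented in start_conversation().
    interactions = getattr(result, 'output', result) or []
    for interaction in interactions:
        print('request: ', interaction.get('request'))
        print('response:', interaction.get('response'))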
diff --git a/requirements.txt b/requirements.txt index fffd40053..60f220ac7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -54,7 +54,9 @@ sqlalchemy # Google APIs general layer support # google-api-python-client # oauth2client -# google-cloud + +# Google text-to-speech API support +# google-cloud-texttospeech # Last.FM scrobbler plugin support # pylast
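As a closing reference (hypothetical example, not part of the patch): any assistant integration only needs to implement the two abstract actions of the new ``AssistantPlugin`` base class to become a valid target for the snowboy backend's ``assistant_plugin`` option. A minimal sketch, with an illustrative plugin name:

    # Hypothetical no-op assistant plugin implementing the AssistantPlugin
    # contract introduced by this patch.
    from platypush.context import get_bus
    from platypush.message.event.assistant import ConversationStartEvent, \
        ConversationEndEvent
    from platypush.plugins import action
    from platypush.plugins.assistant import AssistantPlugin


    class AssistantDummyPlugin(AssistantPlugin):
        """ Posts the conversation events without doing any real work. """

        @action
        def start_conversation(self, *args, **kwargs):
            # A real implementation would start capturing audio here and
            # hand it over to its speech recognition engine.
            get_bus().post(ConversationStartEvent())

        @action
        def stop_conversation(self, *args, **kwargs):
            # ...and stop any ongoing capture/playback here.
            get_bus().post(ConversationEndEvent())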