diff --git a/platypush/plugins/assistant/__init__.py b/platypush/plugins/assistant/__init__.py index bb5b25e48..97e98f30f 100644 --- a/platypush/plugins/assistant/__init__.py +++ b/platypush/plugins/assistant/__init__.py @@ -9,21 +9,22 @@ from platypush.context import get_bus, get_plugin from platypush.entities.assistants import Assistant from platypush.entities.managers.assistants import AssistantEntityManager from platypush.message.event.assistant import ( - AssistantEvent, - ConversationStartEvent, - ConversationEndEvent, - ConversationTimeoutEvent, - ResponseEvent, - NoResponseEvent, - SpeechRecognizedEvent, - AlarmStartedEvent, AlarmEndEvent, - TimerStartedEvent, - TimerEndEvent, - AlertStartedEvent, + AlarmStartedEvent, AlertEndEvent, + AlertStartedEvent, + AssistantEvent, + ConversationEndEvent, + ConversationStartEvent, + ConversationTimeoutEvent, + HotwordDetectedEvent, MicMutedEvent, MicUnmutedEvent, + NoResponseEvent, + ResponseEvent, + SpeechRecognizedEvent, + TimerEndEvent, + TimerStartedEvent, ) from platypush.plugins import Plugin, action from platypush.utils import get_plugin_name_by_class @@ -235,6 +236,9 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC): self.stop_conversation() tts.say(text=text, **self.tts_plugin_args) + def _on_hotword_detected(self, hotword: Optional[str]): + self._send_event(HotwordDetectedEvent, hotword=hotword) + def _on_speech_recognized(self, phrase: Optional[str]): phrase = (phrase or '').lower().strip() self._last_query = phrase diff --git a/platypush/plugins/picovoice/__init__.py b/platypush/plugins/picovoice/__init__.py index c1e55570f..811fe2deb 100644 --- a/platypush/plugins/picovoice/__init__.py +++ b/platypush/plugins/picovoice/__init__.py @@ -1,10 +1,10 @@ from typing import Optional, Sequence -from platypush.context import get_bus from platypush.plugins import RunnablePlugin, action from platypush.plugins.assistant import AssistantPlugin from ._assistant import Assistant +from ._state import AssistantState # pylint: disable=too-many-ancestors @@ -56,7 +56,7 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin): enable_automatic_punctuation: bool = False, start_conversation_on_hotword: bool = True, audio_queue_size: int = 100, - conversation_timeout: Optional[float] = 5.0, + conversation_timeout: Optional[float] = 7.5, **kwargs, ): """ @@ -116,9 +116,10 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin): detected after the hotword is detected. If no speech is detected within this time, the conversation will time out and the plugin will go back into hotword detection mode, if the mode is enabled. Default: - 5 seconds. + 7.5 seconds. """ super().__init__(**kwargs) + self._assistant = None self._assistant_args = { 'stop_event': self._should_stop, 'access_key': access_key, @@ -134,6 +135,11 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin): 'start_conversation_on_hotword': start_conversation_on_hotword, 'audio_queue_size': audio_queue_size, 'conversation_timeout': conversation_timeout, + 'on_conversation_start': self._on_conversation_start, + 'on_conversation_end': self._on_conversation_end, + 'on_conversation_timeout': self._on_conversation_timeout, + 'on_speech_recognized': self._on_speech_recognized, + 'on_hotword_detected': self._on_hotword_detected, } @action @@ -141,12 +147,25 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin): """ Programmatically start a conversation with the assistant """ + if not self._assistant: + self.logger.warning('Assistant not initialized') + return + + self._assistant.state = AssistantState.DETECTING_SPEECH @action def stop_conversation(self, *_, **__): """ Programmatically stop a running conversation with the assistant """ + if not self._assistant: + self.logger.warning('Assistant not initialized') + return + + if self._assistant.hotword_enabled: + self._assistant.state = AssistantState.DETECTING_HOTWORD + else: + self._assistant.state = AssistantState.IDLE @action def mute(self, *_, **__): @@ -189,12 +208,10 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin): def main(self): while not self.should_stop(): self.logger.info('Starting Picovoice assistant') - with Assistant(**self._assistant_args) as assistant: + with Assistant(**self._assistant_args) as self._assistant: try: - for event in assistant: - if event: - event.args['assistant'] = 'picovoice' - get_bus().post(event) + for event in self._assistant: + self.logger.debug('Picovoice assistant event: %s', event) except KeyboardInterrupt: break except Exception as e: diff --git a/platypush/plugins/picovoice/_assistant.py b/platypush/plugins/picovoice/_assistant.py index 27a727129..e511bc01c 100644 --- a/platypush/plugins/picovoice/_assistant.py +++ b/platypush/plugins/picovoice/_assistant.py @@ -9,16 +9,13 @@ import pvleopard import pvporcupine import pvrhino -from platypush.context import get_bus from platypush.message.event.assistant import ( - ConversationStartEvent, - ConversationEndEvent, ConversationTimeoutEvent, HotwordDetectedEvent, SpeechRecognizedEvent, ) -from ._context import SpeechDetectionContext +from ._context import ConversationContext from ._recorder import AudioRecorder from ._state import AssistantState @@ -28,6 +25,9 @@ class Assistant: A facade class that wraps the Picovoice engines under an assistant API. """ + def _default_callback(*_, **__): + pass + def __init__( self, access_key: str, @@ -45,6 +45,11 @@ class Assistant: start_conversation_on_hotword: bool = False, audio_queue_size: int = 100, conversation_timeout: Optional[float] = None, + on_conversation_start=_default_callback, + on_conversation_end=_default_callback, + on_conversation_timeout=_default_callback, + on_speech_recognized=_default_callback, + on_hotword_detected=_default_callback, ): self._access_key = access_key self._stop_event = stop_event @@ -62,10 +67,16 @@ class Assistant: self.start_conversation_on_hotword = start_conversation_on_hotword self.audio_queue_size = audio_queue_size + self._on_conversation_start = on_conversation_start + self._on_conversation_end = on_conversation_end + self._on_conversation_timeout = on_conversation_timeout + self._on_speech_recognized = on_speech_recognized + self._on_hotword_detected = on_hotword_detected + self._recorder = None self._state = AssistantState.IDLE self._state_lock = RLock() - self._speech_ctx = SpeechDetectionContext(timeout=conversation_timeout) + self._ctx = ConversationContext(timeout=conversation_timeout) if hotword_enabled: if not keywords: @@ -119,11 +130,11 @@ class Assistant: return if prev_state == AssistantState.DETECTING_SPEECH: - self._speech_ctx.stop() - self._post_event(ConversationEndEvent()) + self._ctx.stop() + self._on_conversation_end() elif new_state == AssistantState.DETECTING_SPEECH: - self._speech_ctx.start() - self._post_event(ConversationStartEvent()) + self._ctx.start() + self._on_conversation_start() @property def porcupine(self) -> Optional[pvporcupine.Porcupine]: @@ -239,11 +250,6 @@ class Assistant: raise StopIteration - def _post_event(self, event): - if event: - event.args['assistant'] = 'picovoice' - get_bus().post(event) - def _process_hotword(self, frame): if not self.porcupine: return None @@ -256,6 +262,7 @@ class Assistant: if self.start_conversation_on_hotword: self.state = AssistantState.DETECTING_SPEECH + self._on_hotword_detected(hotword=self.keywords[keyword_index]) return HotwordDetectedEvent(hotword=self.keywords[keyword_index]) return None @@ -265,26 +272,36 @@ class Assistant: return None event = None - ( - self._speech_ctx.partial_transcript, - self._speech_ctx.is_final, - ) = self.cheetah.process(frame) + partial_transcript, self._ctx.is_final = self.cheetah.process(frame) - if self._speech_ctx.partial_transcript: + if partial_transcript: + self._ctx.partial_transcript += partial_transcript self.logger.info( 'Partial transcript: %s, is_final: %s', - self._speech_ctx.partial_transcript, - self._speech_ctx.is_final, + self._ctx.partial_transcript, + self._ctx.is_final, ) - if self._speech_ctx.is_final or self._speech_ctx.timed_out: - event = ( - ConversationTimeoutEvent() - if self._speech_ctx.timed_out - else SpeechRecognizedEvent(phrase=self.cheetah.flush()) - ) + if self._ctx.is_final or self._ctx.timed_out: + phrase = '' + if self.cheetah: + phrase = self.cheetah.flush() - if self.porcupine: + if not self._ctx.is_final: + self._ctx.partial_transcript += phrase + phrase = self._ctx.partial_transcript + + phrase = phrase[:1].lower() + phrase[1:] + + if self._ctx.is_final or phrase: + event = SpeechRecognizedEvent(phrase=phrase) + self._on_speech_recognized(phrase=phrase) + else: + event = ConversationTimeoutEvent() + self._on_conversation_timeout() + + self._ctx.reset() + if self.hotword_enabled: self.state = AssistantState.DETECTING_HOTWORD return event diff --git a/platypush/plugins/picovoice/_context.py b/platypush/plugins/picovoice/_context.py index cb7546105..1a5340739 100644 --- a/platypush/plugins/picovoice/_context.py +++ b/platypush/plugins/picovoice/_context.py @@ -4,9 +4,9 @@ from typing import Optional @dataclass -class SpeechDetectionContext: +class ConversationContext: """ - Context of the speech detection process. + Context of the conversation process. """ partial_transcript: str = ''