Better integration with the native base API of the assistant plugin.

2024-04-08 03:02:03 +02:00 · 2024-04-08 03:02:03 +02:00 · 0b8e1bb81b
parent f021b471aa
commit 0b8e1bb81b
4 changed files with 87 additions and 49 deletions
--- a/platypush/plugins/assistant/__init__.py
+++ b/platypush/plugins/assistant/__init__.py
@@ -9,21 +9,22 @@ from platypush.context import get_bus, get_plugin
 from platypush.entities.assistants import Assistant
 from platypush.entities.managers.assistants import AssistantEntityManager
 from platypush.message.event.assistant import (
-    AssistantEvent,
-    ConversationStartEvent,
-    ConversationEndEvent,
-    ConversationTimeoutEvent,
-    ResponseEvent,
-    NoResponseEvent,
-    SpeechRecognizedEvent,
-    AlarmStartedEvent,
    AlarmEndEvent,
-    TimerStartedEvent,
-    TimerEndEvent,
-    AlertStartedEvent,
+    AlarmStartedEvent,
    AlertEndEvent,
+    AlertStartedEvent,
+    AssistantEvent,
+    ConversationEndEvent,
+    ConversationStartEvent,
+    ConversationTimeoutEvent,
+    HotwordDetectedEvent,
    MicMutedEvent,
    MicUnmutedEvent,
+    NoResponseEvent,
+    ResponseEvent,
+    SpeechRecognizedEvent,
+    TimerEndEvent,
+    TimerStartedEvent,
 )
 from platypush.plugins import Plugin, action
 from platypush.utils import get_plugin_name_by_class
@@ -235,6 +236,9 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
            self.stop_conversation()
            tts.say(text=text, **self.tts_plugin_args)

+    def _on_hotword_detected(self, hotword: Optional[str]):
+        self._send_event(HotwordDetectedEvent, hotword=hotword)
+
    def _on_speech_recognized(self, phrase: Optional[str]):
        phrase = (phrase or '').lower().strip()
        self._last_query = phrase
--- a/platypush/plugins/picovoice/__init__.py
+++ b/platypush/plugins/picovoice/__init__.py
@@ -1,10 +1,10 @@
 from typing import Optional, Sequence

-from platypush.context import get_bus
 from platypush.plugins import RunnablePlugin, action
 from platypush.plugins.assistant import AssistantPlugin

 from ._assistant import Assistant
+from ._state import AssistantState


 # pylint: disable=too-many-ancestors
@@ -56,7 +56,7 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
        enable_automatic_punctuation: bool = False,
        start_conversation_on_hotword: bool = True,
        audio_queue_size: int = 100,
-        conversation_timeout: Optional[float] = 5.0,
+        conversation_timeout: Optional[float] = 7.5,
        **kwargs,
    ):
        """
@@ -116,9 +116,10 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
            detected after the hotword is detected. If no speech is detected
            within this time, the conversation will time out and the plugin will
            go back into hotword detection mode, if the mode is enabled. Default:
-            5 seconds.
+            7.5 seconds.
        """
        super().__init__(**kwargs)
+        self._assistant = None
        self._assistant_args = {
            'stop_event': self._should_stop,
            'access_key': access_key,
@@ -134,6 +135,11 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
            'start_conversation_on_hotword': start_conversation_on_hotword,
            'audio_queue_size': audio_queue_size,
            'conversation_timeout': conversation_timeout,
+            'on_conversation_start': self._on_conversation_start,
+            'on_conversation_end': self._on_conversation_end,
+            'on_conversation_timeout': self._on_conversation_timeout,
+            'on_speech_recognized': self._on_speech_recognized,
+            'on_hotword_detected': self._on_hotword_detected,
        }

    @action
@@ -141,12 +147,25 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
        """
        Programmatically start a conversation with the assistant
        """
+        if not self._assistant:
+            self.logger.warning('Assistant not initialized')
+            return
+
+        self._assistant.state = AssistantState.DETECTING_SPEECH

    @action
    def stop_conversation(self, *_, **__):
        """
        Programmatically stop a running conversation with the assistant
        """
+        if not self._assistant:
+            self.logger.warning('Assistant not initialized')
+            return
+
+        if self._assistant.hotword_enabled:
+            self._assistant.state = AssistantState.DETECTING_HOTWORD
+        else:
+            self._assistant.state = AssistantState.IDLE

    @action
    def mute(self, *_, **__):
@@ -189,12 +208,10 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
    def main(self):
        while not self.should_stop():
            self.logger.info('Starting Picovoice assistant')
-            with Assistant(**self._assistant_args) as assistant:
+            with Assistant(**self._assistant_args) as self._assistant:
                try:
-                    for event in assistant:
-                        if event:
-                            event.args['assistant'] = 'picovoice'
-                            get_bus().post(event)
+                    for event in self._assistant:
+                        self.logger.debug('Picovoice assistant event: %s', event)
                except KeyboardInterrupt:
                    break
                except Exception as e:
--- a/platypush/plugins/picovoice/_assistant.py
+++ b/platypush/plugins/picovoice/_assistant.py
@@ -9,16 +9,13 @@ import pvleopard
 import pvporcupine
 import pvrhino

-from platypush.context import get_bus
 from platypush.message.event.assistant import (
-    ConversationStartEvent,
-    ConversationEndEvent,
    ConversationTimeoutEvent,
    HotwordDetectedEvent,
    SpeechRecognizedEvent,
 )

-from ._context import SpeechDetectionContext
+from ._context import ConversationContext
 from ._recorder import AudioRecorder
 from ._state import AssistantState

@@ -28,6 +25,9 @@ class Assistant:
    A facade class that wraps the Picovoice engines under an assistant API.
    """

+    def _default_callback(*_, **__):
+        pass
+
    def __init__(
        self,
        access_key: str,
@@ -45,6 +45,11 @@ class Assistant:
        start_conversation_on_hotword: bool = False,
        audio_queue_size: int = 100,
        conversation_timeout: Optional[float] = None,
+        on_conversation_start=_default_callback,
+        on_conversation_end=_default_callback,
+        on_conversation_timeout=_default_callback,
+        on_speech_recognized=_default_callback,
+        on_hotword_detected=_default_callback,
    ):
        self._access_key = access_key
        self._stop_event = stop_event
@@ -62,10 +67,16 @@ class Assistant:
        self.start_conversation_on_hotword = start_conversation_on_hotword
        self.audio_queue_size = audio_queue_size

+        self._on_conversation_start = on_conversation_start
+        self._on_conversation_end = on_conversation_end
+        self._on_conversation_timeout = on_conversation_timeout
+        self._on_speech_recognized = on_speech_recognized
+        self._on_hotword_detected = on_hotword_detected
+
        self._recorder = None
        self._state = AssistantState.IDLE
        self._state_lock = RLock()
-        self._speech_ctx = SpeechDetectionContext(timeout=conversation_timeout)
+        self._ctx = ConversationContext(timeout=conversation_timeout)

        if hotword_enabled:
            if not keywords:
@@ -119,11 +130,11 @@ class Assistant:
            return

        if prev_state == AssistantState.DETECTING_SPEECH:
-            self._speech_ctx.stop()
-            self._post_event(ConversationEndEvent())
+            self._ctx.stop()
+            self._on_conversation_end()
        elif new_state == AssistantState.DETECTING_SPEECH:
-            self._speech_ctx.start()
-            self._post_event(ConversationStartEvent())
+            self._ctx.start()
+            self._on_conversation_start()

    @property
    def porcupine(self) -> Optional[pvporcupine.Porcupine]:
@@ -239,11 +250,6 @@ class Assistant:

        raise StopIteration

-    def _post_event(self, event):
-        if event:
-            event.args['assistant'] = 'picovoice'
-            get_bus().post(event)
-
    def _process_hotword(self, frame):
        if not self.porcupine:
            return None
@@ -256,6 +262,7 @@ class Assistant:
            if self.start_conversation_on_hotword:
                self.state = AssistantState.DETECTING_SPEECH

+            self._on_hotword_detected(hotword=self.keywords[keyword_index])
            return HotwordDetectedEvent(hotword=self.keywords[keyword_index])

        return None
@@ -265,26 +272,36 @@ class Assistant:
            return None

        event = None
-        (
-            self._speech_ctx.partial_transcript,
-            self._speech_ctx.is_final,
-        ) = self.cheetah.process(frame)
+        partial_transcript, self._ctx.is_final = self.cheetah.process(frame)

-        if self._speech_ctx.partial_transcript:
+        if partial_transcript:
+            self._ctx.partial_transcript += partial_transcript
            self.logger.info(
                'Partial transcript: %s, is_final: %s',
-                self._speech_ctx.partial_transcript,
-                self._speech_ctx.is_final,
+                self._ctx.partial_transcript,
+                self._ctx.is_final,
            )

-        if self._speech_ctx.is_final or self._speech_ctx.timed_out:
-            event = (
-                ConversationTimeoutEvent()
-                if self._speech_ctx.timed_out
-                else SpeechRecognizedEvent(phrase=self.cheetah.flush())
-            )
+        if self._ctx.is_final or self._ctx.timed_out:
+            phrase = ''
+            if self.cheetah:
+                phrase = self.cheetah.flush()

-            if self.porcupine:
+            if not self._ctx.is_final:
+                self._ctx.partial_transcript += phrase
+                phrase = self._ctx.partial_transcript
+
+            phrase = phrase[:1].lower() + phrase[1:]
+
+            if self._ctx.is_final or phrase:
+                event = SpeechRecognizedEvent(phrase=phrase)
+                self._on_speech_recognized(phrase=phrase)
+            else:
+                event = ConversationTimeoutEvent()
+                self._on_conversation_timeout()
+
+            self._ctx.reset()
+            if self.hotword_enabled:
                self.state = AssistantState.DETECTING_HOTWORD

        return event
--- a/platypush/plugins/picovoice/_context.py
+++ b/platypush/plugins/picovoice/_context.py
@@ -4,9 +4,9 @@ from typing import Optional


@dataclass
-class SpeechDetectionContext:
+class ConversationContext:
    """
-    Context of the speech detection process.
+    Context of the conversation process.
    """

    partial_transcript: str = ''