From 0b8e1bb81ba17b1ce82794ca756b27f3f00d6f22 Mon Sep 17 00:00:00 2001
From: Fabio Manganiello <fabio@manganiello.tech>
Date: Mon, 8 Apr 2024 03:02:03 +0200
Subject: [PATCH] Better integration with the native base API of the assistant
 plugin.

---
 platypush/plugins/assistant/__init__.py   | 26 ++++----
 platypush/plugins/picovoice/__init__.py   | 33 +++++++---
 platypush/plugins/picovoice/_assistant.py | 73 ++++++++++++++---------
 platypush/plugins/picovoice/_context.py   |  4 +-
 4 files changed, 87 insertions(+), 49 deletions(-)

diff --git a/platypush/plugins/assistant/__init__.py b/platypush/plugins/assistant/__init__.py
index bb5b25e4..97e98f30 100644
--- a/platypush/plugins/assistant/__init__.py
+++ b/platypush/plugins/assistant/__init__.py
@@ -9,21 +9,22 @@ from platypush.context import get_bus, get_plugin
 from platypush.entities.assistants import Assistant
 from platypush.entities.managers.assistants import AssistantEntityManager
 from platypush.message.event.assistant import (
-    AssistantEvent,
-    ConversationStartEvent,
-    ConversationEndEvent,
-    ConversationTimeoutEvent,
-    ResponseEvent,
-    NoResponseEvent,
-    SpeechRecognizedEvent,
-    AlarmStartedEvent,
     AlarmEndEvent,
-    TimerStartedEvent,
-    TimerEndEvent,
-    AlertStartedEvent,
+    AlarmStartedEvent,
     AlertEndEvent,
+    AlertStartedEvent,
+    AssistantEvent,
+    ConversationEndEvent,
+    ConversationStartEvent,
+    ConversationTimeoutEvent,
+    HotwordDetectedEvent,
     MicMutedEvent,
     MicUnmutedEvent,
+    NoResponseEvent,
+    ResponseEvent,
+    SpeechRecognizedEvent,
+    TimerEndEvent,
+    TimerStartedEvent,
 )
 from platypush.plugins import Plugin, action
 from platypush.utils import get_plugin_name_by_class
@@ -235,6 +236,9 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
             self.stop_conversation()
             tts.say(text=text, **self.tts_plugin_args)
 
+    def _on_hotword_detected(self, hotword: Optional[str]):
+        self._send_event(HotwordDetectedEvent, hotword=hotword)
+
     def _on_speech_recognized(self, phrase: Optional[str]):
         phrase = (phrase or '').lower().strip()
         self._last_query = phrase
diff --git a/platypush/plugins/picovoice/__init__.py b/platypush/plugins/picovoice/__init__.py
index c1e55570..811fe2de 100644
--- a/platypush/plugins/picovoice/__init__.py
+++ b/platypush/plugins/picovoice/__init__.py
@@ -1,10 +1,10 @@
 from typing import Optional, Sequence
 
-from platypush.context import get_bus
 from platypush.plugins import RunnablePlugin, action
 from platypush.plugins.assistant import AssistantPlugin
 
 from ._assistant import Assistant
+from ._state import AssistantState
 
 
 # pylint: disable=too-many-ancestors
@@ -56,7 +56,7 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
         enable_automatic_punctuation: bool = False,
         start_conversation_on_hotword: bool = True,
         audio_queue_size: int = 100,
-        conversation_timeout: Optional[float] = 5.0,
+        conversation_timeout: Optional[float] = 7.5,
         **kwargs,
     ):
         """
@@ -116,9 +116,10 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
             detected after the hotword is detected. If no speech is detected
             within this time, the conversation will time out and the plugin will
             go back into hotword detection mode, if the mode is enabled. Default:
-            5 seconds.
+            7.5 seconds.
         """
         super().__init__(**kwargs)
+        self._assistant = None
         self._assistant_args = {
             'stop_event': self._should_stop,
             'access_key': access_key,
@@ -134,6 +135,11 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
             'start_conversation_on_hotword': start_conversation_on_hotword,
             'audio_queue_size': audio_queue_size,
             'conversation_timeout': conversation_timeout,
+            'on_conversation_start': self._on_conversation_start,
+            'on_conversation_end': self._on_conversation_end,
+            'on_conversation_timeout': self._on_conversation_timeout,
+            'on_speech_recognized': self._on_speech_recognized,
+            'on_hotword_detected': self._on_hotword_detected,
         }
 
     @action
@@ -141,12 +147,25 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
         """
         Programmatically start a conversation with the assistant
         """
+        if not self._assistant:
+            self.logger.warning('Assistant not initialized')
+            return
+
+        self._assistant.state = AssistantState.DETECTING_SPEECH
 
     @action
     def stop_conversation(self, *_, **__):
         """
         Programmatically stop a running conversation with the assistant
         """
+        if not self._assistant:
+            self.logger.warning('Assistant not initialized')
+            return
+
+        if self._assistant.hotword_enabled:
+            self._assistant.state = AssistantState.DETECTING_HOTWORD
+        else:
+            self._assistant.state = AssistantState.IDLE
 
     @action
     def mute(self, *_, **__):
@@ -189,12 +208,10 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
     def main(self):
         while not self.should_stop():
             self.logger.info('Starting Picovoice assistant')
-            with Assistant(**self._assistant_args) as assistant:
+            with Assistant(**self._assistant_args) as self._assistant:
                 try:
-                    for event in assistant:
-                        if event:
-                            event.args['assistant'] = 'picovoice'
-                            get_bus().post(event)
+                    for event in self._assistant:
+                        self.logger.debug('Picovoice assistant event: %s', event)
                 except KeyboardInterrupt:
                     break
                 except Exception as e:
diff --git a/platypush/plugins/picovoice/_assistant.py b/platypush/plugins/picovoice/_assistant.py
index 27a72712..e511bc01 100644
--- a/platypush/plugins/picovoice/_assistant.py
+++ b/platypush/plugins/picovoice/_assistant.py
@@ -9,16 +9,13 @@ import pvleopard
 import pvporcupine
 import pvrhino
 
-from platypush.context import get_bus
 from platypush.message.event.assistant import (
-    ConversationStartEvent,
-    ConversationEndEvent,
     ConversationTimeoutEvent,
     HotwordDetectedEvent,
     SpeechRecognizedEvent,
 )
 
-from ._context import SpeechDetectionContext
+from ._context import ConversationContext
 from ._recorder import AudioRecorder
 from ._state import AssistantState
 
@@ -28,6 +25,9 @@ class Assistant:
     A facade class that wraps the Picovoice engines under an assistant API.
     """
 
+    def _default_callback(*_, **__):
+        pass
+
     def __init__(
         self,
         access_key: str,
@@ -45,6 +45,11 @@ class Assistant:
         start_conversation_on_hotword: bool = False,
         audio_queue_size: int = 100,
         conversation_timeout: Optional[float] = None,
+        on_conversation_start=_default_callback,
+        on_conversation_end=_default_callback,
+        on_conversation_timeout=_default_callback,
+        on_speech_recognized=_default_callback,
+        on_hotword_detected=_default_callback,
     ):
         self._access_key = access_key
         self._stop_event = stop_event
@@ -62,10 +67,16 @@ class Assistant:
         self.start_conversation_on_hotword = start_conversation_on_hotword
         self.audio_queue_size = audio_queue_size
 
+        self._on_conversation_start = on_conversation_start
+        self._on_conversation_end = on_conversation_end
+        self._on_conversation_timeout = on_conversation_timeout
+        self._on_speech_recognized = on_speech_recognized
+        self._on_hotword_detected = on_hotword_detected
+
         self._recorder = None
         self._state = AssistantState.IDLE
         self._state_lock = RLock()
-        self._speech_ctx = SpeechDetectionContext(timeout=conversation_timeout)
+        self._ctx = ConversationContext(timeout=conversation_timeout)
 
         if hotword_enabled:
             if not keywords:
@@ -119,11 +130,11 @@ class Assistant:
             return
 
         if prev_state == AssistantState.DETECTING_SPEECH:
-            self._speech_ctx.stop()
-            self._post_event(ConversationEndEvent())
+            self._ctx.stop()
+            self._on_conversation_end()
         elif new_state == AssistantState.DETECTING_SPEECH:
-            self._speech_ctx.start()
-            self._post_event(ConversationStartEvent())
+            self._ctx.start()
+            self._on_conversation_start()
 
     @property
     def porcupine(self) -> Optional[pvporcupine.Porcupine]:
@@ -239,11 +250,6 @@ class Assistant:
 
         raise StopIteration
 
-    def _post_event(self, event):
-        if event:
-            event.args['assistant'] = 'picovoice'
-            get_bus().post(event)
-
     def _process_hotword(self, frame):
         if not self.porcupine:
             return None
@@ -256,6 +262,7 @@ class Assistant:
             if self.start_conversation_on_hotword:
                 self.state = AssistantState.DETECTING_SPEECH
 
+            self._on_hotword_detected(hotword=self.keywords[keyword_index])
             return HotwordDetectedEvent(hotword=self.keywords[keyword_index])
 
         return None
@@ -265,26 +272,36 @@ class Assistant:
             return None
 
         event = None
-        (
-            self._speech_ctx.partial_transcript,
-            self._speech_ctx.is_final,
-        ) = self.cheetah.process(frame)
+        partial_transcript, self._ctx.is_final = self.cheetah.process(frame)
 
-        if self._speech_ctx.partial_transcript:
+        if partial_transcript:
+            self._ctx.partial_transcript += partial_transcript
             self.logger.info(
                 'Partial transcript: %s, is_final: %s',
-                self._speech_ctx.partial_transcript,
-                self._speech_ctx.is_final,
+                self._ctx.partial_transcript,
+                self._ctx.is_final,
             )
 
-        if self._speech_ctx.is_final or self._speech_ctx.timed_out:
-            event = (
-                ConversationTimeoutEvent()
-                if self._speech_ctx.timed_out
-                else SpeechRecognizedEvent(phrase=self.cheetah.flush())
-            )
+        if self._ctx.is_final or self._ctx.timed_out:
+            phrase = ''
+            if self.cheetah:
+                phrase = self.cheetah.flush()
 
-            if self.porcupine:
+            # Partials are incremental: always append the flushed tail.
+            self._ctx.partial_transcript += phrase
+            phrase = self._ctx.partial_transcript
+
+            phrase = phrase[:1].lower() + phrase[1:]
+
+            if self._ctx.is_final or phrase:
+                event = SpeechRecognizedEvent(phrase=phrase)
+                self._on_speech_recognized(phrase=phrase)
+            else:
+                event = ConversationTimeoutEvent()
+                self._on_conversation_timeout()
+
+            self._ctx.reset()
+            if self.hotword_enabled:
                 self.state = AssistantState.DETECTING_HOTWORD
 
         return event
diff --git a/platypush/plugins/picovoice/_context.py b/platypush/plugins/picovoice/_context.py
index cb754610..1a534073 100644
--- a/platypush/plugins/picovoice/_context.py
+++ b/platypush/plugins/picovoice/_context.py
@@ -4,9 +4,9 @@ from typing import Optional
 
 
 @dataclass
-class SpeechDetectionContext:
+class ConversationContext:
     """
-    Context of the speech detection process.
+    Context of the conversation process.
     """
 
     partial_transcript: str = ''