From f0382c73ab77c44db243178b39a010be9dcd4e25 Mon Sep 17 00:00:00 2001
From: Fabio Manganiello <fabio@manganiello.tech>
Date: Sat, 6 Apr 2024 00:11:46 +0200
Subject: [PATCH 1/6] [#304] Removed old Picovoice integrations

---
 platypush/backend/stt/__init__.py             |  40 ---
 platypush/backend/stt/picovoice/__init__.py   |   0
 .../backend/stt/picovoice/hotword/__init__.py |  21 --
 .../stt/picovoice/hotword/manifest.yaml       |   6 -
 .../backend/stt/picovoice/speech/__init__.py  |  21 --
 .../stt/picovoice/speech/manifest.yaml        |   6 -
 platypush/plugins/stt/__init__.py             | 336 ------------------
 platypush/plugins/stt/picovoice/__init__.py   |   0
 .../plugins/stt/picovoice/hotword/__init__.py | 120 -------
 .../stt/picovoice/hotword/manifest.yaml       |   7 -
 .../plugins/stt/picovoice/speech/__init__.py  | 154 --------
 .../stt/picovoice/speech/manifest.yaml        |   7 -
 12 files changed, 718 deletions(-)
 delete mode 100644 platypush/backend/stt/__init__.py
 delete mode 100644 platypush/backend/stt/picovoice/__init__.py
 delete mode 100644 platypush/backend/stt/picovoice/hotword/__init__.py
 delete mode 100644 platypush/backend/stt/picovoice/hotword/manifest.yaml
 delete mode 100644 platypush/backend/stt/picovoice/speech/__init__.py
 delete mode 100644 platypush/backend/stt/picovoice/speech/manifest.yaml
 delete mode 100644 platypush/plugins/stt/__init__.py
 delete mode 100644 platypush/plugins/stt/picovoice/__init__.py
 delete mode 100644 platypush/plugins/stt/picovoice/hotword/__init__.py
 delete mode 100644 platypush/plugins/stt/picovoice/hotword/manifest.yaml
 delete mode 100644 platypush/plugins/stt/picovoice/speech/__init__.py
 delete mode 100644 platypush/plugins/stt/picovoice/speech/manifest.yaml

diff --git a/platypush/backend/stt/__init__.py b/platypush/backend/stt/__init__.py
deleted file mode 100644
index 624c2b72..00000000
--- a/platypush/backend/stt/__init__.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import time
-
-from platypush.backend import Backend
-from platypush.context import get_plugin
-from platypush.plugins.stt import SttPlugin
-
-
-class SttBackend(Backend):
-    """
-    Base class for speech-to-text backends.
-    """
-
-    def __init__(self, plugin_name: str, retry_sleep: float = 5.0, *args, **kwargs):
-        """
-        :param plugin_name: Plugin name of the class that will be used for speech detection. Must be an instance of
-            :class:`platypush.plugins.stt.SttPlugin`.
-        :param retry_sleep: Number of seconds the backend will wait on failure before re-initializing the plugin
-            (default: 5 seconds).
-        """
-        super().__init__(*args, **kwargs)
-        self.plugin_name = plugin_name
-        self.retry_sleep = retry_sleep
-
-    def run(self):
-        super().run()
-        self.logger.info('Starting {} speech-to-text backend'.format(self.__class__.__name__))
-
-        while not self.should_stop():
-            try:
-                plugin: SttPlugin = get_plugin(self.plugin_name)
-                with plugin:
-                    # noinspection PyProtectedMember
-                    plugin._detection_thread.join()
-            except Exception as e:
-                self.logger.exception(e)
-                self.logger.warning('Encountered an unexpected error, retrying in {} seconds'.format(self.retry_sleep))
-                time.sleep(self.retry_sleep)
-
-
-# vim:sw=4:ts=4:et:
diff --git a/platypush/backend/stt/picovoice/__init__.py b/platypush/backend/stt/picovoice/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/platypush/backend/stt/picovoice/hotword/__init__.py b/platypush/backend/stt/picovoice/hotword/__init__.py
deleted file mode 100644
index 9dc6ae63..00000000
--- a/platypush/backend/stt/picovoice/hotword/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from platypush.backend.stt import SttBackend
-
-
-class SttPicovoiceHotwordBackend(SttBackend):
-    """
-    Backend for the PicoVoice hotword detection plugin. Set this plugin to ``enabled`` if you
-    want to run the hotword engine continuously instead of programmatically using
-    ``start_detection`` and ``stop_detection``.
-
-    Requires:
-
-        - The :class:`platypush.plugins.stt.deepspeech.SttPicovoiceHotwordPlugin` plugin configured and its dependencies
-          installed.
-
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__('stt.picovoice.hotword', *args, **kwargs)
-
-
-# vim:sw=4:ts=4:et:
diff --git a/platypush/backend/stt/picovoice/hotword/manifest.yaml b/platypush/backend/stt/picovoice/hotword/manifest.yaml
deleted file mode 100644
index 0527afca..00000000
--- a/platypush/backend/stt/picovoice/hotword/manifest.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-manifest:
-  events: {}
-  install:
-    pip: []
-  package: platypush.backend.stt.picovoice.hotword
-  type: backend
diff --git a/platypush/backend/stt/picovoice/speech/__init__.py b/platypush/backend/stt/picovoice/speech/__init__.py
deleted file mode 100644
index 28a4b0b1..00000000
--- a/platypush/backend/stt/picovoice/speech/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from platypush.backend.stt import SttBackend
-
-
-class SttPicovoiceSpeechBackend(SttBackend):
-    """
-    Backend for the PicoVoice speech detection plugin. Set this plugin to ``enabled`` if you
-    want to run the speech engine continuously instead of programmatically using
-    ``start_detection`` and ``stop_detection``.
-
-    Requires:
-
-        - The :class:`platypush.plugins.stt.deepspeech.SttPicovoiceSpeechPlugin` plugin configured and its dependencies
-          installed.
-
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__('stt.picovoice.speech', *args, **kwargs)
-
-
-# vim:sw=4:ts=4:et:
diff --git a/platypush/backend/stt/picovoice/speech/manifest.yaml b/platypush/backend/stt/picovoice/speech/manifest.yaml
deleted file mode 100644
index fc68a467..00000000
--- a/platypush/backend/stt/picovoice/speech/manifest.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-manifest:
-  events: {}
-  install:
-    pip: []
-  package: platypush.backend.stt.picovoice.speech
-  type: backend
diff --git a/platypush/plugins/stt/__init__.py b/platypush/plugins/stt/__init__.py
deleted file mode 100644
index 1df2ae45..00000000
--- a/platypush/plugins/stt/__init__.py
+++ /dev/null
@@ -1,336 +0,0 @@
-import queue
-import threading
-from abc import ABC, abstractmethod
-from typing import Optional, Union, List
-
-import sounddevice as sd
-
-from platypush.context import get_bus
-from platypush.message.event.stt import (
-    SpeechDetectionStartedEvent,
-    SpeechDetectionStoppedEvent,
-    SpeechStartedEvent,
-    SpeechDetectedEvent,
-    HotwordDetectedEvent,
-    ConversationDetectedEvent,
-)
-from platypush.message.response.stt import SpeechDetectedResponse
-from platypush.plugins import Plugin, action
-
-
-class SttPlugin(ABC, Plugin):
-    """
-    Abstract class for speech-to-text plugins.
-    """
-
-    _thread_stop_timeout = 10.0
-    rate = 16000
-    channels = 1
-
-    def __init__(
-        self,
-        input_device: Optional[Union[int, str]] = None,
-        hotword: Optional[str] = None,
-        hotwords: Optional[List[str]] = None,
-        conversation_timeout: Optional[float] = 10.0,
-        block_duration: float = 1.0,
-    ):
-        """
-        :param input_device: PortAudio device index or name that will be used for recording speech (default: default
-            system audio input device).
-        :param hotword: When this word is detected, the plugin will trigger a
-            :class:`platypush.message.event.stt.HotwordDetectedEvent` instead of a
-            :class:`platypush.message.event.stt.SpeechDetectedEvent` event. You can use these events for hooking other
-            assistants.
-        :param hotwords: Use a list of hotwords instead of a single one.
-        :param conversation_timeout: If ``hotword`` or ``hotwords`` are set and ``conversation_timeout`` is set,
-            the next speech detected event will trigger a :class:`platypush.message.event.stt.ConversationDetectedEvent`
-            instead of a :class:`platypush.message.event.stt.SpeechDetectedEvent` event. You can hook custom hooks
-            here to run any logic depending on the detected speech - it can emulate a kind of
-            "OK, Google. Turn on the lights" interaction without using an external assistant (default: 10 seconds).
-        :param block_duration: Duration of the acquired audio blocks (default: 1 second).
-        """
-
-        super().__init__()
-        self.input_device = input_device
-        self.conversation_timeout = conversation_timeout
-        self.block_duration = block_duration
-
-        self.hotwords = set(hotwords or [])
-        if hotword:
-            self.hotwords = {hotword}
-
-        self._conversation_event = threading.Event()
-        self._input_stream: Optional[sd.InputStream] = None
-        self._recording_thread: Optional[threading.Thread] = None
-        self._detection_thread: Optional[threading.Thread] = None
-        self._audio_queue: Optional[queue.Queue] = None
-        self._current_text = ''
-
-    def _get_input_device(self, device: Optional[Union[int, str]] = None) -> int:
-        """
-        Get the index of the input device by index or name.
-
-        :param device: Device index or name. If None is set then the function will return the index of the
-            default audio input device.
-        :return: Index of the audio input device.
-        """
-        if not device:
-            device = self.input_device
-        if not device:
-            return sd.query_hostapis()[0].get('default_input_device')
-
-        if isinstance(device, int):
-            assert device <= len(sd.query_devices())
-            return device
-
-        for i, dev in enumerate(sd.query_devices()):
-            if dev['name'] == device:
-                return i
-
-        raise AssertionError('Device {} not found'.format(device))
-
-    def on_speech_detected(self, speech: str) -> None:
-        """
-        Hook called when speech is detected. Triggers the right event depending on the current context.
-
-        :param speech: Detected speech.
-        """
-        speech = speech.strip()
-
-        if speech in self.hotwords:
-            event = HotwordDetectedEvent(hotword=speech)
-            if self.conversation_timeout:
-                self._conversation_event.set()
-                threading.Timer(
-                    self.conversation_timeout, lambda: self._conversation_event.clear()
-                ).start()
-        elif self._conversation_event.is_set():
-            event = ConversationDetectedEvent(speech=speech)
-        else:
-            event = SpeechDetectedEvent(speech=speech)
-
-        get_bus().post(event)
-
-    @staticmethod
-    def convert_frames(frames: bytes) -> bytes:
-        """
-        Conversion method for raw audio frames. It just returns the input frames as bytes. Override it if required
-        by your logic.
-
-        :param frames: Input audio frames, as bytes.
-        :return: The audio frames as passed on the input. Override if required.
-        """
-        return frames
-
-    def on_detection_started(self) -> None:
-        """
-        Method called when the ``detection_thread`` starts. Initialize your context variables and models here if
-        required.
-        """
-        pass
-
-    def on_detection_ended(self) -> None:
-        """
-        Method called when the ``detection_thread`` stops. Clean up your context variables and models here.
-        """
-        pass
-
-    def before_recording(self) -> None:
-        """
-        Method called when the ``recording_thread`` starts. Put here any logic that you may want to run before the
-        recording thread starts.
-        """
-        pass
-
-    def on_recording_started(self) -> None:
-        """
-        Method called after the ``recording_thread`` opens the audio device. Put here any logic that you may want to
-        run after the recording starts.
-        """
-        pass
-
-    def on_recording_ended(self) -> None:
-        """
-        Method called when the ``recording_thread`` stops. Put here any logic that you want to run after the audio
-        device is closed.
-        """
-        pass
-
-    @abstractmethod
-    def detect_speech(self, frames) -> str:
-        """
-        Method called within the ``detection_thread`` when new audio frames have been captured. Must be implemented
-        by the derived classes.
-
-        :param frames: Audio frames, as returned by ``convert_frames``.
-        :return: Detected text, as a string. Returns an empty string if no text has been detected.
-        """
-        raise NotImplementedError
-
-    def process_text(self, text: str) -> None:
-        if (not text and self._current_text) or (text and text == self._current_text):
-            self.on_speech_detected(self._current_text)
-            self._current_text = ''
-        else:
-            if text:
-                if not self._current_text:
-                    get_bus().post(SpeechStartedEvent())
-                self.logger.info('Intermediate speech results: [{}]'.format(text))
-
-            self._current_text = text
-
-    def detection_thread(self) -> None:
-        """
-        This thread reads frames from ``_audio_queue``, performs the speech-to-text detection and calls
-        """
-        self._current_text = ''
-        self.logger.debug('Detection thread started')
-        self.on_detection_started()
-
-        while self._audio_queue:
-            try:
-                frames = self._audio_queue.get()
-                frames = self.convert_frames(frames)
-            except Exception as e:
-                self.logger.warning(
-                    'Error while feeding audio to the model: {}'.format(str(e))
-                )
-                continue
-
-            text = self.detect_speech(frames).strip()
-            self.process_text(text)
-
-        self.on_detection_ended()
-        self.logger.debug('Detection thread terminated')
-
-    def recording_thread(
-        self,
-        block_duration: Optional[float] = None,
-        block_size: Optional[int] = None,
-        input_device: Optional[str] = None,
-    ) -> None:
-        """
-        Recording thread. It reads raw frames from the audio device and dispatches them to ``detection_thread``.
-
-        :param block_duration: Audio blocks duration. Specify either ``block_duration`` or ``block_size``.
-        :param block_size: Size of the audio blocks. Specify either ``block_duration`` or ``block_size``.
-        :param input_device: Input device
-        """
-        assert (block_duration or block_size) and not (
-            block_duration and block_size
-        ), 'Please specify either block_duration or block_size'
-
-        if not block_size:
-            block_size = int(self.rate * self.channels * block_duration)
-
-        self.before_recording()
-        self.logger.debug('Recording thread started')
-        device = self._get_input_device(input_device)
-        self._input_stream = sd.InputStream(
-            samplerate=self.rate,
-            device=device,
-            channels=self.channels,
-            dtype='int16',
-            latency=0,
-            blocksize=block_size,
-        )
-        self._input_stream.start()
-        self.on_recording_started()
-        get_bus().post(SpeechDetectionStartedEvent())
-
-        while self._input_stream:
-            try:
-                frames = self._input_stream.read(block_size)[0]
-            except Exception as e:
-                self.logger.warning(
-                    'Error while reading from the audio input: {}'.format(str(e))
-                )
-                continue
-
-            self._audio_queue.put(frames)
-
-        get_bus().post(SpeechDetectionStoppedEvent())
-        self.on_recording_ended()
-        self.logger.debug('Recording thread terminated')
-
-    @abstractmethod
-    @action
-    def detect(self, audio_file: str) -> SpeechDetectedResponse:
-        """
-        Perform speech-to-text analysis on an audio file. Must be implemented by the derived classes.
-
-        :param audio_file: Path to the audio file.
-        """
-        raise NotImplementedError
-
-    def __enter__(self):
-        """
-        Context manager enter. Starts detection and returns self.
-        """
-        self.start_detection()
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        """
-        Context manager exit. Stops detection.
-        """
-        self.stop_detection()
-
-    @action
-    def start_detection(
-        self,
-        input_device: Optional[str] = None,
-        seconds: Optional[float] = None,
-        block_duration: Optional[float] = None,
-    ) -> None:
-        """
-        Start the speech detection engine.
-
-        :param input_device: Audio input device name/index override
-        :param seconds: If set, then the detection engine will stop after this many seconds, otherwise it'll
-            start running until ``stop_detection`` is called or application stop.
-        :param block_duration: ``block_duration`` override.
-        """
-        assert (
-            not self._input_stream and not self._recording_thread
-        ), 'Speech detection is already running'
-        block_duration = block_duration or self.block_duration
-        input_device = input_device if input_device is not None else self.input_device
-        self._audio_queue = queue.Queue()
-        self._recording_thread = threading.Thread(
-            target=lambda: self.recording_thread(
-                block_duration=block_duration, input_device=input_device
-            )
-        )
-
-        self._recording_thread.start()
-        self._detection_thread = threading.Thread(
-            target=lambda: self.detection_thread()
-        )
-        self._detection_thread.start()
-
-        if seconds:
-            threading.Timer(seconds, lambda: self.stop_detection()).start()
-
-    @action
-    def stop_detection(self) -> None:
-        """
-        Stop the speech detection engine.
-        """
-        assert self._input_stream, 'Speech detection is not running'
-        self._input_stream.stop(ignore_errors=True)
-        self._input_stream.close(ignore_errors=True)
-        self._input_stream = None
-
-        if self._recording_thread:
-            self._recording_thread.join(timeout=self._thread_stop_timeout)
-            self._recording_thread = None
-
-        self._audio_queue = None
-        if self._detection_thread:
-            self._detection_thread.join(timeout=self._thread_stop_timeout)
-            self._detection_thread = None
-
-
-# vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/stt/picovoice/__init__.py b/platypush/plugins/stt/picovoice/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/platypush/plugins/stt/picovoice/hotword/__init__.py b/platypush/plugins/stt/picovoice/hotword/__init__.py
deleted file mode 100644
index 5c776783..00000000
--- a/platypush/plugins/stt/picovoice/hotword/__init__.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import os
-import struct
-from typing import Optional, List
-
-from platypush.message.response.stt import SpeechDetectedResponse
-from platypush.plugins import action
-from platypush.plugins.stt import SttPlugin
-
-
-class SttPicovoiceHotwordPlugin(SttPlugin):
-    """
-    This plugin performs hotword detection using `PicoVoice <https://github.com/Picovoice>`_.
-    """
-
-    def __init__(
-        self,
-        library_path: Optional[str] = None,
-        model_file_path: Optional[str] = None,
-        keyword_file_paths: Optional[List[str]] = None,
-        sensitivity: float = 0.5,
-        sensitivities: Optional[List[float]] = None,
-        *args,
-        **kwargs
-    ):
-        from pvporcupine import Porcupine
-        from pvporcupine.resources.util.python.util import (
-            LIBRARY_PATH,
-            MODEL_FILE_PATH,
-            KEYWORD_FILE_PATHS,
-        )
-
-        super().__init__(*args, **kwargs)
-
-        self.hotwords = list(self.hotwords)
-        self._hotword_engine: Optional[Porcupine] = None
-        self._library_path = os.path.abspath(
-            os.path.expanduser(library_path or LIBRARY_PATH)
-        )
-        self._model_file_path = os.path.abspath(
-            os.path.expanduser(model_file_path or MODEL_FILE_PATH)
-        )
-
-        if not keyword_file_paths:
-            hotwords = KEYWORD_FILE_PATHS
-            assert all(
-                hotword in hotwords for hotword in self.hotwords
-            ), 'Not all the hotwords could be found. Available hotwords: {}'.format(
-                list(hotwords.keys())
-            )
-
-            self._keyword_file_paths = [
-                os.path.abspath(os.path.expanduser(hotwords[hotword]))
-                for hotword in self.hotwords
-            ]
-        else:
-            self._keyword_file_paths = [
-                os.path.abspath(os.path.expanduser(p)) for p in keyword_file_paths
-            ]
-
-        self._sensitivities = []
-        if sensitivities:
-            assert len(self._keyword_file_paths) == len(
-                sensitivities
-            ), 'Please specify as many sensitivities as the number of configured hotwords'
-
-            self._sensitivities = sensitivities
-        else:
-            self._sensitivities = [sensitivity] * len(self._keyword_file_paths)
-
-    def convert_frames(self, frames: bytes) -> tuple:
-        assert self._hotword_engine, 'The hotword engine is not running'
-        return struct.unpack_from("h" * self._hotword_engine.frame_length, frames)
-
-    def on_detection_ended(self) -> None:
-        if self._hotword_engine:
-            self._hotword_engine.delete()
-        self._hotword_engine = None
-
-    def detect_speech(self, frames: tuple) -> str:
-        index = self._hotword_engine.process(frames)
-        if index < 0:
-            return ''
-
-        if index is True:
-            index = 0
-        return self.hotwords[index]
-
-    @action
-    def detect(self, audio_file: str) -> SpeechDetectedResponse:
-        """
-        Perform speech-to-text analysis on an audio file.
-
-        :param audio_file: Path to the audio file.
-        """
-        pass
-
-    def recording_thread(
-        self, input_device: Optional[str] = None, *args, **kwargs
-    ) -> None:
-        assert self._hotword_engine, 'The hotword engine has not yet been initialized'
-        super().recording_thread(
-            block_size=self._hotword_engine.frame_length, input_device=input_device
-        )
-
-    @action
-    def start_detection(self, *args, **kwargs) -> None:
-        from pvporcupine import Porcupine
-
-        self._hotword_engine = Porcupine(
-            library_path=self._library_path,
-            model_file_path=self._model_file_path,
-            keyword_file_paths=self._keyword_file_paths,
-            sensitivities=self._sensitivities,
-        )
-
-        self.rate = self._hotword_engine.sample_rate
-        super().start_detection(*args, **kwargs)
-
-
-# vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/stt/picovoice/hotword/manifest.yaml b/platypush/plugins/stt/picovoice/hotword/manifest.yaml
deleted file mode 100644
index f8e9d210..00000000
--- a/platypush/plugins/stt/picovoice/hotword/manifest.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-manifest:
-  events: {}
-  install:
-    pip:
-    - pvporcupine
-  package: platypush.plugins.stt.picovoice.hotword
-  type: plugin
diff --git a/platypush/plugins/stt/picovoice/speech/__init__.py b/platypush/plugins/stt/picovoice/speech/__init__.py
deleted file mode 100644
index 4043ec53..00000000
--- a/platypush/plugins/stt/picovoice/speech/__init__.py
+++ /dev/null
@@ -1,154 +0,0 @@
-import inspect
-import os
-import platform
-import struct
-import threading
-from typing import Optional
-
-from platypush.message.event.stt import SpeechStartedEvent
-
-from platypush.context import get_bus
-from platypush.message.response.stt import SpeechDetectedResponse
-from platypush.plugins import action
-from platypush.plugins.stt import SttPlugin
-
-
-class SttPicovoiceSpeechPlugin(SttPlugin):
-    """
-    This plugin performs speech detection using `PicoVoice <https://github.com/Picovoice>`_.
-    NOTE: The PicoVoice product used for real-time speech-to-text (Cheetah) can be used freely for
-    personal applications on x86_64 Linux. Other architectures and operating systems require a commercial license.
-    You can ask for a license `here <https://picovoice.ai/contact.html>`_.
-    """
-
-    def __init__(
-        self,
-        library_path: Optional[str] = None,
-        acoustic_model_path: Optional[str] = None,
-        language_model_path: Optional[str] = None,
-        license_path: Optional[str] = None,
-        end_of_speech_timeout: int = 1,
-        *args,
-        **kwargs
-    ):
-        """
-        :param library_path: Path to the Cheetah binary library for your OS
-            (default: ``CHEETAH_INSTALL_DIR/lib/OS/ARCH/libpv_cheetah.EXT``).
-        :param acoustic_model_path: Path to the acoustic speech model
-            (default: ``CHEETAH_INSTALL_DIR/lib/common/acoustic_model.pv``).
-        :param language_model_path:  Path to the language model
-            (default: ``CHEETAH_INSTALL_DIR/lib/common/language_model.pv``).
-        :param license_path: Path to your PicoVoice license
-            (default: ``CHEETAH_INSTALL_DIR/resources/license/cheetah_eval_linux_public.lic``).
-        :param end_of_speech_timeout: Number of seconds of silence during speech recognition before considering
-            a phrase over (default: 1).
-        """
-        from pvcheetah import Cheetah
-
-        super().__init__(*args, **kwargs)
-
-        self._basedir = os.path.abspath(
-            os.path.join(inspect.getfile(Cheetah), '..', '..', '..')
-        )
-        if not library_path:
-            library_path = self._get_library_path()
-        if not language_model_path:
-            language_model_path = os.path.join(
-                self._basedir, 'lib', 'common', 'language_model.pv'
-            )
-        if not acoustic_model_path:
-            acoustic_model_path = os.path.join(
-                self._basedir, 'lib', 'common', 'acoustic_model.pv'
-            )
-        if not license_path:
-            license_path = os.path.join(
-                self._basedir, 'resources', 'license', 'cheetah_eval_linux_public.lic'
-            )
-
-        self._library_path = library_path
-        self._language_model_path = language_model_path
-        self._acoustic_model_path = acoustic_model_path
-        self._license_path = license_path
-        self._end_of_speech_timeout = end_of_speech_timeout
-        self._stt_engine: Optional[Cheetah] = None
-        self._speech_in_progress = threading.Event()
-
-    def _get_library_path(self) -> str:
-        path = os.path.join(
-            self._basedir, 'lib', platform.system().lower(), platform.machine()
-        )
-        return os.path.join(
-            path, [f for f in os.listdir(path) if f.startswith('libpv_cheetah.')][0]
-        )
-
-    def convert_frames(self, frames: bytes) -> tuple:
-        assert self._stt_engine, 'The speech engine is not running'
-        return struct.unpack_from("h" * self._stt_engine.frame_length, frames)
-
-    def on_detection_ended(self) -> None:
-        if self._stt_engine:
-            self._stt_engine.delete()
-        self._stt_engine = None
-
-    def detect_speech(self, frames: tuple) -> str:
-        text, is_endpoint = self._stt_engine.process(frames)
-        text = text.strip()
-
-        if text:
-            if not self._speech_in_progress.is_set():
-                self._speech_in_progress.set()
-                get_bus().post(SpeechStartedEvent())
-
-            self._current_text += ' ' + text.strip()
-
-        if is_endpoint:
-            text = self._stt_engine.flush().strip().strip()
-            if text:
-                self._current_text += ' ' + text
-
-            self._speech_in_progress.clear()
-            if self._current_text:
-                self.on_speech_detected(self._current_text)
-
-            self._current_text = ''
-
-        return self._current_text
-
-    def process_text(self, text: str) -> None:
-        pass
-
-    @action
-    def detect(self, audio_file: str) -> SpeechDetectedResponse:
-        """
-        Perform speech-to-text analysis on an audio file.
-
-        :param audio_file: Path to the audio file.
-        """
-        pass
-
-    def recording_thread(
-        self, input_device: Optional[str] = None, *args, **kwargs
-    ) -> None:
-        assert self._stt_engine, 'The hotword engine has not yet been initialized'
-        super().recording_thread(
-            block_size=self._stt_engine.frame_length, input_device=input_device
-        )
-
-    @action
-    def start_detection(self, *args, **kwargs) -> None:
-        from pvcheetah import Cheetah
-
-        self._stt_engine = Cheetah(
-            library_path=self._library_path,
-            acoustic_model_path=self._acoustic_model_path,
-            language_model_path=self._language_model_path,
-            license_path=self._license_path,
-            endpoint_duration_sec=self._end_of_speech_timeout,
-        )
-
-        self.rate = self._stt_engine.sample_rate
-        self._speech_in_progress.clear()
-        super().start_detection(*args, **kwargs)
-
-
-# vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/stt/picovoice/speech/manifest.yaml b/platypush/plugins/stt/picovoice/speech/manifest.yaml
deleted file mode 100644
index 0e7a01a8..00000000
--- a/platypush/plugins/stt/picovoice/speech/manifest.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-manifest:
-  events: {}
-  install:
-    pip:
-    - cheetah
-  package: platypush.plugins.stt.picovoice.speech
-  type: plugin

From 01dec0b7a4a4c3c136c68513991e510f58c5ba2f Mon Sep 17 00:00:00 2001
From: Fabio Manganiello <fabio@manganiello.tech>
Date: Sun, 7 Apr 2024 22:42:01 +0200
Subject: [PATCH 2/6] [WIP] Added initial hotword integration.

---
 platypush/plugins/picovoice/__init__.py   | 170 +++++++++++++++++++++
 platypush/plugins/picovoice/_assistant.py | 171 ++++++++++++++++++++++
 platypush/plugins/picovoice/_recorder.py  |  77 ++++++++++
 platypush/plugins/picovoice/_state.py     |  14 ++
 platypush/plugins/picovoice/manifest.yaml |  22 +++
 platypush/plugins/sound/_manager/_main.py |   8 +-
 platypush/utils/mock/modules.py           |   4 +-
 7 files changed, 462 insertions(+), 4 deletions(-)
 create mode 100644 platypush/plugins/picovoice/__init__.py
 create mode 100644 platypush/plugins/picovoice/_assistant.py
 create mode 100644 platypush/plugins/picovoice/_recorder.py
 create mode 100644 platypush/plugins/picovoice/_state.py
 create mode 100644 platypush/plugins/picovoice/manifest.yaml

diff --git a/platypush/plugins/picovoice/__init__.py b/platypush/plugins/picovoice/__init__.py
new file mode 100644
index 00000000..a861f66b
--- /dev/null
+++ b/platypush/plugins/picovoice/__init__.py
@@ -0,0 +1,170 @@
+from typing import Optional, Sequence
+
+from platypush.context import get_bus
+from platypush.plugins import RunnablePlugin, action
+from platypush.plugins.assistant import AssistantPlugin
+
+from ._assistant import Assistant
+
+
+# pylint: disable=too-many-ancestors
+class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
+    """
+    A voice assistant that runs on your device, based on the `Picovoice
+    <https://picovoice.ai/>`_ engine.
+
+    .. note:: You will need a Picovoice account and a personal access key to
+        use this integration.
+
+    You can get your personal access key by signing up at the `Picovoice
+    console <https://console.picovoice.ai/>`_. You may be asked to submit a
+    reason for using the service (feel free to mention a personal Platypush
+    integration), and you will receive your personal access key.
+
+    You may also be asked to select which products you want to use. The default
+    configuration of this plugin requires the following:
+
+        * **Porcupine**: wake-word engine, if you want the device to listen for
+          a specific wake word in order to start the assistant.
+
+        * **Cheetah**: speech-to-text engine, if you want your voice
+          interactions to be transcribed into free text - either programmatically
+          or when triggered by the wake word. Or:
+
+        * **Rhino**: intent recognition engine, if you want to extract *intents*
+          out of your voice commands - for instance, the phrase "set the living
+          room temperature to 20 degrees" could be mapped to the intent with the
+          following parameters: ``intent``: ``set_temperature``, ``room``:
+          ``living_room``, ``temperature``: ``20``.
+
+        * **Leopard**: speech-to-text engine aimed at offline transcription of
+          audio files rather than real-time transcription.
+
+    """
+
+    def __init__(
+        self,
+        access_key: str,
+        hotword_enabled: bool = True,
+        stt_enabled: bool = True,
+        intent_enabled: bool = False,
+        keywords: Optional[Sequence[str]] = None,
+        keyword_paths: Optional[Sequence[str]] = None,
+        keyword_model_path: Optional[str] = None,
+        **kwargs,
+    ):
+        """
+        :param access_key: Your Picovoice access key. You can get it by signing
+            up at the `Picovoice console <https://console.picovoice.ai/>`.
+        :param hotword_enabled: Enable the wake-word engine (default: True).
+            .. note:: The wake-word engine requires you to add Porcupine to the
+                products available in your Picovoice account.
+        :param stt_enabled: Enable the speech-to-text engine (default: True).
+            .. note:: The speech-to-text engine requires you to add Cheetah to
+                the products available in your Picovoice account.
+        :param intent_enabled: Enable the intent recognition engine (default:
+            False).
+            .. note:: The intent recognition engine requires you to add Rhino
+                to the products available in your Picovoice account.
+        :param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
+            google``...). Either ``keywords`` or ``keyword_paths`` must be
+            provided if the wake-word engine is enabled. This list can include
+            any of the default Picovoice keywords (available on the `Picovoice
+            repository
+            <https://github.com/Picovoice/porcupine/tree/master/resources/keyword_files>`_).
+        :param keyword_paths: List of paths to the keyword files to listen for.
+            Custom keyword files can be created using the `Picovoice console
+            <https://console.picovoice.ai/ppn>`_ and downloaded from the
+            console itself.
+        :param keyword_model_path: If you are using a keyword file in a
+            non-English language, you can provide the path to the model file
+            for its language. Model files are available for all the supported
+            languages through the `Picovoice repository
+            <https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
+        """
+        super().__init__(**kwargs)
+        self._assistant_args = {
+            'stop_event': self._should_stop,
+            'access_key': access_key,
+            'hotword_enabled': hotword_enabled,
+            'stt_enabled': stt_enabled,
+            'intent_enabled': intent_enabled,
+            'keywords': keywords,
+            'keyword_paths': keyword_paths,
+            'keyword_model_path': keyword_model_path,
+        }
+
+    @action
+    def start_conversation(self, *_, **__):
+        """
+        Programmatically start a conversation with the assistant
+        """
+
+    @action
+    def stop_conversation(self, *_, **__):
+        """
+        Programmatically stop a running conversation with the assistant
+        """
+
+    @action
+    def mute(self, *_, **__):
+        """
+        Mute the microphone. Alias for :meth:`.set_mic_mute` with
+        ``muted=True``.
+        """
+
+    @action
+    def unmute(self, *_, **__):
+        """
+        Unmute the microphone. Alias for :meth:`.set_mic_mute` with
+        ``muted=False``.
+        """
+
+    @action
+    def set_mic_mute(self, muted: bool):
+        """
+        Programmatically mute/unmute the microphone.
+
+        :param muted: Set to True or False.
+        """
+
+    @action
+    def toggle_mute(self, *_, **__):
+        """
+        Toggle the mic mute state.
+        """
+
+    @action
+    def send_text_query(self, *_, query: str, **__):
+        """
+        Send a text query to the assistant.
+
+        This is equivalent to saying something to the assistant.
+
+        :param query: Query to be sent.
+        """
+
+    def main(self):
+        while not self.should_stop():
+            self.logger.info('Starting Picovoice assistant')
+            with Assistant(**self._assistant_args) as assistant:
+                try:
+                    for event in assistant:
+                        if event:
+                            get_bus().post(event)
+                except KeyboardInterrupt:
+                    break
+                except Exception as e:
+                    self.logger.error('Picovoice assistant error: %s', e, exc_info=True)
+                    self.wait_stop(5)
+
+    def stop(self):
+        try:
+            self.stop_conversation()
+        except RuntimeError:
+            pass
+
+        super().stop()
+
+
+# vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/picovoice/_assistant.py b/platypush/plugins/picovoice/_assistant.py
new file mode 100644
index 00000000..5181aa57
--- /dev/null
+++ b/platypush/plugins/picovoice/_assistant.py
@@ -0,0 +1,171 @@
+import logging
+import os
+from threading import Event
+from time import time
+from typing import Any, Dict, Optional, Sequence
+
+import pvcheetah
+import pvleopard
+import pvporcupine
+import pvrhino
+
+from platypush.message.event.assistant import HotwordDetectedEvent
+
+from ._recorder import AudioRecorder
+
+
+class Assistant:
+    """
+    A facade class that wraps the Picovoice engines under an assistant API.
+    """
+
+    def __init__(
+        self,
+        access_key: str,
+        stop_event: Event,
+        hotword_enabled: bool = True,
+        stt_enabled: bool = True,
+        intent_enabled: bool = False,
+        keywords: Optional[Sequence[str]] = None,
+        keyword_paths: Optional[Sequence[str]] = None,
+        keyword_model_path: Optional[str] = None,
+        frame_expiration: float = 3.0,  # Don't process audio frames older than this
+    ):
+        self.logger = logging.getLogger(__name__)
+        self._access_key = access_key
+        self._stop_event = stop_event
+        self.hotword_enabled = hotword_enabled
+        self.stt_enabled = stt_enabled
+        self.intent_enabled = intent_enabled
+        self.keywords = list(keywords or [])
+        self.keyword_paths = None
+        self.keyword_model_path = None
+        self.frame_expiration = frame_expiration
+        self._recorder = None
+
+        if hotword_enabled:
+            if keyword_paths:
+                keyword_paths = [os.path.expanduser(path) for path in keyword_paths]
+                missing_paths = [
+                    path for path in keyword_paths if not os.path.isfile(path)
+                ]
+                if missing_paths:
+                    raise FileNotFoundError(f'Keyword files not found: {missing_paths}')
+
+                self.keyword_paths = keyword_paths
+
+            if keyword_model_path:
+                keyword_model_path = os.path.expanduser(keyword_model_path)
+                if not os.path.isfile(keyword_model_path):
+                    raise FileNotFoundError(
+                        f'Keyword model file not found: {keyword_model_path}'
+                    )
+
+                self.keyword_model_path = keyword_model_path
+
+        self._cheetah: Optional[pvcheetah.Cheetah] = None
+        self._leopard: Optional[pvleopard.Leopard] = None
+        self._porcupine: Optional[pvporcupine.Porcupine] = None
+        self._rhino: Optional[pvrhino.Rhino] = None
+
+    def should_stop(self):
+        return self._stop_event.is_set()
+
+    def wait_stop(self):
+        self._stop_event.wait()
+
+    def _create_porcupine(self):
+        if not self.hotword_enabled:
+            return None
+
+        args: Dict[str, Any] = {'access_key': self._access_key}
+        if not (self.keywords or self.keyword_paths):
+            raise ValueError(
+                'You need to provide either a list of keywords or a list of '
+                'keyword paths if the wake-word engine is enabled'
+            )
+
+        if self.keywords:
+            args['keywords'] = self.keywords
+        if self.keyword_paths:
+            args['keyword_paths'] = self.keyword_paths
+        if self.keyword_model_path:
+            args['model_path'] = self.keyword_model_path
+
+        return pvporcupine.create(**args)
+
+    @property
+    def porcupine(self) -> Optional[pvporcupine.Porcupine]:
+        if not self._porcupine:
+            self._porcupine = self._create_porcupine()
+
+        return self._porcupine
+
+    def __enter__(self):
+        if self._recorder:
+            self.logger.info('A recording stream already exists')
+        elif self.porcupine:
+            self._recorder = AudioRecorder(
+                stop_event=self._stop_event,
+                sample_rate=self.porcupine.sample_rate,
+                frame_size=self.porcupine.frame_length,
+                channels=1,
+            )
+
+            self._recorder.__enter__()
+
+        return self
+
+    def __exit__(self, *_):
+        if self._recorder:
+            self._recorder.__exit__(*_)
+            self._recorder = None
+
+        if self._cheetah:
+            self._cheetah.delete()
+            self._cheetah = None
+
+        if self._leopard:
+            self._leopard.delete()
+            self._leopard = None
+
+        if self._porcupine:
+            self._porcupine.delete()
+            self._porcupine = None
+
+        if self._rhino:
+            self._rhino.delete()
+            self._rhino = None
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        has_data = False
+        if self.should_stop() or not self._recorder:
+            raise StopIteration
+
+        while not (self.should_stop() or has_data):
+            if self.porcupine:  # TODO also check current state
+                data = self._recorder.read()
+                if data is None:
+                    continue
+
+                frame, t = data
+                if time() - t > self.frame_expiration:
+                    self.logger.info(
+                        'Skipping audio frame older than %ss', self.frame_expiration
+                    )
+                    continue  # The audio frame is too old
+
+                keyword_index = self.porcupine.process(frame)
+                if keyword_index is None:
+                    continue  # No keyword detected
+
+                if keyword_index >= 0 and self.keywords:
+                    return HotwordDetectedEvent(hotword=self.keywords[keyword_index])
+
+        raise StopIteration
+
+
+# vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/picovoice/_recorder.py b/platypush/plugins/picovoice/_recorder.py
new file mode 100644
index 00000000..9df81e7c
--- /dev/null
+++ b/platypush/plugins/picovoice/_recorder.py
@@ -0,0 +1,77 @@
+from collections import namedtuple
+from logging import getLogger
+from queue import Full, Queue
+from threading import Event
+from time import time
+from typing import Optional
+
+import sounddevice as sd
+
+from platypush.utils import wait_for_either
+
+
+AudioFrame = namedtuple('AudioFrame', ['data', 'timestamp'])
+
+
+class AudioRecorder:
+    """
+    Audio recorder component that uses the sounddevice library to record audio
+    from the microphone.
+    """
+
+    def __init__(
+        self,
+        stop_event: Event,
+        sample_rate: int,
+        frame_size: int,
+        channels: int,
+        dtype: str = 'int16',
+        queue_size: int = 20,
+    ):
+        self.logger = getLogger(__name__)
+        self._audio_queue: Queue[AudioFrame] = Queue(maxsize=queue_size)
+        self.frame_size = frame_size
+        self._stop_event = Event()
+        self._upstream_stop_event = stop_event
+        self.stream = sd.InputStream(
+            samplerate=sample_rate,
+            channels=channels,
+            dtype=dtype,
+            blocksize=frame_size,
+            callback=self._audio_callback,
+        )
+
+    def __enter__(self):
+        self._stop_event.clear()
+        self.stream.start()
+        return self
+
+    def __exit__(self, *_):
+        self.stop()
+        # self.stream.close()
+
+    def _audio_callback(self, indata, *_):
+        if self.should_stop():
+            return
+
+        try:
+            self._audio_queue.put_nowait(AudioFrame(indata.reshape(-1), time()))
+        except Full:
+            self.logger.warning('Audio queue is full, dropping audio frame')
+
+    def read(self, timeout: Optional[float] = None):
+        try:
+            return self._audio_queue.get(timeout=timeout)
+        except TimeoutError:
+            self.logger.debug('Audio queue is empty')
+            return None
+
+    def stop(self):
+        self._stop_event.set()
+        self.stream.stop()
+
+    def should_stop(self):
+        return self._stop_event.is_set() or self._upstream_stop_event.is_set()
+
+    def wait(self, timeout: Optional[float] = None):
+        wait_for_either(self._stop_event, self._upstream_stop_event, timeout=timeout)
diff --git a/platypush/plugins/picovoice/_state.py b/platypush/plugins/picovoice/_state.py
new file mode 100644
index 00000000..e0eb7e71
--- /dev/null
+++ b/platypush/plugins/picovoice/_state.py
@@ -0,0 +1,14 @@
+from enum import Enum
+
+
+class AssistantState(Enum):
+    """
+    Possible states of the assistant.
+    """
+
+    IDLE = 'idle'
+    DETECTING_HOTWORD = 'detecting_hotword'
+    DETECTING_SPEECH = 'detecting_speech'
+
+
+# vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/picovoice/manifest.yaml b/platypush/plugins/picovoice/manifest.yaml
new file mode 100644
index 00000000..9e9afc06
--- /dev/null
+++ b/platypush/plugins/picovoice/manifest.yaml
@@ -0,0 +1,22 @@
+manifest:
+  package: platypush.plugins.picovoice
+  type: plugin
+  events:
+    - platypush.message.event.assistant.ConversationEndEvent
+    - platypush.message.event.assistant.ConversationStartEvent
+    - platypush.message.event.assistant.ConversationTimeoutEvent
+    - platypush.message.event.assistant.HotwordDetectedEvent
+    - platypush.message.event.assistant.MicMutedEvent
+    - platypush.message.event.assistant.MicUnmutedEvent
+    - platypush.message.event.assistant.NoResponseEvent
+    - platypush.message.event.assistant.ResponseEvent
+    - platypush.message.event.assistant.SpeechRecognizedEvent
+  install:
+    pacman:
+      - python-sounddevice
+    pip:
+      - pvcheetah
+      - pvleopard
+      - pvporcupine
+      - pvrhino
+      - sounddevice
diff --git a/platypush/plugins/sound/_manager/_main.py b/platypush/plugins/sound/_manager/_main.py
index 9a1d96cf..4dea95df 100644
--- a/platypush/plugins/sound/_manager/_main.py
+++ b/platypush/plugins/sound/_manager/_main.py
@@ -247,9 +247,11 @@ class AudioManager:
         wait_start = time()
         for audio_thread in streams_to_stop:
             audio_thread.join(
-                timeout=max(0, timeout - (time() - wait_start))
-                if timeout is not None
-                else None
+                timeout=(
+                    max(0, timeout - (time() - wait_start))
+                    if timeout is not None
+                    else None
+                )
             )
 
         # Remove references
diff --git a/platypush/utils/mock/modules.py b/platypush/utils/mock/modules.py
index 6af74355..eb6149f9 100644
--- a/platypush/utils/mock/modules.py
+++ b/platypush/utils/mock/modules.py
@@ -83,7 +83,9 @@ mock_imports = [
     "pmw3901",
     "psutil",
     "pvcheetah",
-    "pvporcupine ",
+    "pvleopard",
+    "pvporcupine",
+    "pvrhino",
     "pyHS100",
     "pyaudio",
     "pychromecast",

From f021b471aa82fa4f965be0a118695ebdd9d36354 Mon Sep 17 00:00:00 2001
From: Fabio Manganiello <fabio@manganiello.tech>
Date: Mon, 8 Apr 2024 01:54:26 +0200
Subject: [PATCH 3/6] [WIP] Added speech detection logic over Cheetah.

---
 docs/source/backends.rst                      |   2 -
 .../backend/stt.picovoice.hotword.rst         |   5 -
 .../backend/stt.picovoice.speech.rst          |   5 -
 docs/source/platypush/plugins/picovoice.rst   |   5 +
 .../plugins/stt.picovoice.hotword.rst         |   5 -
 .../plugins/stt.picovoice.speech.rst          |   5 -
 docs/source/plugins.rst                       |   3 +-
 platypush/plugins/picovoice/__init__.py       |  63 +++++-
 platypush/plugins/picovoice/_assistant.py     | 198 ++++++++++++++----
 platypush/plugins/picovoice/_context.py       |  43 ++++
 platypush/plugins/picovoice/_recorder.py      |   3 +-
 11 files changed, 263 insertions(+), 74 deletions(-)
 delete mode 100644 docs/source/platypush/backend/stt.picovoice.hotword.rst
 delete mode 100644 docs/source/platypush/backend/stt.picovoice.speech.rst
 create mode 100644 docs/source/platypush/plugins/picovoice.rst
 delete mode 100644 docs/source/platypush/plugins/stt.picovoice.hotword.rst
 delete mode 100644 docs/source/platypush/plugins/stt.picovoice.speech.rst
 create mode 100644 platypush/plugins/picovoice/_context.py

diff --git a/docs/source/backends.rst b/docs/source/backends.rst
index 2a43daee..4171e882 100644
--- a/docs/source/backends.rst
+++ b/docs/source/backends.rst
@@ -10,6 +10,4 @@ Backends
     platypush/backend/midi.rst
     platypush/backend/nodered.rst
     platypush/backend/redis.rst
-    platypush/backend/stt.picovoice.hotword.rst
-    platypush/backend/stt.picovoice.speech.rst
     platypush/backend/tcp.rst
diff --git a/docs/source/platypush/backend/stt.picovoice.hotword.rst b/docs/source/platypush/backend/stt.picovoice.hotword.rst
deleted file mode 100644
index 85838688..00000000
--- a/docs/source/platypush/backend/stt.picovoice.hotword.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-``stt.picovoice.hotword``
-===========================================
-
-.. automodule:: platypush.backend.stt.picovoice.hotword
-    :members:
diff --git a/docs/source/platypush/backend/stt.picovoice.speech.rst b/docs/source/platypush/backend/stt.picovoice.speech.rst
deleted file mode 100644
index 8b580966..00000000
--- a/docs/source/platypush/backend/stt.picovoice.speech.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-``stt.picovoice.speech``
-==========================================
-
-.. automodule:: platypush.backend.stt.picovoice.speech
-    :members:
diff --git a/docs/source/platypush/plugins/picovoice.rst b/docs/source/platypush/plugins/picovoice.rst
new file mode 100644
index 00000000..f1f8acde
--- /dev/null
+++ b/docs/source/platypush/plugins/picovoice.rst
@@ -0,0 +1,5 @@
+``picovoice``
+=============
+
+.. automodule:: platypush.plugins.picovoice
+    :members:
diff --git a/docs/source/platypush/plugins/stt.picovoice.hotword.rst b/docs/source/platypush/plugins/stt.picovoice.hotword.rst
deleted file mode 100644
index 11eb37dd..00000000
--- a/docs/source/platypush/plugins/stt.picovoice.hotword.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-``stt.picovoice.hotword``
-===========================================
-
-.. automodule:: platypush.plugins.stt.picovoice.hotword
-    :members:
diff --git a/docs/source/platypush/plugins/stt.picovoice.speech.rst b/docs/source/platypush/plugins/stt.picovoice.speech.rst
deleted file mode 100644
index 890c904c..00000000
--- a/docs/source/platypush/plugins/stt.picovoice.speech.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-``stt.picovoice.speech``
-==========================================
-
-.. automodule:: platypush.plugins.stt.picovoice.speech
-    :members:
diff --git a/docs/source/plugins.rst b/docs/source/plugins.rst
index 5e583f5e..783cb841 100644
--- a/docs/source/plugins.rst
+++ b/docs/source/plugins.rst
@@ -95,6 +95,7 @@ Plugins
     platypush/plugins/nmap.rst
     platypush/plugins/ntfy.rst
     platypush/plugins/otp.rst
+    platypush/plugins/picovoice.rst
     platypush/plugins/pihole.rst
     platypush/plugins/ping.rst
     platypush/plugins/printer.cups.rst
@@ -119,8 +120,6 @@ Plugins
     platypush/plugins/smartthings.rst
     platypush/plugins/sound.rst
     platypush/plugins/ssh.rst
-    platypush/plugins/stt.picovoice.hotword.rst
-    platypush/plugins/stt.picovoice.speech.rst
     platypush/plugins/sun.rst
     platypush/plugins/switch.tplink.rst
     platypush/plugins/switch.wemo.rst
diff --git a/platypush/plugins/picovoice/__init__.py b/platypush/plugins/picovoice/__init__.py
index a861f66b..c1e55570 100644
--- a/platypush/plugins/picovoice/__init__.py
+++ b/platypush/plugins/picovoice/__init__.py
@@ -51,27 +51,34 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
         keywords: Optional[Sequence[str]] = None,
         keyword_paths: Optional[Sequence[str]] = None,
         keyword_model_path: Optional[str] = None,
+        speech_model_path: Optional[str] = None,
+        endpoint_duration: Optional[float] = 0.5,
+        enable_automatic_punctuation: bool = False,
+        start_conversation_on_hotword: bool = True,
+        audio_queue_size: int = 100,
+        conversation_timeout: Optional[float] = 5.0,
         **kwargs,
     ):
         """
         :param access_key: Your Picovoice access key. You can get it by signing
             up at the `Picovoice console <https://console.picovoice.ai/>`.
         :param hotword_enabled: Enable the wake-word engine (default: True).
-            .. note:: The wake-word engine requires you to add Porcupine to the
-                products available in your Picovoice account.
+            **Note**: The wake-word engine requires you to add Porcupine to the
+            products available in your Picovoice account.
         :param stt_enabled: Enable the speech-to-text engine (default: True).
-            .. note:: The speech-to-text engine requires you to add Cheetah to
-                the products available in your Picovoice account.
+            **Note**: The speech-to-text engine requires you to add Cheetah to
+            the products available in your Picovoice account.
         :param intent_enabled: Enable the intent recognition engine (default:
             False).
-            .. note:: The intent recognition engine requires you to add Rhino
-                to the products available in your Picovoice account.
+            **Note**: The intent recognition engine requires you to add Rhino
+            to the products available in your Picovoice account.
         :param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
-            google``...). Either ``keywords`` or ``keyword_paths`` must be
-            provided if the wake-word engine is enabled. This list can include
-            any of the default Picovoice keywords (available on the `Picovoice
-            repository
+            google``...). This is required if the wake-word engine is enabled.
+            See the `Picovoice repository
             <https://github.com/Picovoice/porcupine/tree/master/resources/keyword_files>`_).
+            for a list of the stock keywords available. If you have a custom
+            model, you can pass its path to the ``keyword_paths`` parameter and
+            its filename (without the path and the platform extension) here.
         :param keyword_paths: List of paths to the keyword files to listen for.
             Custom keyword files can be created using the `Picovoice console
             <https://console.picovoice.ai/ppn>`_ and downloaded from the
@@ -81,6 +88,35 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
             for its language. Model files are available for all the supported
             languages through the `Picovoice repository
             <https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
+        :param speech_model_path: Path to the speech model file. If you are
+            using a language other than English, you can provide the path to the
+            model file for that language. Model files are available for all the
+            supported languages through the `Picovoice repository
+            <https://github.com/Picovoice/cheetah/tree/master/lib/common>`_.
+        :param endpoint_duration: If set, the assistant will stop listening when
+            no speech is detected for the specified duration (in seconds) after
+            the end of an utterance.
+        :param enable_automatic_punctuation: Enable automatic punctuation
+            insertion.
+        :param start_conversation_on_hotword: If set to True (default), a speech
+            detection session will be started when the hotword is detected. If
+            set to False, you may want to start the conversation programmatically
+            by calling the :meth:`.start_conversation` method instead, or run any
+            custom hotword detection logic. This can be particularly useful
+            when you want to run the assistant in a push-to-talk mode, or when you
+            want different hotwords to trigger conversations with different models
+            or languages.
+        :param audio_queue_size: Maximum number of audio frames to hold in the
+            processing queue. You may want to increase this value if you are
+            running this integration on a slow device and/or the logs report
+            audio frame drops too often. Keep in mind that increasing this value
+            will increase the memory usage of the integration. Also, a higher
+            value may result in higher accuracy at the cost of higher latency.
+        :param conversation_timeout: Maximum time to wait for some speech to be
+            detected after the hotword is detected. If no speech is detected
+            within this time, the conversation will time out and the plugin will
+            go back into hotword detection mode, if the mode is enabled. Default:
+            5 seconds.
         """
         super().__init__(**kwargs)
         self._assistant_args = {
@@ -92,6 +128,12 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
             'keywords': keywords,
             'keyword_paths': keyword_paths,
             'keyword_model_path': keyword_model_path,
+            'speech_model_path': speech_model_path,
+            'endpoint_duration': endpoint_duration,
+            'enable_automatic_punctuation': enable_automatic_punctuation,
+            'start_conversation_on_hotword': start_conversation_on_hotword,
+            'audio_queue_size': audio_queue_size,
+            'conversation_timeout': conversation_timeout,
         }
 
     @action
@@ -151,6 +193,7 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
                 try:
                     for event in assistant:
                         if event:
+                            event.args['assistant'] = 'picovoice'
                             get_bus().post(event)
                 except KeyboardInterrupt:
                     break
diff --git a/platypush/plugins/picovoice/_assistant.py b/platypush/plugins/picovoice/_assistant.py
index 5181aa57..27a72712 100644
--- a/platypush/plugins/picovoice/_assistant.py
+++ b/platypush/plugins/picovoice/_assistant.py
@@ -1,6 +1,6 @@
 import logging
 import os
-from threading import Event
+from threading import Event, RLock
 from time import time
 from typing import Any, Dict, Optional, Sequence
 
@@ -9,9 +9,18 @@ import pvleopard
 import pvporcupine
 import pvrhino
 
-from platypush.message.event.assistant import HotwordDetectedEvent
+from platypush.context import get_bus
+from platypush.message.event.assistant import (
+    ConversationStartEvent,
+    ConversationEndEvent,
+    ConversationTimeoutEvent,
+    HotwordDetectedEvent,
+    SpeechRecognizedEvent,
+)
 
+from ._context import SpeechDetectionContext
 from ._recorder import AudioRecorder
+from ._state import AssistantState
 
 
 class Assistant:
@@ -30,10 +39,16 @@ class Assistant:
         keyword_paths: Optional[Sequence[str]] = None,
         keyword_model_path: Optional[str] = None,
         frame_expiration: float = 3.0,  # Don't process audio frames older than this
+        speech_model_path: Optional[str] = None,
+        endpoint_duration: Optional[float] = None,
+        enable_automatic_punctuation: bool = False,
+        start_conversation_on_hotword: bool = False,
+        audio_queue_size: int = 100,
+        conversation_timeout: Optional[float] = None,
     ):
-        self.logger = logging.getLogger(__name__)
         self._access_key = access_key
         self._stop_event = stop_event
+        self.logger = logging.getLogger(__name__)
         self.hotword_enabled = hotword_enabled
         self.stt_enabled = stt_enabled
         self.intent_enabled = intent_enabled
@@ -41,9 +56,23 @@ class Assistant:
         self.keyword_paths = None
         self.keyword_model_path = None
         self.frame_expiration = frame_expiration
+        self.speech_model_path = speech_model_path
+        self.endpoint_duration = endpoint_duration
+        self.enable_automatic_punctuation = enable_automatic_punctuation
+        self.start_conversation_on_hotword = start_conversation_on_hotword
+        self.audio_queue_size = audio_queue_size
+
         self._recorder = None
+        self._state = AssistantState.IDLE
+        self._state_lock = RLock()
+        self._speech_ctx = SpeechDetectionContext(timeout=conversation_timeout)
 
         if hotword_enabled:
+            if not keywords:
+                raise ValueError(
+                    'You need to provide a list of keywords if the wake-word engine is enabled'
+                )
+
             if keyword_paths:
                 keyword_paths = [os.path.expanduser(path) for path in keyword_paths]
                 missing_paths = [
@@ -74,46 +103,89 @@ class Assistant:
     def wait_stop(self):
         self._stop_event.wait()
 
-    def _create_porcupine(self):
-        if not self.hotword_enabled:
-            return None
+    @property
+    def state(self) -> AssistantState:
+        with self._state_lock:
+            return self._state
 
-        args: Dict[str, Any] = {'access_key': self._access_key}
-        if not (self.keywords or self.keyword_paths):
-            raise ValueError(
-                'You need to provide either a list of keywords or a list of '
-                'keyword paths if the wake-word engine is enabled'
-            )
+    @state.setter
+    def state(self, state: AssistantState):
+        with self._state_lock:
+            prev_state = self._state
+            self._state = state
+            new_state = self.state
 
-        if self.keywords:
-            args['keywords'] = self.keywords
-        if self.keyword_paths:
-            args['keyword_paths'] = self.keyword_paths
-        if self.keyword_model_path:
-            args['model_path'] = self.keyword_model_path
+        if prev_state == new_state:
+            return
 
-        return pvporcupine.create(**args)
+        if prev_state == AssistantState.DETECTING_SPEECH:
+            self._speech_ctx.stop()
+            self._post_event(ConversationEndEvent())
+        elif new_state == AssistantState.DETECTING_SPEECH:
+            self._speech_ctx.start()
+            self._post_event(ConversationStartEvent())
 
     @property
     def porcupine(self) -> Optional[pvporcupine.Porcupine]:
+        if not self.hotword_enabled:
+            return None
+
         if not self._porcupine:
-            self._porcupine = self._create_porcupine()
+            args: Dict[str, Any] = {'access_key': self._access_key}
+            if self.keywords:
+                args['keywords'] = self.keywords
+            if self.keyword_paths:
+                args['keyword_paths'] = self.keyword_paths
+            if self.keyword_model_path:
+                args['model_path'] = self.keyword_model_path
+
+            self._porcupine = pvporcupine.create(**args)
 
         return self._porcupine
 
+    @property
+    def cheetah(self) -> Optional[pvcheetah.Cheetah]:
+        if not self.stt_enabled:
+            return None
+
+        if not self._cheetah:
+            args: Dict[str, Any] = {'access_key': self._access_key}
+            if self.speech_model_path:
+                args['model_path'] = self.speech_model_path
+            if self.endpoint_duration:
+                args['endpoint_duration_sec'] = self.endpoint_duration
+            if self.enable_automatic_punctuation:
+                args['enable_automatic_punctuation'] = self.enable_automatic_punctuation
+
+            self._cheetah = pvcheetah.create(**args)
+
+        return self._cheetah
+
     def __enter__(self):
+        if self.should_stop():
+            return self
+
         if self._recorder:
             self.logger.info('A recording stream already exists')
-        elif self.porcupine:
+        elif self.porcupine or self.cheetah:
+            sample_rate = (self.porcupine or self.cheetah).sample_rate  # type: ignore
+            frame_length = (self.porcupine or self.cheetah).frame_length  # type: ignore
+
             self._recorder = AudioRecorder(
                 stop_event=self._stop_event,
-                sample_rate=self.porcupine.sample_rate,
-                frame_size=self.porcupine.frame_length,
+                sample_rate=sample_rate,
+                frame_size=frame_length,
+                queue_size=self.audio_queue_size,
                 channels=1,
             )
 
             self._recorder.__enter__()
 
+            if self.porcupine:
+                self.state = AssistantState.DETECTING_HOTWORD
+            else:
+                self.state = AssistantState.DETECTING_SPEECH
+
         return self
 
     def __exit__(self, *_):
@@ -121,6 +193,8 @@ class Assistant:
             self._recorder.__exit__(*_)
             self._recorder = None
 
+        self.state = AssistantState.IDLE
+
         if self._cheetah:
             self._cheetah.delete()
             self._cheetah = None
@@ -146,26 +220,74 @@ class Assistant:
             raise StopIteration
 
         while not (self.should_stop() or has_data):
-            if self.porcupine:  # TODO also check current state
-                data = self._recorder.read()
-                if data is None:
-                    continue
+            data = self._recorder.read()
+            if data is None:
+                continue
 
-                frame, t = data
-                if time() - t > self.frame_expiration:
-                    self.logger.info(
-                        'Skipping audio frame older than %ss', self.frame_expiration
-                    )
-                    continue  # The audio frame is too old
+            frame, t = data
+            if time() - t > self.frame_expiration:
+                self.logger.info(
+                    'Skipping audio frame older than %ss', self.frame_expiration
+                )
+                continue  # The audio frame is too old
 
-                keyword_index = self.porcupine.process(frame)
-                if keyword_index is None:
-                    continue  # No keyword detected
+            if self.porcupine and self.state == AssistantState.DETECTING_HOTWORD:
+                return self._process_hotword(frame)
 
-                if keyword_index >= 0 and self.keywords:
-                    return HotwordDetectedEvent(hotword=self.keywords[keyword_index])
+            if self.cheetah and self.state == AssistantState.DETECTING_SPEECH:
+                return self._process_speech(frame)
 
         raise StopIteration
 
+    def _post_event(self, event):
+        if event:
+            event.args['assistant'] = 'picovoice'
+            get_bus().post(event)
+
+    def _process_hotword(self, frame):
+        if not self.porcupine:
+            return None
+
+        keyword_index = self.porcupine.process(frame)
+        if keyword_index is None:
+            return None  # No keyword detected
+
+        if keyword_index >= 0 and self.keywords:
+            if self.start_conversation_on_hotword:
+                self.state = AssistantState.DETECTING_SPEECH
+
+            return HotwordDetectedEvent(hotword=self.keywords[keyword_index])
+
+        return None
+
+    def _process_speech(self, frame):
+        if not self.cheetah:
+            return None
+
+        event = None
+        (
+            self._speech_ctx.partial_transcript,
+            self._speech_ctx.is_final,
+        ) = self.cheetah.process(frame)
+
+        if self._speech_ctx.partial_transcript:
+            self.logger.info(
+                'Partial transcript: %s, is_final: %s',
+                self._speech_ctx.partial_transcript,
+                self._speech_ctx.is_final,
+            )
+
+        if self._speech_ctx.is_final or self._speech_ctx.timed_out:
+            event = (
+                ConversationTimeoutEvent()
+                if self._speech_ctx.timed_out
+                else SpeechRecognizedEvent(phrase=self.cheetah.flush())
+            )
+
+            if self.porcupine:
+                self.state = AssistantState.DETECTING_HOTWORD
+
+        return event
+
 
 # vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/picovoice/_context.py b/platypush/plugins/picovoice/_context.py
new file mode 100644
index 00000000..cb754610
--- /dev/null
+++ b/platypush/plugins/picovoice/_context.py
@@ -0,0 +1,43 @@
+from dataclasses import dataclass
+from time import time
+from typing import Optional
+
+
+@dataclass
+class SpeechDetectionContext:
+    """
+    Context of the speech detection process.
+    """
+
+    partial_transcript: str = ''
+    is_final: bool = False
+    timeout: Optional[float] = None
+    t_start: Optional[float] = None
+    t_end: Optional[float] = None
+
+    def start(self):
+        self.reset()
+        self.t_start = time()
+
+    def stop(self):
+        self.reset()
+        self.t_end = time()
+
+    def reset(self):
+        self.partial_transcript = ''
+        self.is_final = False
+        self.t_start = None
+        self.t_end = None
+
+    @property
+    def timed_out(self):
+        return (
+            not self.partial_transcript
+            and not self.is_final
+            and self.timeout
+            and self.t_start
+            and time() - self.t_start > self.timeout
+        )
+
+
+# vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/picovoice/_recorder.py b/platypush/plugins/picovoice/_recorder.py
index 9df81e7c..e0c23a8e 100644
--- a/platypush/plugins/picovoice/_recorder.py
+++ b/platypush/plugins/picovoice/_recorder.py
@@ -26,7 +26,7 @@ class AudioRecorder:
         frame_size: int,
         channels: int,
         dtype: str = 'int16',
-        queue_size: int = 20,
+        queue_size: int = 100,
     ):
         self.logger = getLogger(__name__)
         self._audio_queue: Queue[AudioFrame] = Queue(maxsize=queue_size)
@@ -48,7 +48,6 @@ class AudioRecorder:
 
     def __exit__(self, *_):
         self.stop()
-        # self.stream.close()
 
     def _audio_callback(self, indata, *_):
         if self.should_stop():

From 0b8e1bb81ba17b1ce82794ca756b27f3f00d6f22 Mon Sep 17 00:00:00 2001
From: Fabio Manganiello <fabio@manganiello.tech>
Date: Mon, 8 Apr 2024 03:02:03 +0200
Subject: [PATCH 4/6] Better integration with the native base API of the
 assistant plugin.

---
 platypush/plugins/assistant/__init__.py   | 26 ++++----
 platypush/plugins/picovoice/__init__.py   | 33 +++++++---
 platypush/plugins/picovoice/_assistant.py | 73 ++++++++++++++---------
 platypush/plugins/picovoice/_context.py   |  4 +-
 4 files changed, 87 insertions(+), 49 deletions(-)

diff --git a/platypush/plugins/assistant/__init__.py b/platypush/plugins/assistant/__init__.py
index bb5b25e4..97e98f30 100644
--- a/platypush/plugins/assistant/__init__.py
+++ b/platypush/plugins/assistant/__init__.py
@@ -9,21 +9,22 @@ from platypush.context import get_bus, get_plugin
 from platypush.entities.assistants import Assistant
 from platypush.entities.managers.assistants import AssistantEntityManager
 from platypush.message.event.assistant import (
-    AssistantEvent,
-    ConversationStartEvent,
-    ConversationEndEvent,
-    ConversationTimeoutEvent,
-    ResponseEvent,
-    NoResponseEvent,
-    SpeechRecognizedEvent,
-    AlarmStartedEvent,
     AlarmEndEvent,
-    TimerStartedEvent,
-    TimerEndEvent,
-    AlertStartedEvent,
+    AlarmStartedEvent,
     AlertEndEvent,
+    AlertStartedEvent,
+    AssistantEvent,
+    ConversationEndEvent,
+    ConversationStartEvent,
+    ConversationTimeoutEvent,
+    HotwordDetectedEvent,
     MicMutedEvent,
     MicUnmutedEvent,
+    NoResponseEvent,
+    ResponseEvent,
+    SpeechRecognizedEvent,
+    TimerEndEvent,
+    TimerStartedEvent,
 )
 from platypush.plugins import Plugin, action
 from platypush.utils import get_plugin_name_by_class
@@ -235,6 +236,9 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
             self.stop_conversation()
             tts.say(text=text, **self.tts_plugin_args)
 
+    def _on_hotword_detected(self, hotword: Optional[str]):
+        self._send_event(HotwordDetectedEvent, hotword=hotword)
+
     def _on_speech_recognized(self, phrase: Optional[str]):
         phrase = (phrase or '').lower().strip()
         self._last_query = phrase
diff --git a/platypush/plugins/picovoice/__init__.py b/platypush/plugins/picovoice/__init__.py
index c1e55570..811fe2de 100644
--- a/platypush/plugins/picovoice/__init__.py
+++ b/platypush/plugins/picovoice/__init__.py
@@ -1,10 +1,10 @@
 from typing import Optional, Sequence
 
-from platypush.context import get_bus
 from platypush.plugins import RunnablePlugin, action
 from platypush.plugins.assistant import AssistantPlugin
 
 from ._assistant import Assistant
+from ._state import AssistantState
 
 
 # pylint: disable=too-many-ancestors
@@ -56,7 +56,7 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
         enable_automatic_punctuation: bool = False,
         start_conversation_on_hotword: bool = True,
         audio_queue_size: int = 100,
-        conversation_timeout: Optional[float] = 5.0,
+        conversation_timeout: Optional[float] = 7.5,
         **kwargs,
     ):
         """
@@ -116,9 +116,10 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
             detected after the hotword is detected. If no speech is detected
             within this time, the conversation will time out and the plugin will
             go back into hotword detection mode, if the mode is enabled. Default:
-            5 seconds.
+            7.5 seconds.
         """
         super().__init__(**kwargs)
+        self._assistant = None
         self._assistant_args = {
             'stop_event': self._should_stop,
             'access_key': access_key,
@@ -134,6 +135,11 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
             'start_conversation_on_hotword': start_conversation_on_hotword,
             'audio_queue_size': audio_queue_size,
             'conversation_timeout': conversation_timeout,
+            'on_conversation_start': self._on_conversation_start,
+            'on_conversation_end': self._on_conversation_end,
+            'on_conversation_timeout': self._on_conversation_timeout,
+            'on_speech_recognized': self._on_speech_recognized,
+            'on_hotword_detected': self._on_hotword_detected,
         }
 
     @action
@@ -141,12 +147,25 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
         """
         Programmatically start a conversation with the assistant
         """
+        if not self._assistant:
+            self.logger.warning('Assistant not initialized')
+            return
+
+        self._assistant.state = AssistantState.DETECTING_SPEECH
 
     @action
     def stop_conversation(self, *_, **__):
         """
         Programmatically stop a running conversation with the assistant
         """
+        if not self._assistant:
+            self.logger.warning('Assistant not initialized')
+            return
+
+        if self._assistant.hotword_enabled:
+            self._assistant.state = AssistantState.DETECTING_HOTWORD
+        else:
+            self._assistant.state = AssistantState.IDLE
 
     @action
     def mute(self, *_, **__):
@@ -189,12 +208,10 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
     def main(self):
         while not self.should_stop():
             self.logger.info('Starting Picovoice assistant')
-            with Assistant(**self._assistant_args) as assistant:
+            with Assistant(**self._assistant_args) as self._assistant:
                 try:
-                    for event in assistant:
-                        if event:
-                            event.args['assistant'] = 'picovoice'
-                            get_bus().post(event)
+                    for event in self._assistant:
+                        self.logger.debug('Picovoice assistant event: %s', event)
                 except KeyboardInterrupt:
                     break
                 except Exception as e:
diff --git a/platypush/plugins/picovoice/_assistant.py b/platypush/plugins/picovoice/_assistant.py
index 27a72712..e511bc01 100644
--- a/platypush/plugins/picovoice/_assistant.py
+++ b/platypush/plugins/picovoice/_assistant.py
@@ -9,16 +9,13 @@ import pvleopard
 import pvporcupine
 import pvrhino
 
-from platypush.context import get_bus
 from platypush.message.event.assistant import (
-    ConversationStartEvent,
-    ConversationEndEvent,
     ConversationTimeoutEvent,
     HotwordDetectedEvent,
     SpeechRecognizedEvent,
 )
 
-from ._context import SpeechDetectionContext
+from ._context import ConversationContext
 from ._recorder import AudioRecorder
 from ._state import AssistantState
 
@@ -28,6 +25,9 @@ class Assistant:
     A facade class that wraps the Picovoice engines under an assistant API.
     """
 
+    def _default_callback(*_, **__):
+        pass
+
     def __init__(
         self,
         access_key: str,
@@ -45,6 +45,11 @@ class Assistant:
         start_conversation_on_hotword: bool = False,
         audio_queue_size: int = 100,
         conversation_timeout: Optional[float] = None,
+        on_conversation_start=_default_callback,
+        on_conversation_end=_default_callback,
+        on_conversation_timeout=_default_callback,
+        on_speech_recognized=_default_callback,
+        on_hotword_detected=_default_callback,
     ):
         self._access_key = access_key
         self._stop_event = stop_event
@@ -62,10 +67,16 @@ class Assistant:
         self.start_conversation_on_hotword = start_conversation_on_hotword
         self.audio_queue_size = audio_queue_size
 
+        self._on_conversation_start = on_conversation_start
+        self._on_conversation_end = on_conversation_end
+        self._on_conversation_timeout = on_conversation_timeout
+        self._on_speech_recognized = on_speech_recognized
+        self._on_hotword_detected = on_hotword_detected
+
         self._recorder = None
         self._state = AssistantState.IDLE
         self._state_lock = RLock()
-        self._speech_ctx = SpeechDetectionContext(timeout=conversation_timeout)
+        self._ctx = ConversationContext(timeout=conversation_timeout)
 
         if hotword_enabled:
             if not keywords:
@@ -119,11 +130,11 @@ class Assistant:
             return
 
         if prev_state == AssistantState.DETECTING_SPEECH:
-            self._speech_ctx.stop()
-            self._post_event(ConversationEndEvent())
+            self._ctx.stop()
+            self._on_conversation_end()
         elif new_state == AssistantState.DETECTING_SPEECH:
-            self._speech_ctx.start()
-            self._post_event(ConversationStartEvent())
+            self._ctx.start()
+            self._on_conversation_start()
 
     @property
     def porcupine(self) -> Optional[pvporcupine.Porcupine]:
@@ -239,11 +250,6 @@ class Assistant:
 
         raise StopIteration
 
-    def _post_event(self, event):
-        if event:
-            event.args['assistant'] = 'picovoice'
-            get_bus().post(event)
-
     def _process_hotword(self, frame):
         if not self.porcupine:
             return None
@@ -256,6 +262,7 @@ class Assistant:
             if self.start_conversation_on_hotword:
                 self.state = AssistantState.DETECTING_SPEECH
 
+            self._on_hotword_detected(hotword=self.keywords[keyword_index])
             return HotwordDetectedEvent(hotword=self.keywords[keyword_index])
 
         return None
@@ -265,26 +272,36 @@ class Assistant:
             return None
 
         event = None
-        (
-            self._speech_ctx.partial_transcript,
-            self._speech_ctx.is_final,
-        ) = self.cheetah.process(frame)
+        partial_transcript, self._ctx.is_final = self.cheetah.process(frame)
 
-        if self._speech_ctx.partial_transcript:
+        if partial_transcript:
+            self._ctx.partial_transcript += partial_transcript
             self.logger.info(
                 'Partial transcript: %s, is_final: %s',
-                self._speech_ctx.partial_transcript,
-                self._speech_ctx.is_final,
+                self._ctx.partial_transcript,
+                self._ctx.is_final,
             )
 
-        if self._speech_ctx.is_final or self._speech_ctx.timed_out:
-            event = (
-                ConversationTimeoutEvent()
-                if self._speech_ctx.timed_out
-                else SpeechRecognizedEvent(phrase=self.cheetah.flush())
-            )
+        if self._ctx.is_final or self._ctx.timed_out:
+            phrase = ''
+            if self.cheetah:
+                phrase = self.cheetah.flush()
 
-            if self.porcupine:
+            if not self._ctx.is_final:
+                self._ctx.partial_transcript += phrase
+                phrase = self._ctx.partial_transcript
+
+            phrase = phrase[:1].lower() + phrase[1:]
+
+            if self._ctx.is_final or phrase:
+                event = SpeechRecognizedEvent(phrase=phrase)
+                self._on_speech_recognized(phrase=phrase)
+            else:
+                event = ConversationTimeoutEvent()
+                self._on_conversation_timeout()
+
+            self._ctx.reset()
+            if self.hotword_enabled:
                 self.state = AssistantState.DETECTING_HOTWORD
 
         return event
diff --git a/platypush/plugins/picovoice/_context.py b/platypush/plugins/picovoice/_context.py
index cb754610..1a534073 100644
--- a/platypush/plugins/picovoice/_context.py
+++ b/platypush/plugins/picovoice/_context.py
@@ -4,9 +4,9 @@ from typing import Optional
 
 
 @dataclass
-class SpeechDetectionContext:
+class ConversationContext:
     """
-    Context of the speech detection process.
+    Context of the conversation process.
     """
 
     partial_transcript: str = ''

From c7094c7886b3c16739dad78073a1044cbf9d7ad3 Mon Sep 17 00:00:00 2001
From: Fabio Manganiello <fabio@manganiello.tech>
Date: Mon, 8 Apr 2024 13:26:44 +0200
Subject: [PATCH 5/6] `picovoice` -> `assistant.picovoice`

---
 platypush/plugins/{ => assistant}/picovoice/__init__.py   | 2 +-
 platypush/plugins/{ => assistant}/picovoice/_assistant.py | 0
 platypush/plugins/{ => assistant}/picovoice/_context.py   | 0
 platypush/plugins/{ => assistant}/picovoice/_recorder.py  | 0
 platypush/plugins/{ => assistant}/picovoice/_state.py     | 0
 platypush/plugins/{ => assistant}/picovoice/manifest.yaml | 3 ++-
 platypush/utils/mock/modules.py                           | 1 +
 7 files changed, 4 insertions(+), 2 deletions(-)
 rename platypush/plugins/{ => assistant}/picovoice/__init__.py (99%)
 rename platypush/plugins/{ => assistant}/picovoice/_assistant.py (100%)
 rename platypush/plugins/{ => assistant}/picovoice/_context.py (100%)
 rename platypush/plugins/{ => assistant}/picovoice/_recorder.py (100%)
 rename platypush/plugins/{ => assistant}/picovoice/_state.py (100%)
 rename platypush/plugins/{ => assistant}/picovoice/manifest.yaml (91%)

diff --git a/platypush/plugins/picovoice/__init__.py b/platypush/plugins/assistant/picovoice/__init__.py
similarity index 99%
rename from platypush/plugins/picovoice/__init__.py
rename to platypush/plugins/assistant/picovoice/__init__.py
index 811fe2de..c669f19f 100644
--- a/platypush/plugins/picovoice/__init__.py
+++ b/platypush/plugins/assistant/picovoice/__init__.py
@@ -8,7 +8,7 @@ from ._state import AssistantState
 
 
 # pylint: disable=too-many-ancestors
-class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
+class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
     """
     A voice assistant that runs on your device, based on the `Picovoice
     <https://picovoice.ai/>`_ engine.
diff --git a/platypush/plugins/picovoice/_assistant.py b/platypush/plugins/assistant/picovoice/_assistant.py
similarity index 100%
rename from platypush/plugins/picovoice/_assistant.py
rename to platypush/plugins/assistant/picovoice/_assistant.py
diff --git a/platypush/plugins/picovoice/_context.py b/platypush/plugins/assistant/picovoice/_context.py
similarity index 100%
rename from platypush/plugins/picovoice/_context.py
rename to platypush/plugins/assistant/picovoice/_context.py
diff --git a/platypush/plugins/picovoice/_recorder.py b/platypush/plugins/assistant/picovoice/_recorder.py
similarity index 100%
rename from platypush/plugins/picovoice/_recorder.py
rename to platypush/plugins/assistant/picovoice/_recorder.py
diff --git a/platypush/plugins/picovoice/_state.py b/platypush/plugins/assistant/picovoice/_state.py
similarity index 100%
rename from platypush/plugins/picovoice/_state.py
rename to platypush/plugins/assistant/picovoice/_state.py
diff --git a/platypush/plugins/picovoice/manifest.yaml b/platypush/plugins/assistant/picovoice/manifest.yaml
similarity index 91%
rename from platypush/plugins/picovoice/manifest.yaml
rename to platypush/plugins/assistant/picovoice/manifest.yaml
index 9e9afc06..a44bbccd 100644
--- a/platypush/plugins/picovoice/manifest.yaml
+++ b/platypush/plugins/assistant/picovoice/manifest.yaml
@@ -1,5 +1,5 @@
 manifest:
-  package: platypush.plugins.picovoice
+  package: platypush.plugins.assistant.picovoice
   type: plugin
   events:
     - platypush.message.event.assistant.ConversationEndEvent
@@ -17,6 +17,7 @@ manifest:
     pip:
       - pvcheetah
       - pvleopard
+      - pvorca
       - pvporcupine
       - pvrhino
       - sounddevice
diff --git a/platypush/utils/mock/modules.py b/platypush/utils/mock/modules.py
index eb6149f9..e83fd377 100644
--- a/platypush/utils/mock/modules.py
+++ b/platypush/utils/mock/modules.py
@@ -84,6 +84,7 @@ mock_imports = [
     "psutil",
     "pvcheetah",
     "pvleopard",
+    "pvorca",
     "pvporcupine",
     "pvrhino",
     "pyHS100",

From fa70c91a67193feb00b95aaa78434365586d57ae Mon Sep 17 00:00:00 2001
From: Fabio Manganiello <fabio@manganiello.tech>
Date: Mon, 8 Apr 2024 13:32:36 +0200
Subject: [PATCH 6/6] [assistant.picovoice] Better partial transcript + flush
 handling logic.

---
 platypush/plugins/assistant/picovoice/__init__.py   | 4 ++++
 platypush/plugins/assistant/picovoice/_assistant.py | 6 ++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/platypush/plugins/assistant/picovoice/__init__.py b/platypush/plugins/assistant/picovoice/__init__.py
index c669f19f..9426ba60 100644
--- a/platypush/plugins/assistant/picovoice/__init__.py
+++ b/platypush/plugins/assistant/picovoice/__init__.py
@@ -40,6 +40,10 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
         * **Leopard**: speech-to-text engine aimed at offline transcription of
           audio files rather than real-time transcription.
 
+        * **Orca**: text-to-speech engine, useful if you want to write custom
+          logic to respond to the user's voice commands and render the
+          responses as audio.
+
     """
 
     def __init__(
diff --git a/platypush/plugins/assistant/picovoice/_assistant.py b/platypush/plugins/assistant/picovoice/_assistant.py
index e511bc01..25142f33 100644
--- a/platypush/plugins/assistant/picovoice/_assistant.py
+++ b/platypush/plugins/assistant/picovoice/_assistant.py
@@ -287,10 +287,8 @@ class Assistant:
             if self.cheetah:
                 phrase = self.cheetah.flush()
 
-            if not self._ctx.is_final:
-                self._ctx.partial_transcript += phrase
-                phrase = self._ctx.partial_transcript
-
+            self._ctx.partial_transcript += phrase
+            phrase = self._ctx.partial_transcript
             phrase = phrase[:1].lower() + phrase[1:]
 
             if self._ctx.is_final or phrase: