[assistant.picovoice] Better partial transcript + flush handling logic.

picovoice -> assistant.picovoice
Better integration with the native base API of the assistant plugin.
2024-04-08 13:33:07 +02:00 · 2024-04-08 13:26:44 +02:00 · 2024-04-08 03:02:03 +02:00 · 2024-04-08 01:54:26 +02:00 · 2024-04-07 22:42:01 +02:00 · 2024-04-06 00:11:46 +02:00
28 changed files with 728 additions and 757 deletions
--- a/docs/source/backends.rst
+++ b/docs/source/backends.rst
@ -10,6 +10,4 @@ Backends
    platypush/backend/midi.rst
    platypush/backend/nodered.rst
    platypush/backend/redis.rst
-    platypush/backend/stt.picovoice.hotword.rst
-    platypush/backend/stt.picovoice.speech.rst
    platypush/backend/tcp.rst
--- a/docs/source/platypush/backend/stt.picovoice.hotword.rst
+++ b/docs/source/platypush/backend/stt.picovoice.hotword.rst
@ -1,5 +0,0 @@
-``stt.picovoice.hotword``
-===========================================
-
-.. automodule:: platypush.backend.stt.picovoice.hotword
-    :members:
--- a/docs/source/platypush/backend/stt.picovoice.speech.rst
+++ b/docs/source/platypush/backend/stt.picovoice.speech.rst
@ -1,5 +0,0 @@
-``stt.picovoice.speech``
-==========================================
-
-.. automodule:: platypush.backend.stt.picovoice.speech
-    :members:
--- a/docs/source/platypush/plugins/picovoice.rst
+++ b/docs/source/platypush/plugins/picovoice.rst
@ -0,0 +1,5 @@
+``picovoice``
+=============
+
+.. automodule:: platypush.plugins.picovoice
+    :members:
--- a/docs/source/platypush/plugins/stt.picovoice.hotword.rst
+++ b/docs/source/platypush/plugins/stt.picovoice.hotword.rst
@ -1,5 +0,0 @@
-``stt.picovoice.hotword``
-===========================================
-
-.. automodule:: platypush.plugins.stt.picovoice.hotword
-    :members:
--- a/docs/source/platypush/plugins/stt.picovoice.speech.rst
+++ b/docs/source/platypush/plugins/stt.picovoice.speech.rst
@ -1,5 +0,0 @@
-``stt.picovoice.speech``
-==========================================
-
-.. automodule:: platypush.plugins.stt.picovoice.speech
-    :members:
--- a/docs/source/plugins.rst
+++ b/docs/source/plugins.rst
@ -95,6 +95,7 @@ Plugins
    platypush/plugins/nmap.rst
    platypush/plugins/ntfy.rst
    platypush/plugins/otp.rst
+    platypush/plugins/picovoice.rst
    platypush/plugins/pihole.rst
    platypush/plugins/ping.rst
    platypush/plugins/printer.cups.rst
@ -119,8 +120,6 @@ Plugins
    platypush/plugins/smartthings.rst
    platypush/plugins/sound.rst
    platypush/plugins/ssh.rst
-    platypush/plugins/stt.picovoice.hotword.rst
-    platypush/plugins/stt.picovoice.speech.rst
    platypush/plugins/sun.rst
    platypush/plugins/switch.tplink.rst
    platypush/plugins/switch.wemo.rst
--- a/platypush/backend/stt/init.py
+++ b/platypush/backend/stt/init.py
@ -1,40 +0,0 @@
-import time
-
-from platypush.backend import Backend
-from platypush.context import get_plugin
-from platypush.plugins.stt import SttPlugin
-
-
-class SttBackend(Backend):
-    """
-    Base class for speech-to-text backends.
-    """
-
-    def __init__(self, plugin_name: str, retry_sleep: float = 5.0, *args, **kwargs):
-        """
-        :param plugin_name: Plugin name of the class that will be used for speech detection. Must be an instance of
-            :class:`platypush.plugins.stt.SttPlugin`.
-        :param retry_sleep: Number of seconds the backend will wait on failure before re-initializing the plugin
-            (default: 5 seconds).
-        """
-        super().__init__(*args, **kwargs)
-        self.plugin_name = plugin_name
-        self.retry_sleep = retry_sleep
-
-    def run(self):
-        super().run()
-        self.logger.info('Starting {} speech-to-text backend'.format(self.__class__.__name__))
-
-        while not self.should_stop():
-            try:
-                plugin: SttPlugin = get_plugin(self.plugin_name)
-                with plugin:
-                    # noinspection PyProtectedMember
-                    plugin._detection_thread.join()
-            except Exception as e:
-                self.logger.exception(e)
-                self.logger.warning('Encountered an unexpected error, retrying in {} seconds'.format(self.retry_sleep))
-                time.sleep(self.retry_sleep)
-
-
-# vim:sw=4:ts=4:et:
--- a/platypush/backend/stt/picovoice/init.py
+++ b/platypush/backend/stt/picovoice/init.py
--- a/platypush/backend/stt/picovoice/hotword/init.py
+++ b/platypush/backend/stt/picovoice/hotword/init.py
@ -1,21 +0,0 @@
-from platypush.backend.stt import SttBackend
-
-
-class SttPicovoiceHotwordBackend(SttBackend):
-    """
-    Backend for the PicoVoice hotword detection plugin. Set this plugin to ``enabled`` if you
-    want to run the hotword engine continuously instead of programmatically using
-    ``start_detection`` and ``stop_detection``.
-
-    Requires:
-
-        - The :class:`platypush.plugins.stt.deepspeech.SttPicovoiceHotwordPlugin` plugin configured and its dependencies
-          installed.
-
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__('stt.picovoice.hotword', *args, **kwargs)
-
-
-# vim:sw=4:ts=4:et:
--- a/platypush/backend/stt/picovoice/hotword/manifest.yaml
+++ b/platypush/backend/stt/picovoice/hotword/manifest.yaml
@ -1,6 +0,0 @@
-manifest:
-  events: {}
-  install:
-    pip: []
-  package: platypush.backend.stt.picovoice.hotword
-  type: backend
--- a/platypush/backend/stt/picovoice/speech/init.py
+++ b/platypush/backend/stt/picovoice/speech/init.py
@ -1,21 +0,0 @@
-from platypush.backend.stt import SttBackend
-
-
-class SttPicovoiceSpeechBackend(SttBackend):
-    """
-    Backend for the PicoVoice speech detection plugin. Set this plugin to ``enabled`` if you
-    want to run the speech engine continuously instead of programmatically using
-    ``start_detection`` and ``stop_detection``.
-
-    Requires:
-
-        - The :class:`platypush.plugins.stt.deepspeech.SttPicovoiceSpeechPlugin` plugin configured and its dependencies
-          installed.
-
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__('stt.picovoice.speech', *args, **kwargs)
-
-
-# vim:sw=4:ts=4:et:
--- a/platypush/backend/stt/picovoice/speech/manifest.yaml
+++ b/platypush/backend/stt/picovoice/speech/manifest.yaml
@ -1,6 +0,0 @@
-manifest:
-  events: {}
-  install:
-    pip: []
-  package: platypush.backend.stt.picovoice.speech
-  type: backend
--- a/platypush/plugins/assistant/init.py
+++ b/platypush/plugins/assistant/init.py
@ -9,21 +9,22 @@ from platypush.context import get_bus, get_plugin
 from platypush.entities.assistants import Assistant
 from platypush.entities.managers.assistants import AssistantEntityManager
 from platypush.message.event.assistant import (
-    AssistantEvent,
-    ConversationStartEvent,
-    ConversationEndEvent,
-    ConversationTimeoutEvent,
-    ResponseEvent,
-    NoResponseEvent,
-    SpeechRecognizedEvent,
-    AlarmStartedEvent,
    AlarmEndEvent,
-    TimerStartedEvent,
-    TimerEndEvent,
-    AlertStartedEvent,
+    AlarmStartedEvent,
    AlertEndEvent,
+    AlertStartedEvent,
+    AssistantEvent,
+    ConversationEndEvent,
+    ConversationStartEvent,
+    ConversationTimeoutEvent,
+    HotwordDetectedEvent,
    MicMutedEvent,
    MicUnmutedEvent,
+    NoResponseEvent,
+    ResponseEvent,
+    SpeechRecognizedEvent,
+    TimerEndEvent,
+    TimerStartedEvent,
 )
 from platypush.plugins import Plugin, action
 from platypush.utils import get_plugin_name_by_class
@ -235,6 +236,9 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
            self.stop_conversation()
            tts.say(text=text, **self.tts_plugin_args)

+    def _on_hotword_detected(self, hotword: Optional[str]):
+        self._send_event(HotwordDetectedEvent, hotword=hotword)
+
    def _on_speech_recognized(self, phrase: Optional[str]):
        phrase = (phrase or '').lower().strip()
        self._last_query = phrase
--- a/platypush/plugins/assistant/picovoice/init.py
+++ b/platypush/plugins/assistant/picovoice/init.py
@ -0,0 +1,234 @@
+from typing import Optional, Sequence
+
+from platypush.plugins import RunnablePlugin, action
+from platypush.plugins.assistant import AssistantPlugin
+
+from ._assistant import Assistant
+from ._state import AssistantState
+
+
+# pylint: disable=too-many-ancestors
+class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
+    """
+    A voice assistant that runs on your device, based on the `Picovoice
+    <https://picovoice.ai/>`_ engine.
+
+    .. note:: You will need a PicoVoice account and a personal access key to
+        use this integration.
+
+    You can get your personal access key by signing up at the `Picovoice
+    console <https://console.picovoice.ai/>`_. You may be asked to submit a
+    reason for using the service (feel free to mention a personal Platypush
+    integration), and you will receive your personal access key.
+
+    You may also be asked to select which products you want to use. The default
+    configuration of this plugin requires the following:
+
+        * **Porcupine**: wake-word engine, if you want the device to listen for
+          a specific wake word in order to start the assistant.
+
+        * **Cheetah**: speech-to-text engine, if you want your voice
+          interactions to be transcribed into free text - either programmatically
+          or when triggered by the wake word. Or:
+
+        * **Rhino**: intent recognition engine, if you want to extract *intents*
+          out of your voice commands - for instance, the phrase "set the living
+          room temperature to 20 degrees" could be mapped to the intent with the
+          following parameters: ``intent``: ``set_temperature``, ``room``:
+          ``living_room``, ``temperature``: ``20``.
+
+        * **Leopard**: speech-to-text engine aimed at offline transcription of
+          audio files rather than real-time transcription.
+
+        * **Orca**: text-to-speech engine, if you want to create your custom
+          logic to respond to user's voice commands and render the responses as
+          audio.
+
+    """
+
+    def __init__(
+        self,
+        access_key: str,
+        hotword_enabled: bool = True,
+        stt_enabled: bool = True,
+        intent_enabled: bool = False,
+        keywords: Optional[Sequence[str]] = None,
+        keyword_paths: Optional[Sequence[str]] = None,
+        keyword_model_path: Optional[str] = None,
+        speech_model_path: Optional[str] = None,
+        endpoint_duration: Optional[float] = 0.5,
+        enable_automatic_punctuation: bool = False,
+        start_conversation_on_hotword: bool = True,
+        audio_queue_size: int = 100,
+        conversation_timeout: Optional[float] = 7.5,
+        **kwargs,
+    ):
+        """
+        :param access_key: Your Picovoice access key. You can get it by signing
+            up at the `Picovoice console <https://console.picovoice.ai/>`.
+        :param hotword_enabled: Enable the wake-word engine (default: True).
+            **Note**: The wake-word engine requires you to add Porcupine to the
+            products available in your Picovoice account.
+        :param stt_enabled: Enable the speech-to-text engine (default: True).
+            **Note**: The speech-to-text engine requires you to add Cheetah to
+            the products available in your Picovoice account.
+        :param intent_enabled: Enable the intent recognition engine (default:
+            False).
+            **Note**: The intent recognition engine requires you to add Rhino
+            to the products available in your Picovoice account.
+        :param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
+            google``...). This is required if the wake-word engine is enabled.
+            See the `Picovoice repository
+            <https://github.com/Picovoice/porcupine/tree/master/resources/keyword_files>`_).
+            for a list of the stock keywords available. If you have a custom
+            model, you can pass its path to the ``keyword_paths`` parameter and
+            its filename (without the path and the platform extension) here.
+        :param keyword_paths: List of paths to the keyword files to listen for.
+            Custom keyword files can be created using the `Picovoice console
+            <https://console.picovoice.ai/ppn>`_ and downloaded from the
+            console itself.
+        :param keyword_model_path: If you are using a keyword file in a
+            non-English language, you can provide the path to the model file
+            for its language. Model files are available for all the supported
+            languages through the `Picovoice repository
+            <https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
+        :param speech_model_path: Path to the speech model file. If you are
+            using a language other than English, you can provide the path to the
+            model file for that language. Model files are available for all the
+            supported languages through the `Picovoice repository
+            <https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
+        :param endpoint_duration: If set, the assistant will stop listening when
+            no speech is detected for the specified duration (in seconds) after
+            the end of an utterance.
+        :param enable_automatic_punctuation: Enable automatic punctuation
+            insertion.
+        :param start_conversation_on_hotword: If set to True (default), a speech
+            detection session will be started when the hotword is detected. If
+            set to False, you may want to start the conversation programmatically
+            by calling the :meth:`.start_conversation` method instead, or run any
+            custom logic hotword detection logic. This can be particularly useful
+            when you want to run the assistant in a push-to-talk mode, or when you
+            want different hotwords to trigger conversations with different models
+            or languages.
+        :param audio_queue_size: Maximum number of audio frames to hold in the
+            processing queue. You may want to increase this value if you are
+            running this integration on a slow device and/or the logs report
+            audio frame drops too often. Keep in mind that increasing this value
+            will increase the memory usage of the integration. Also, a higher
+            value may result in higher accuracy at the cost of higher latency.
+        :param conversation_timeout: Maximum time to wait for some speech to be
+            detected after the hotword is detected. If no speech is detected
+            within this time, the conversation will time out and the plugin will
+            go back into hotword detection mode, if the mode is enabled. Default:
+            7.5 seconds.
+        """
+        super().__init__(**kwargs)
+        self._assistant = None
+        self._assistant_args = {
+            'stop_event': self._should_stop,
+            'access_key': access_key,
+            'hotword_enabled': hotword_enabled,
+            'stt_enabled': stt_enabled,
+            'intent_enabled': intent_enabled,
+            'keywords': keywords,
+            'keyword_paths': keyword_paths,
+            'keyword_model_path': keyword_model_path,
+            'speech_model_path': speech_model_path,
+            'endpoint_duration': endpoint_duration,
+            'enable_automatic_punctuation': enable_automatic_punctuation,
+            'start_conversation_on_hotword': start_conversation_on_hotword,
+            'audio_queue_size': audio_queue_size,
+            'conversation_timeout': conversation_timeout,
+            'on_conversation_start': self._on_conversation_start,
+            'on_conversation_end': self._on_conversation_end,
+            'on_conversation_timeout': self._on_conversation_timeout,
+            'on_speech_recognized': self._on_speech_recognized,
+            'on_hotword_detected': self._on_hotword_detected,
+        }
+
+    @action
+    def start_conversation(self, *_, **__):
+        """
+        Programmatically start a conversation with the assistant
+        """
+        if not self._assistant:
+            self.logger.warning('Assistant not initialized')
+            return
+
+        self._assistant.state = AssistantState.DETECTING_SPEECH
+
+    @action
+    def stop_conversation(self, *_, **__):
+        """
+        Programmatically stop a running conversation with the assistant
+        """
+        if not self._assistant:
+            self.logger.warning('Assistant not initialized')
+            return
+
+        if self._assistant.hotword_enabled:
+            self._assistant.state = AssistantState.DETECTING_HOTWORD
+        else:
+            self._assistant.state = AssistantState.IDLE
+
+    @action
+    def mute(self, *_, **__):
+        """
+        Mute the microphone. Alias for :meth:`.set_mic_mute` with
+        ``muted=True``.
+        """
+
+    @action
+    def unmute(self, *_, **__):
+        """
+        Unmute the microphone. Alias for :meth:`.set_mic_mute` with
+        ``muted=False``.
+        """
+
+    @action
+    def set_mic_mute(self, muted: bool):
+        """
+        Programmatically mute/unmute the microphone.
+
+        :param muted: Set to True or False.
+        """
+
+    @action
+    def toggle_mute(self, *_, **__):
+        """
+        Toggle the mic mute state.
+        """
+
+    @action
+    def send_text_query(self, *_, query: str, **__):
+        """
+        Send a text query to the assistant.
+
+        This is equivalent to saying something to the assistant.
+
+        :param query: Query to be sent.
+        """
+
+    def main(self):
+        while not self.should_stop():
+            self.logger.info('Starting Picovoice assistant')
+            with Assistant(**self._assistant_args) as self._assistant:
+                try:
+                    for event in self._assistant:
+                        self.logger.debug('Picovoice assistant event: %s', event)
+                except KeyboardInterrupt:
+                    break
+                except Exception as e:
+                    self.logger.error('Picovoice assistant error: %s', e, exc_info=True)
+                    self.wait_stop(5)
+
+    def stop(self):
+        try:
+            self.stop_conversation()
+        except RuntimeError:
+            pass
+
+        super().stop()
+
+
+# vim:sw=4:ts=4:et:
--- a/platypush/plugins/assistant/picovoice/_assistant.py
+++ b/platypush/plugins/assistant/picovoice/_assistant.py
@ -0,0 +1,308 @@
+import logging
+import os
+from threading import Event, RLock
+from time import time
+from typing import Any, Dict, Optional, Sequence
+
+import pvcheetah
+import pvleopard
+import pvporcupine
+import pvrhino
+
+from platypush.message.event.assistant import (
+    ConversationTimeoutEvent,
+    HotwordDetectedEvent,
+    SpeechRecognizedEvent,
+)
+
+from ._context import ConversationContext
+from ._recorder import AudioRecorder
+from ._state import AssistantState
+
+
+class Assistant:
+    """
+    A facade class that wraps the Picovoice engines under an assistant API.
+    """
+
+    def _default_callback(*_, **__):
+        pass
+
+    def __init__(
+        self,
+        access_key: str,
+        stop_event: Event,
+        hotword_enabled: bool = True,
+        stt_enabled: bool = True,
+        intent_enabled: bool = False,
+        keywords: Optional[Sequence[str]] = None,
+        keyword_paths: Optional[Sequence[str]] = None,
+        keyword_model_path: Optional[str] = None,
+        frame_expiration: float = 3.0,  # Don't process audio frames older than this
+        speech_model_path: Optional[str] = None,
+        endpoint_duration: Optional[float] = None,
+        enable_automatic_punctuation: bool = False,
+        start_conversation_on_hotword: bool = False,
+        audio_queue_size: int = 100,
+        conversation_timeout: Optional[float] = None,
+        on_conversation_start=_default_callback,
+        on_conversation_end=_default_callback,
+        on_conversation_timeout=_default_callback,
+        on_speech_recognized=_default_callback,
+        on_hotword_detected=_default_callback,
+    ):
+        self._access_key = access_key
+        self._stop_event = stop_event
+        self.logger = logging.getLogger(__name__)
+        self.hotword_enabled = hotword_enabled
+        self.stt_enabled = stt_enabled
+        self.intent_enabled = intent_enabled
+        self.keywords = list(keywords or [])
+        self.keyword_paths = None
+        self.keyword_model_path = None
+        self.frame_expiration = frame_expiration
+        self.speech_model_path = speech_model_path
+        self.endpoint_duration = endpoint_duration
+        self.enable_automatic_punctuation = enable_automatic_punctuation
+        self.start_conversation_on_hotword = start_conversation_on_hotword
+        self.audio_queue_size = audio_queue_size
+
+        self._on_conversation_start = on_conversation_start
+        self._on_conversation_end = on_conversation_end
+        self._on_conversation_timeout = on_conversation_timeout
+        self._on_speech_recognized = on_speech_recognized
+        self._on_hotword_detected = on_hotword_detected
+
+        self._recorder = None
+        self._state = AssistantState.IDLE
+        self._state_lock = RLock()
+        self._ctx = ConversationContext(timeout=conversation_timeout)
+
+        if hotword_enabled:
+            if not keywords:
+                raise ValueError(
+                    'You need to provide a list of keywords if the wake-word engine is enabled'
+                )
+
+            if keyword_paths:
+                keyword_paths = [os.path.expanduser(path) for path in keyword_paths]
+                missing_paths = [
+                    path for path in keyword_paths if not os.path.isfile(path)
+                ]
+                if missing_paths:
+                    raise FileNotFoundError(f'Keyword files not found: {missing_paths}')
+
+                self.keyword_paths = keyword_paths
+
+            if keyword_model_path:
+                keyword_model_path = os.path.expanduser(keyword_model_path)
+                if not os.path.isfile(keyword_model_path):
+                    raise FileNotFoundError(
+                        f'Keyword model file not found: {keyword_model_path}'
+                    )
+
+                self.keyword_model_path = keyword_model_path
+
+        self._cheetah: Optional[pvcheetah.Cheetah] = None
+        self._leopard: Optional[pvleopard.Leopard] = None
+        self._porcupine: Optional[pvporcupine.Porcupine] = None
+        self._rhino: Optional[pvrhino.Rhino] = None
+
+    def should_stop(self):
+        return self._stop_event.is_set()
+
+    def wait_stop(self):
+        self._stop_event.wait()
+
+    @property
+    def state(self) -> AssistantState:
+        with self._state_lock:
+            return self._state
+
+    @state.setter
+    def state(self, state: AssistantState):
+        with self._state_lock:
+            prev_state = self._state
+            self._state = state
+            new_state = self.state
+
+        if prev_state == new_state:
+            return
+
+        if prev_state == AssistantState.DETECTING_SPEECH:
+            self._ctx.stop()
+            self._on_conversation_end()
+        elif new_state == AssistantState.DETECTING_SPEECH:
+            self._ctx.start()
+            self._on_conversation_start()
+
+    @property
+    def porcupine(self) -> Optional[pvporcupine.Porcupine]:
+        if not self.hotword_enabled:
+            return None
+
+        if not self._porcupine:
+            args: Dict[str, Any] = {'access_key': self._access_key}
+            if self.keywords:
+                args['keywords'] = self.keywords
+            if self.keyword_paths:
+                args['keyword_paths'] = self.keyword_paths
+            if self.keyword_model_path:
+                args['model_path'] = self.keyword_model_path
+
+            self._porcupine = pvporcupine.create(**args)
+
+        return self._porcupine
+
+    @property
+    def cheetah(self) -> Optional[pvcheetah.Cheetah]:
+        if not self.stt_enabled:
+            return None
+
+        if not self._cheetah:
+            args: Dict[str, Any] = {'access_key': self._access_key}
+            if self.speech_model_path:
+                args['model_path'] = self.speech_model_path
+            if self.endpoint_duration:
+                args['endpoint_duration_sec'] = self.endpoint_duration
+            if self.enable_automatic_punctuation:
+                args['enable_automatic_punctuation'] = self.enable_automatic_punctuation
+
+            self._cheetah = pvcheetah.create(**args)
+
+        return self._cheetah
+
+    def __enter__(self):
+        if self.should_stop():
+            return self
+
+        if self._recorder:
+            self.logger.info('A recording stream already exists')
+        elif self.porcupine or self.cheetah:
+            sample_rate = (self.porcupine or self.cheetah).sample_rate  # type: ignore
+            frame_length = (self.porcupine or self.cheetah).frame_length  # type: ignore
+
+            self._recorder = AudioRecorder(
+                stop_event=self._stop_event,
+                sample_rate=sample_rate,
+                frame_size=frame_length,
+                queue_size=self.audio_queue_size,
+                channels=1,
+            )
+
+            self._recorder.__enter__()
+
+            if self.porcupine:
+                self.state = AssistantState.DETECTING_HOTWORD
+            else:
+                self.state = AssistantState.DETECTING_SPEECH
+
+        return self
+
+    def __exit__(self, *_):
+        if self._recorder:
+            self._recorder.__exit__(*_)
+            self._recorder = None
+
+        self.state = AssistantState.IDLE
+
+        if self._cheetah:
+            self._cheetah.delete()
+            self._cheetah = None
+
+        if self._leopard:
+            self._leopard.delete()
+            self._leopard = None
+
+        if self._porcupine:
+            self._porcupine.delete()
+            self._porcupine = None
+
+        if self._rhino:
+            self._rhino.delete()
+            self._rhino = None
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        has_data = False
+        if self.should_stop() or not self._recorder:
+            raise StopIteration
+
+        while not (self.should_stop() or has_data):
+            data = self._recorder.read()
+            if data is None:
+                continue
+
+            frame, t = data
+            if time() - t > self.frame_expiration:
+                self.logger.info(
+                    'Skipping audio frame older than %ss', self.frame_expiration
+                )
+                continue  # The audio frame is too old
+
+            if self.porcupine and self.state == AssistantState.DETECTING_HOTWORD:
+                return self._process_hotword(frame)
+
+            if self.cheetah and self.state == AssistantState.DETECTING_SPEECH:
+                return self._process_speech(frame)
+
+        raise StopIteration
+
+    def _process_hotword(self, frame):
+        if not self.porcupine:
+            return None
+
+        keyword_index = self.porcupine.process(frame)
+        if keyword_index is None:
+            return None  # No keyword detected
+
+        if keyword_index >= 0 and self.keywords:
+            if self.start_conversation_on_hotword:
+                self.state = AssistantState.DETECTING_SPEECH
+
+            self._on_hotword_detected(hotword=self.keywords[keyword_index])
+            return HotwordDetectedEvent(hotword=self.keywords[keyword_index])
+
+        return None
+
+    def _process_speech(self, frame):
+        if not self.cheetah:
+            return None
+
+        event = None
+        partial_transcript, self._ctx.is_final = self.cheetah.process(frame)
+
+        if partial_transcript:
+            self._ctx.partial_transcript += partial_transcript
+            self.logger.info(
+                'Partial transcript: %s, is_final: %s',
+                self._ctx.partial_transcript,
+                self._ctx.is_final,
+            )
+
+        if self._ctx.is_final or self._ctx.timed_out:
+            phrase = ''
+            if self.cheetah:
+                phrase = self.cheetah.flush()
+
+            self._ctx.partial_transcript += phrase
+            phrase = self._ctx.partial_transcript
+            phrase = phrase[:1].lower() + phrase[1:]
+
+            if self._ctx.is_final or phrase:
+                event = SpeechRecognizedEvent(phrase=phrase)
+                self._on_speech_recognized(phrase=phrase)
+            else:
+                event = ConversationTimeoutEvent()
+                self._on_conversation_timeout()
+
+            self._ctx.reset()
+            if self.hotword_enabled:
+                self.state = AssistantState.DETECTING_HOTWORD
+
+        return event
+
+
+# vim:sw=4:ts=4:et:
--- a/platypush/plugins/assistant/picovoice/_context.py
+++ b/platypush/plugins/assistant/picovoice/_context.py
@ -0,0 +1,43 @@
+from dataclasses import dataclass
+from time import time
+from typing import Optional
+
+
+@dataclass
+class ConversationContext:
+    """
+    Context of the conversation process.
+    """
+
+    partial_transcript: str = ''
+    is_final: bool = False
+    timeout: Optional[float] = None
+    t_start: Optional[float] = None
+    t_end: Optional[float] = None
+
+    def start(self):
+        self.reset()
+        self.t_start = time()
+
+    def stop(self):
+        self.reset()
+        self.t_end = time()
+
+    def reset(self):
+        self.partial_transcript = ''
+        self.is_final = False
+        self.t_start = None
+        self.t_end = None
+
+    @property
+    def timed_out(self):
+        return (
+            not self.partial_transcript
+            and not self.is_final
+            and self.timeout
+            and self.t_start
+            and time() - self.t_start > self.timeout
+        )
+
+
+# vim:sw=4:ts=4:et:
--- a/platypush/plugins/assistant/picovoice/_recorder.py
+++ b/platypush/plugins/assistant/picovoice/_recorder.py
@ -0,0 +1,76 @@
+from collections import namedtuple
+from logging import getLogger
+from queue import Full, Queue
+from threading import Event
+from time import time
+from typing import Optional
+
+import sounddevice as sd
+
+from platypush.utils import wait_for_either
+
+
+AudioFrame = namedtuple('AudioFrame', ['data', 'timestamp'])
+
+
+class AudioRecorder:
+    """
+    Audio recorder component that uses the sounddevice library to record audio
+    from the microphone.
+    """
+
+    def __init__(
+        self,
+        stop_event: Event,
+        sample_rate: int,
+        frame_size: int,
+        channels: int,
+        dtype: str = 'int16',
+        queue_size: int = 100,
+    ):
+        self.logger = getLogger(__name__)
+        self._audio_queue: Queue[AudioFrame] = Queue(maxsize=queue_size)
+        self.frame_size = frame_size
+        self._stop_event = Event()
+        self._upstream_stop_event = stop_event
+        self.stream = sd.InputStream(
+            samplerate=sample_rate,
+            channels=channels,
+            dtype=dtype,
+            blocksize=frame_size,
+            callback=self._audio_callback,
+        )
+
+    def __enter__(self):
+        self._stop_event.clear()
+        self.stream.start()
+        return self
+
+    def __exit__(self, *_):
+        self.stop()
+
+    def _audio_callback(self, indata, *_):
+        if self.should_stop():
+            return
+
+        try:
+            self._audio_queue.put_nowait(AudioFrame(indata.reshape(-1), time()))
+        except Full:
+            self.logger.warning('Audio queue is full, dropping audio frame')
+
+    def read(self, timeout: Optional[float] = None):
+        try:
+            return self._audio_queue.get(timeout=timeout)
+        except TimeoutError:
+            self.logger.debug('Audio queue is empty')
+            return None
+
+    def stop(self):
+        self._stop_event.set()
+        self.stream.stop()
+
+    def should_stop(self):
+        return self._stop_event.is_set() or self._upstream_stop_event.is_set()
+
+    def wait(self, timeout: Optional[float] = None):
+        wait_for_either(self._stop_event, self._upstream_stop_event, timeout=timeout)
--- a/platypush/plugins/assistant/picovoice/_state.py
+++ b/platypush/plugins/assistant/picovoice/_state.py
@ -0,0 +1,14 @@
+from enum import Enum
+
+
+class AssistantState(Enum):
+    """
+    Possible states of the assistant.
+    """
+
+    IDLE = 'idle'
+    DETECTING_HOTWORD = 'detecting_hotword'
+    DETECTING_SPEECH = 'detecting_speech'
+
+
+# vim:sw=4:ts=4:et:
--- a/platypush/plugins/assistant/picovoice/manifest.yaml
+++ b/platypush/plugins/assistant/picovoice/manifest.yaml
@ -0,0 +1,23 @@
+manifest:
+  package: platypush.plugins.assistant.picovoice
+  type: plugin
+  events:
+    - platypush.message.event.assistant.ConversationEndEvent
+    - platypush.message.event.assistant.ConversationStartEvent
+    - platypush.message.event.assistant.ConversationTimeoutEvent
+    - platypush.message.event.assistant.HotwordDetectedEvent
+    - platypush.message.event.assistant.MicMutedEvent
+    - platypush.message.event.assistant.MicUnmutedEvent
+    - platypush.message.event.assistant.NoResponseEvent
+    - platypush.message.event.assistant.ResponseEvent
+    - platypush.message.event.assistant.SpeechRecognizedEvent
+  install:
+    pacman:
+      - python-sounddevice
+    pip:
+      - pvcheetah
+      - pvleopard
+      - pvorca
+      - pvporcupine
+      - pvrhino
+      - sounddevice
--- a/platypush/plugins/sound/_manager/_main.py
+++ b/platypush/plugins/sound/_manager/_main.py
@ -247,10 +247,12 @@ class AudioManager:
        wait_start = time()
        for audio_thread in streams_to_stop:
            audio_thread.join(
-                timeout=max(0, timeout - (time() - wait_start))
+                timeout=(
+                    max(0, timeout - (time() - wait_start))
                    if timeout is not None
                    else None
                )
+            )

        # Remove references
        for audio_thread in streams_to_stop:
--- a/platypush/plugins/stt/init.py
+++ b/platypush/plugins/stt/init.py
@ -1,336 +0,0 @@
-import queue
-import threading
-from abc import ABC, abstractmethod
-from typing import Optional, Union, List
-
-import sounddevice as sd
-
-from platypush.context import get_bus
-from platypush.message.event.stt import (
-    SpeechDetectionStartedEvent,
-    SpeechDetectionStoppedEvent,
-    SpeechStartedEvent,
-    SpeechDetectedEvent,
-    HotwordDetectedEvent,
-    ConversationDetectedEvent,
-)
-from platypush.message.response.stt import SpeechDetectedResponse
-from platypush.plugins import Plugin, action
-
-
-class SttPlugin(ABC, Plugin):
-    """
-    Abstract class for speech-to-text plugins.
-    """
-
-    _thread_stop_timeout = 10.0
-    rate = 16000
-    channels = 1
-
-    def __init__(
-        self,
-        input_device: Optional[Union[int, str]] = None,
-        hotword: Optional[str] = None,
-        hotwords: Optional[List[str]] = None,
-        conversation_timeout: Optional[float] = 10.0,
-        block_duration: float = 1.0,
-    ):
-        """
-        :param input_device: PortAudio device index or name that will be used for recording speech (default: default
-            system audio input device).
-        :param hotword: When this word is detected, the plugin will trigger a
-            :class:`platypush.message.event.stt.HotwordDetectedEvent` instead of a
-            :class:`platypush.message.event.stt.SpeechDetectedEvent` event. You can use these events for hooking other
-            assistants.
-        :param hotwords: Use a list of hotwords instead of a single one.
-        :param conversation_timeout: If ``hotword`` or ``hotwords`` are set and ``conversation_timeout`` is set,
-            the next speech detected event will trigger a :class:`platypush.message.event.stt.ConversationDetectedEvent`
-            instead of a :class:`platypush.message.event.stt.SpeechDetectedEvent` event. You can hook custom hooks
-            here to run any logic depending on the detected speech - it can emulate a kind of
-            "OK, Google. Turn on the lights" interaction without using an external assistant (default: 10 seconds).
-        :param block_duration: Duration of the acquired audio blocks (default: 1 second).
-        """
-
-        super().__init__()
-        self.input_device = input_device
-        self.conversation_timeout = conversation_timeout
-        self.block_duration = block_duration
-
-        self.hotwords = set(hotwords or [])
-        if hotword:
-            self.hotwords = {hotword}
-
-        self._conversation_event = threading.Event()
-        self._input_stream: Optional[sd.InputStream] = None
-        self._recording_thread: Optional[threading.Thread] = None
-        self._detection_thread: Optional[threading.Thread] = None
-        self._audio_queue: Optional[queue.Queue] = None
-        self._current_text = ''
-
-    def _get_input_device(self, device: Optional[Union[int, str]] = None) -> int:
-        """
-        Get the index of the input device by index or name.
-
-        :param device: Device index or name. If None is set then the function will return the index of the
-            default audio input device.
-        :return: Index of the audio input device.
-        """
-        if not device:
-            device = self.input_device
-        if not device:
-            return sd.query_hostapis()[0].get('default_input_device')
-
-        if isinstance(device, int):
-            assert device <= len(sd.query_devices())
-            return device
-
-        for i, dev in enumerate(sd.query_devices()):
-            if dev['name'] == device:
-                return i
-
-        raise AssertionError('Device {} not found'.format(device))
-
-    def on_speech_detected(self, speech: str) -> None:
-        """
-        Hook called when speech is detected. Triggers the right event depending on the current context.
-
-        :param speech: Detected speech.
-        """
-        speech = speech.strip()
-
-        if speech in self.hotwords:
-            event = HotwordDetectedEvent(hotword=speech)
-            if self.conversation_timeout:
-                self._conversation_event.set()
-                threading.Timer(
-                    self.conversation_timeout, lambda: self._conversation_event.clear()
-                ).start()
-        elif self._conversation_event.is_set():
-            event = ConversationDetectedEvent(speech=speech)
-        else:
-            event = SpeechDetectedEvent(speech=speech)
-
-        get_bus().post(event)
-
-    @staticmethod
-    def convert_frames(frames: bytes) -> bytes:
-        """
-        Conversion method for raw audio frames. It just returns the input frames as bytes. Override it if required
-        by your logic.
-
-        :param frames: Input audio frames, as bytes.
-        :return: The audio frames as passed on the input. Override if required.
-        """
-        return frames
-
-    def on_detection_started(self) -> None:
-        """
-        Method called when the ``detection_thread`` starts. Initialize your context variables and models here if
-        required.
-        """
-        pass
-
-    def on_detection_ended(self) -> None:
-        """
-        Method called when the ``detection_thread`` stops. Clean up your context variables and models here.
-        """
-        pass
-
-    def before_recording(self) -> None:
-        """
-        Method called when the ``recording_thread`` starts. Put here any logic that you may want to run before the
-        recording thread starts.
-        """
-        pass
-
-    def on_recording_started(self) -> None:
-        """
-        Method called after the ``recording_thread`` opens the audio device. Put here any logic that you may want to
-        run after the recording starts.
-        """
-        pass
-
-    def on_recording_ended(self) -> None:
-        """
-        Method called when the ``recording_thread`` stops. Put here any logic that you want to run after the audio
-        device is closed.
-        """
-        pass
-
-    @abstractmethod
-    def detect_speech(self, frames) -> str:
-        """
-        Method called within the ``detection_thread`` when new audio frames have been captured. Must be implemented
-        by the derived classes.
-
-        :param frames: Audio frames, as returned by ``convert_frames``.
-        :return: Detected text, as a string. Returns an empty string if no text has been detected.
-        """
-        raise NotImplementedError
-
-    def process_text(self, text: str) -> None:
-        if (not text and self._current_text) or (text and text == self._current_text):
-            self.on_speech_detected(self._current_text)
-            self._current_text = ''
-        else:
-            if text:
-                if not self._current_text:
-                    get_bus().post(SpeechStartedEvent())
-                self.logger.info('Intermediate speech results: [{}]'.format(text))
-
-            self._current_text = text
-
-    def detection_thread(self) -> None:
-        """
-        This thread reads frames from ``_audio_queue``, performs the speech-to-text detection and calls
-        """
-        self._current_text = ''
-        self.logger.debug('Detection thread started')
-        self.on_detection_started()
-
-        while self._audio_queue:
-            try:
-                frames = self._audio_queue.get()
-                frames = self.convert_frames(frames)
-            except Exception as e:
-                self.logger.warning(
-                    'Error while feeding audio to the model: {}'.format(str(e))
-                )
-                continue
-
-            text = self.detect_speech(frames).strip()
-            self.process_text(text)
-
-        self.on_detection_ended()
-        self.logger.debug('Detection thread terminated')
-
-    def recording_thread(
-        self,
-        block_duration: Optional[float] = None,
-        block_size: Optional[int] = None,
-        input_device: Optional[str] = None,
-    ) -> None:
-        """
-        Recording thread. It reads raw frames from the audio device and dispatches them to ``detection_thread``.
-
-        :param block_duration: Audio blocks duration. Specify either ``block_duration`` or ``block_size``.
-        :param block_size: Size of the audio blocks. Specify either ``block_duration`` or ``block_size``.
-        :param input_device: Input device
-        """
-        assert (block_duration or block_size) and not (
-            block_duration and block_size
-        ), 'Please specify either block_duration or block_size'
-
-        if not block_size:
-            block_size = int(self.rate * self.channels * block_duration)
-
-        self.before_recording()
-        self.logger.debug('Recording thread started')
-        device = self._get_input_device(input_device)
-        self._input_stream = sd.InputStream(
-            samplerate=self.rate,
-            device=device,
-            channels=self.channels,
-            dtype='int16',
-            latency=0,
-            blocksize=block_size,
-        )
-        self._input_stream.start()
-        self.on_recording_started()
-        get_bus().post(SpeechDetectionStartedEvent())
-
-        while self._input_stream:
-            try:
-                frames = self._input_stream.read(block_size)[0]
-            except Exception as e:
-                self.logger.warning(
-                    'Error while reading from the audio input: {}'.format(str(e))
-                )
-                continue
-
-            self._audio_queue.put(frames)
-
-        get_bus().post(SpeechDetectionStoppedEvent())
-        self.on_recording_ended()
-        self.logger.debug('Recording thread terminated')
-
-    @abstractmethod
-    @action
-    def detect(self, audio_file: str) -> SpeechDetectedResponse:
-        """
-        Perform speech-to-text analysis on an audio file. Must be implemented by the derived classes.
-
-        :param audio_file: Path to the audio file.
-        """
-        raise NotImplementedError
-
-    def __enter__(self):
-        """
-        Context manager enter. Starts detection and returns self.
-        """
-        self.start_detection()
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        """
-        Context manager exit. Stops detection.
-        """
-        self.stop_detection()
-
-    @action
-    def start_detection(
-        self,
-        input_device: Optional[str] = None,
-        seconds: Optional[float] = None,
-        block_duration: Optional[float] = None,
-    ) -> None:
-        """
-        Start the speech detection engine.
-
-        :param input_device: Audio input device name/index override
-        :param seconds: If set, then the detection engine will stop after this many seconds, otherwise it'll
-            start running until ``stop_detection`` is called or application stop.
-        :param block_duration: ``block_duration`` override.
-        """
-        assert (
-            not self._input_stream and not self._recording_thread
-        ), 'Speech detection is already running'
-        block_duration = block_duration or self.block_duration
-        input_device = input_device if input_device is not None else self.input_device
-        self._audio_queue = queue.Queue()
-        self._recording_thread = threading.Thread(
-            target=lambda: self.recording_thread(
-                block_duration=block_duration, input_device=input_device
-            )
-        )
-
-        self._recording_thread.start()
-        self._detection_thread = threading.Thread(
-            target=lambda: self.detection_thread()
-        )
-        self._detection_thread.start()
-
-        if seconds:
-            threading.Timer(seconds, lambda: self.stop_detection()).start()
-
-    @action
-    def stop_detection(self) -> None:
-        """
-        Stop the speech detection engine.
-        """
-        assert self._input_stream, 'Speech detection is not running'
-        self._input_stream.stop(ignore_errors=True)
-        self._input_stream.close(ignore_errors=True)
-        self._input_stream = None
-
-        if self._recording_thread:
-            self._recording_thread.join(timeout=self._thread_stop_timeout)
-            self._recording_thread = None
-
-        self._audio_queue = None
-        if self._detection_thread:
-            self._detection_thread.join(timeout=self._thread_stop_timeout)
-            self._detection_thread = None
-
-
-# vim:sw=4:ts=4:et:
--- a/platypush/plugins/stt/picovoice/init.py
+++ b/platypush/plugins/stt/picovoice/init.py
--- a/platypush/plugins/stt/picovoice/hotword/init.py
+++ b/platypush/plugins/stt/picovoice/hotword/init.py
@ -1,120 +0,0 @@
-import os
-import struct
-from typing import Optional, List
-
-from platypush.message.response.stt import SpeechDetectedResponse
-from platypush.plugins import action
-from platypush.plugins.stt import SttPlugin
-
-
-class SttPicovoiceHotwordPlugin(SttPlugin):
-    """
-    This plugin performs hotword detection using `PicoVoice <https://github.com/Picovoice>`_.
-    """
-
-    def __init__(
-        self,
-        library_path: Optional[str] = None,
-        model_file_path: Optional[str] = None,
-        keyword_file_paths: Optional[List[str]] = None,
-        sensitivity: float = 0.5,
-        sensitivities: Optional[List[float]] = None,
-        *args,
-        **kwargs
-    ):
-        from pvporcupine import Porcupine
-        from pvporcupine.resources.util.python.util import (
-            LIBRARY_PATH,
-            MODEL_FILE_PATH,
-            KEYWORD_FILE_PATHS,
-        )
-
-        super().__init__(*args, **kwargs)
-
-        self.hotwords = list(self.hotwords)
-        self._hotword_engine: Optional[Porcupine] = None
-        self._library_path = os.path.abspath(
-            os.path.expanduser(library_path or LIBRARY_PATH)
-        )
-        self._model_file_path = os.path.abspath(
-            os.path.expanduser(model_file_path or MODEL_FILE_PATH)
-        )
-
-        if not keyword_file_paths:
-            hotwords = KEYWORD_FILE_PATHS
-            assert all(
-                hotword in hotwords for hotword in self.hotwords
-            ), 'Not all the hotwords could be found. Available hotwords: {}'.format(
-                list(hotwords.keys())
-            )
-
-            self._keyword_file_paths = [
-                os.path.abspath(os.path.expanduser(hotwords[hotword]))
-                for hotword in self.hotwords
-            ]
-        else:
-            self._keyword_file_paths = [
-                os.path.abspath(os.path.expanduser(p)) for p in keyword_file_paths
-            ]
-
-        self._sensitivities = []
-        if sensitivities:
-            assert len(self._keyword_file_paths) == len(
-                sensitivities
-            ), 'Please specify as many sensitivities as the number of configured hotwords'
-
-            self._sensitivities = sensitivities
-        else:
-            self._sensitivities = [sensitivity] * len(self._keyword_file_paths)
-
-    def convert_frames(self, frames: bytes) -> tuple:
-        assert self._hotword_engine, 'The hotword engine is not running'
-        return struct.unpack_from("h" * self._hotword_engine.frame_length, frames)
-
-    def on_detection_ended(self) -> None:
-        if self._hotword_engine:
-            self._hotword_engine.delete()
-        self._hotword_engine = None
-
-    def detect_speech(self, frames: tuple) -> str:
-        index = self._hotword_engine.process(frames)
-        if index < 0:
-            return ''
-
-        if index is True:
-            index = 0
-        return self.hotwords[index]
-
-    @action
-    def detect(self, audio_file: str) -> SpeechDetectedResponse:
-        """
-        Perform speech-to-text analysis on an audio file.
-
-        :param audio_file: Path to the audio file.
-        """
-        pass
-
-    def recording_thread(
-        self, input_device: Optional[str] = None, *args, **kwargs
-    ) -> None:
-        assert self._hotword_engine, 'The hotword engine has not yet been initialized'
-        super().recording_thread(
-            block_size=self._hotword_engine.frame_length, input_device=input_device
-        )
-
-    @action
-    def start_detection(self, *args, **kwargs) -> None:
-        from pvporcupine import Porcupine
-
-        self._hotword_engine = Porcupine(
-            library_path=self._library_path,
-            model_file_path=self._model_file_path,
-            keyword_file_paths=self._keyword_file_paths,
-            sensitivities=self._sensitivities,
-        )
-
-        self.rate = self._hotword_engine.sample_rate
-        super().start_detection(*args, **kwargs)
-
-
-# vim:sw=4:ts=4:et:
--- a/platypush/plugins/stt/picovoice/hotword/manifest.yaml
+++ b/platypush/plugins/stt/picovoice/hotword/manifest.yaml
@ -1,7 +0,0 @@
-manifest:
-  events: {}
-  install:
-    pip:
-    - pvporcupine
-  package: platypush.plugins.stt.picovoice.hotword
-  type: plugin
--- a/platypush/plugins/stt/picovoice/speech/init.py
+++ b/platypush/plugins/stt/picovoice/speech/init.py
@ -1,154 +0,0 @@
-import inspect
-import os
-import platform
-import struct
-import threading
-from typing import Optional
-
-from platypush.message.event.stt import SpeechStartedEvent
-
-from platypush.context import get_bus
-from platypush.message.response.stt import SpeechDetectedResponse
-from platypush.plugins import action
-from platypush.plugins.stt import SttPlugin
-
-
-class SttPicovoiceSpeechPlugin(SttPlugin):
-    """
-    This plugin performs speech detection using `PicoVoice <https://github.com/Picovoice>`_.
-    NOTE: The PicoVoice product used for real-time speech-to-text (Cheetah) can be used freely for
-    personal applications on x86_64 Linux. Other architectures and operating systems require a commercial license.
-    You can ask for a license `here <https://picovoice.ai/contact.html>`_.
-    """
-
-    def __init__(
-        self,
-        library_path: Optional[str] = None,
-        acoustic_model_path: Optional[str] = None,
-        language_model_path: Optional[str] = None,
-        license_path: Optional[str] = None,
-        end_of_speech_timeout: int = 1,
-        *args,
-        **kwargs
-    ):
-        """
-        :param library_path: Path to the Cheetah binary library for your OS
-            (default: ``CHEETAH_INSTALL_DIR/lib/OS/ARCH/libpv_cheetah.EXT``).
-        :param acoustic_model_path: Path to the acoustic speech model
-            (default: ``CHEETAH_INSTALL_DIR/lib/common/acoustic_model.pv``).
-        :param language_model_path:  Path to the language model
-            (default: ``CHEETAH_INSTALL_DIR/lib/common/language_model.pv``).
-        :param license_path: Path to your PicoVoice license
-            (default: ``CHEETAH_INSTALL_DIR/resources/license/cheetah_eval_linux_public.lic``).
-        :param end_of_speech_timeout: Number of seconds of silence during speech recognition before considering
-            a phrase over (default: 1).
-        """
-        from pvcheetah import Cheetah
-
-        super().__init__(*args, **kwargs)
-
-        self._basedir = os.path.abspath(
-            os.path.join(inspect.getfile(Cheetah), '..', '..', '..')
-        )
-        if not library_path:
-            library_path = self._get_library_path()
-        if not language_model_path:
-            language_model_path = os.path.join(
-                self._basedir, 'lib', 'common', 'language_model.pv'
-            )
-        if not acoustic_model_path:
-            acoustic_model_path = os.path.join(
-                self._basedir, 'lib', 'common', 'acoustic_model.pv'
-            )
-        if not license_path:
-            license_path = os.path.join(
-                self._basedir, 'resources', 'license', 'cheetah_eval_linux_public.lic'
-            )
-
-        self._library_path = library_path
-        self._language_model_path = language_model_path
-        self._acoustic_model_path = acoustic_model_path
-        self._license_path = license_path
-        self._end_of_speech_timeout = end_of_speech_timeout
-        self._stt_engine: Optional[Cheetah] = None
-        self._speech_in_progress = threading.Event()
-
-    def _get_library_path(self) -> str:
-        path = os.path.join(
-            self._basedir, 'lib', platform.system().lower(), platform.machine()
-        )
-        return os.path.join(
-            path, [f for f in os.listdir(path) if f.startswith('libpv_cheetah.')][0]
-        )
-
-    def convert_frames(self, frames: bytes) -> tuple:
-        assert self._stt_engine, 'The speech engine is not running'
-        return struct.unpack_from("h" * self._stt_engine.frame_length, frames)
-
-    def on_detection_ended(self) -> None:
-        if self._stt_engine:
-            self._stt_engine.delete()
-        self._stt_engine = None
-
-    def detect_speech(self, frames: tuple) -> str:
-        text, is_endpoint = self._stt_engine.process(frames)
-        text = text.strip()
-
-        if text:
-            if not self._speech_in_progress.is_set():
-                self._speech_in_progress.set()
-                get_bus().post(SpeechStartedEvent())
-
-            self._current_text += ' ' + text.strip()
-
-        if is_endpoint:
-            text = self._stt_engine.flush().strip().strip()
-            if text:
-                self._current_text += ' ' + text
-
-            self._speech_in_progress.clear()
-            if self._current_text:
-                self.on_speech_detected(self._current_text)
-
-            self._current_text = ''
-
-        return self._current_text
-
-    def process_text(self, text: str) -> None:
-        pass
-
-    @action
-    def detect(self, audio_file: str) -> SpeechDetectedResponse:
-        """
-        Perform speech-to-text analysis on an audio file.
-
-        :param audio_file: Path to the audio file.
-        """
-        pass
-
-    def recording_thread(
-        self, input_device: Optional[str] = None, *args, **kwargs
-    ) -> None:
-        assert self._stt_engine, 'The hotword engine has not yet been initialized'
-        super().recording_thread(
-            block_size=self._stt_engine.frame_length, input_device=input_device
-        )
-
-    @action
-    def start_detection(self, *args, **kwargs) -> None:
-        from pvcheetah import Cheetah
-
-        self._stt_engine = Cheetah(
-            library_path=self._library_path,
-            acoustic_model_path=self._acoustic_model_path,
-            language_model_path=self._language_model_path,
-            license_path=self._license_path,
-            endpoint_duration_sec=self._end_of_speech_timeout,
-        )
-
-        self.rate = self._stt_engine.sample_rate
-        self._speech_in_progress.clear()
-        super().start_detection(*args, **kwargs)
-
-
-# vim:sw=4:ts=4:et:
--- a/platypush/plugins/stt/picovoice/speech/manifest.yaml
+++ b/platypush/plugins/stt/picovoice/speech/manifest.yaml
@ -1,7 +0,0 @@
-manifest:
-  events: {}
-  install:
-    pip:
-    - cheetah
-  package: platypush.plugins.stt.picovoice.speech
-  type: plugin
--- a/platypush/utils/mock/modules.py
+++ b/platypush/utils/mock/modules.py
@ -83,7 +83,10 @@ mock_imports = [
    "pmw3901",
    "psutil",
    "pvcheetah",
+    "pvleopard",
+    "pvorca",
    "pvporcupine",
+    "pvrhino",
    "pyHS100",
    "pyaudio",
    "pychromecast",
Author	SHA1	Message	Date
Fabio Manganiello	fa70c91a67	[assistant.picovoice] Better partial transcript + flush handling logic.	2024-04-08 13:33:07 +02:00
Fabio Manganiello	c7094c7886	`picovoice` -> `assistant.picovoice`	2024-04-08 13:26:44 +02:00
Fabio Manganiello	0b8e1bb81b	Better integration with the native base API of the assistant plugin.	2024-04-08 03:02:03 +02:00
Fabio Manganiello	f021b471aa	[WIP] Added speech detection logic over Cheetah.	2024-04-08 01:54:26 +02:00
Fabio Manganiello	01dec0b7a4	[WIP] Added initial hotword integration.	2024-04-07 22:42:01 +02:00
Fabio Manganiello	f0382c73ab	[#304 ] Removed old Picovoice integrations	2024-04-06 00:11:46 +02:00