forked from platypush/platypush

Commit 01dec0b7a4 (parent f0382c73ab): [WIP] Added initial hotword integration.

7 changed files with 462 additions and 4 deletions
platypush/plugins/picovoice/__init__.py (new file, 170 lines)

@@ -0,0 +1,170 @@
from typing import Optional, Sequence

from platypush.context import get_bus
from platypush.plugins import RunnablePlugin, action
from platypush.plugins.assistant import AssistantPlugin

from ._assistant import Assistant


# pylint: disable=too-many-ancestors
class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
    """
    A voice assistant that runs on your device, based on the `Picovoice
    <https://picovoice.ai/>`_ engine.

    .. note:: You will need a Picovoice account and a personal access key to
        use this integration.

    You can get your personal access key by signing up at the `Picovoice
    console <https://console.picovoice.ai/>`_. You may be asked to submit a
    reason for using the service (feel free to mention a personal Platypush
    integration), and you will receive your personal access key.

    You may also be asked to select which products you want to use. The default
    configuration of this plugin requires the following:

        * **Porcupine**: wake-word engine, if you want the device to listen for
          a specific wake word in order to start the assistant.

        * **Cheetah**: speech-to-text engine, if you want your voice
          interactions to be transcribed into free text - either
          programmatically or when triggered by the wake word. Or:

        * **Rhino**: intent recognition engine, if you want to extract *intents*
          out of your voice commands - for instance, the phrase "set the living
          room temperature to 20 degrees" could be mapped to the intent with the
          following parameters: ``intent``: ``set_temperature``, ``room``:
          ``living_room``, ``temperature``: ``20``.

        * **Leopard**: speech-to-text engine aimed at offline transcription of
          audio files rather than real-time transcription.

    """

    def __init__(
        self,
        access_key: str,
        hotword_enabled: bool = True,
        stt_enabled: bool = True,
        intent_enabled: bool = False,
        keywords: Optional[Sequence[str]] = None,
        keyword_paths: Optional[Sequence[str]] = None,
        keyword_model_path: Optional[str] = None,
        **kwargs,
    ):
        """
        :param access_key: Your Picovoice access key. You can get it by signing
            up at the `Picovoice console <https://console.picovoice.ai/>`_.
        :param hotword_enabled: Enable the wake-word engine (default: True).
            .. note:: The wake-word engine requires you to add Porcupine to the
                products available in your Picovoice account.
        :param stt_enabled: Enable the speech-to-text engine (default: True).
            .. note:: The speech-to-text engine requires you to add Cheetah to
                the products available in your Picovoice account.
        :param intent_enabled: Enable the intent recognition engine (default:
            False).
            .. note:: The intent recognition engine requires you to add Rhino
                to the products available in your Picovoice account.
        :param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
            google``...). Either ``keywords`` or ``keyword_paths`` must be
            provided if the wake-word engine is enabled. This list can include
            any of the default Picovoice keywords (available on the `Picovoice
            repository
            <https://github.com/Picovoice/porcupine/tree/master/resources/keyword_files>`_).
        :param keyword_paths: List of paths to the keyword files to listen for.
            Custom keyword files can be created using the `Picovoice console
            <https://console.picovoice.ai/ppn>`_ and downloaded from the
            console itself.
        :param keyword_model_path: If you are using a keyword file in a
            non-English language, you can provide the path to the model file
            for its language. Model files are available for all the supported
            languages through the `Picovoice repository
            <https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
        """
        super().__init__(**kwargs)
        self._assistant_args = {
            'stop_event': self._should_stop,
            'access_key': access_key,
            'hotword_enabled': hotword_enabled,
            'stt_enabled': stt_enabled,
            'intent_enabled': intent_enabled,
            'keywords': keywords,
            'keyword_paths': keyword_paths,
            'keyword_model_path': keyword_model_path,
        }

    @action
    def start_conversation(self, *_, **__):
        """
        Programmatically start a conversation with the assistant
        """

    @action
    def stop_conversation(self, *_, **__):
        """
        Programmatically stop a running conversation with the assistant
        """

    @action
    def mute(self, *_, **__):
        """
        Mute the microphone. Alias for :meth:`.set_mic_mute` with
        ``muted=True``.
        """

    @action
    def unmute(self, *_, **__):
        """
        Unmute the microphone. Alias for :meth:`.set_mic_mute` with
        ``muted=False``.
        """

    @action
    def set_mic_mute(self, muted: bool):
        """
        Programmatically mute/unmute the microphone.

        :param muted: Set to True or False.
        """

    @action
    def toggle_mute(self, *_, **__):
        """
        Toggle the mic mute state.
        """

    @action
    def send_text_query(self, *_, query: str, **__):
        """
        Send a text query to the assistant.

        This is equivalent to saying something to the assistant.

        :param query: Query to be sent.
        """

    def main(self):
        while not self.should_stop():
            self.logger.info('Starting Picovoice assistant')
            with Assistant(**self._assistant_args) as assistant:
                try:
                    for event in assistant:
                        if event:
                            get_bus().post(event)
                except KeyboardInterrupt:
                    break
                except Exception as e:
                    self.logger.error('Picovoice assistant error: %s', e, exc_info=True)
                    self.wait_stop(5)

    def stop(self):
        try:
            self.stop_conversation()
        except RuntimeError:
            pass

        super().stop()


# vim:sw=4:ts=4:et:
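Not part of the commit: a minimal sketch of how the plugin above could be instantiated with the constructor arguments documented in its docstring. The access key and keywords are placeholder values; in a normal Platypush setup they would live in the plugin's configuration section rather than in code, and the daemon, not the user, would manage the plugin's lifecycle.

from platypush.plugins.picovoice import PicovoicePlugin

# Placeholder values: the access key comes from https://console.picovoice.ai/,
# and 'porcupine'/'bumblebee' are among the default keywords shipped with Porcupine.
assistant = PicovoicePlugin(
    access_key='YOUR_PICOVOICE_ACCESS_KEY',
    hotword_enabled=True,
    stt_enabled=True,
    intent_enabled=False,
    keywords=['porcupine', 'bumblebee'],
)

# RunnablePlugin instances are normally started and supervised by the Platypush
# daemon; starting one by hand here is only meant to illustrate the lifecycle.
assistant.start()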
platypush/plugins/picovoice/_assistant.py (new file, 171 lines)

@@ -0,0 +1,171 @@
import logging
import os
from threading import Event
from time import time
from typing import Any, Dict, Optional, Sequence

import pvcheetah
import pvleopard
import pvporcupine
import pvrhino

from platypush.message.event.assistant import HotwordDetectedEvent

from ._recorder import AudioRecorder


class Assistant:
    """
    A facade class that wraps the Picovoice engines under an assistant API.
    """

    def __init__(
        self,
        access_key: str,
        stop_event: Event,
        hotword_enabled: bool = True,
        stt_enabled: bool = True,
        intent_enabled: bool = False,
        keywords: Optional[Sequence[str]] = None,
        keyword_paths: Optional[Sequence[str]] = None,
        keyword_model_path: Optional[str] = None,
        frame_expiration: float = 3.0,  # Don't process audio frames older than this
    ):
        self.logger = logging.getLogger(__name__)
        self._access_key = access_key
        self._stop_event = stop_event
        self.hotword_enabled = hotword_enabled
        self.stt_enabled = stt_enabled
        self.intent_enabled = intent_enabled
        self.keywords = list(keywords or [])
        self.keyword_paths = None
        self.keyword_model_path = None
        self.frame_expiration = frame_expiration
        self._recorder = None

        if hotword_enabled:
            if keyword_paths:
                keyword_paths = [os.path.expanduser(path) for path in keyword_paths]
                missing_paths = [
                    path for path in keyword_paths if not os.path.isfile(path)
                ]
                if missing_paths:
                    raise FileNotFoundError(f'Keyword files not found: {missing_paths}')

                self.keyword_paths = keyword_paths

            if keyword_model_path:
                keyword_model_path = os.path.expanduser(keyword_model_path)
                if not os.path.isfile(keyword_model_path):
                    raise FileNotFoundError(
                        f'Keyword model file not found: {keyword_model_path}'
                    )

                self.keyword_model_path = keyword_model_path

        self._cheetah: Optional[pvcheetah.Cheetah] = None
        self._leopard: Optional[pvleopard.Leopard] = None
        self._porcupine: Optional[pvporcupine.Porcupine] = None
        self._rhino: Optional[pvrhino.Rhino] = None

    def should_stop(self):
        return self._stop_event.is_set()

    def wait_stop(self):
        self._stop_event.wait()

    def _create_porcupine(self):
        if not self.hotword_enabled:
            return None

        args: Dict[str, Any] = {'access_key': self._access_key}
        if not (self.keywords or self.keyword_paths):
            raise ValueError(
                'You need to provide either a list of keywords or a list of '
                'keyword paths if the wake-word engine is enabled'
            )

        if self.keywords:
            args['keywords'] = self.keywords
        if self.keyword_paths:
            args['keyword_paths'] = self.keyword_paths
        if self.keyword_model_path:
            args['model_path'] = self.keyword_model_path

        return pvporcupine.create(**args)

    @property
    def porcupine(self) -> Optional[pvporcupine.Porcupine]:
        if not self._porcupine:
            self._porcupine = self._create_porcupine()

        return self._porcupine

    def __enter__(self):
        if self._recorder:
            self.logger.info('A recording stream already exists')
        elif self.porcupine:
            self._recorder = AudioRecorder(
                stop_event=self._stop_event,
                sample_rate=self.porcupine.sample_rate,
                frame_size=self.porcupine.frame_length,
                channels=1,
            )

        self._recorder.__enter__()
        return self

    def __exit__(self, *_):
        if self._recorder:
            self._recorder.__exit__(*_)
            self._recorder = None

        if self._cheetah:
            self._cheetah.delete()
            self._cheetah = None

        if self._leopard:
            self._leopard.delete()
            self._leopard = None

        if self._porcupine:
            self._porcupine.delete()
            self._porcupine = None

        if self._rhino:
            self._rhino.delete()
            self._rhino = None

    def __iter__(self):
        return self

    def __next__(self):
        has_data = False
        if self.should_stop() or not self._recorder:
            raise StopIteration

        while not (self.should_stop() or has_data):
            if self.porcupine:  # TODO also check current state
                data = self._recorder.read()
                if data is None:
                    continue

                frame, t = data
                if time() - t > self.frame_expiration:
                    self.logger.info(
                        'Skipping audio frame older than %ss', self.frame_expiration
                    )
                    continue  # The audio frame is too old

                keyword_index = self.porcupine.process(frame)
                if keyword_index is None:
                    continue  # No keyword detected

                if keyword_index >= 0 and self.keywords:
                    return HotwordDetectedEvent(hotword=self.keywords[keyword_index])

        raise StopIteration


# vim:sw=4:ts=4:et:
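Not part of the commit: since only hotword detection is wired up in this WIP, the Assistant facade can already be exercised on its own as a context manager and iterator. A sketch with a placeholder access key:

from threading import Event

from platypush.plugins.picovoice._assistant import Assistant

stop_event = Event()  # set this from another thread to terminate the loop

with Assistant(
    access_key='YOUR_PICOVOICE_ACCESS_KEY',
    stop_event=stop_event,
    keywords=['porcupine'],  # one of the built-in Porcupine keywords
) as assistant:
    # __next__ blocks until a keyword is detected (or the stop event is set)
    # and yields a HotwordDetectedEvent, which is what the plugin's main()
    # loop posts to the Platypush bus.
    for event in assistant:
        print('Hotword detected:', event)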
platypush/plugins/picovoice/_recorder.py (new file, 77 lines)

@@ -0,0 +1,77 @@
from collections import namedtuple
from logging import getLogger
from queue import Empty, Full, Queue
from threading import Event
from time import time
from typing import Optional

import sounddevice as sd

from platypush.utils import wait_for_either


AudioFrame = namedtuple('AudioFrame', ['data', 'timestamp'])


class AudioRecorder:
    """
    Audio recorder component that uses the sounddevice library to record audio
    from the microphone.
    """

    def __init__(
        self,
        stop_event: Event,
        sample_rate: int,
        frame_size: int,
        channels: int,
        dtype: str = 'int16',
        queue_size: int = 20,
    ):
        self.logger = getLogger(__name__)
        self._audio_queue: Queue[AudioFrame] = Queue(maxsize=queue_size)
        self.frame_size = frame_size
        self._stop_event = Event()
        self._upstream_stop_event = stop_event
        self.stream = sd.InputStream(
            samplerate=sample_rate,
            channels=channels,
            dtype=dtype,
            blocksize=frame_size,
            callback=self._audio_callback,
        )

    def __enter__(self):
        self._stop_event.clear()
        self.stream.start()
        return self

    def __exit__(self, *_):
        self.stop()
        # self.stream.close()

    def _audio_callback(self, indata, *_):
        if self.should_stop():
            return

        try:
            self._audio_queue.put_nowait(AudioFrame(indata.reshape(-1), time()))
        except Full:
            self.logger.warning('Audio queue is full, dropping audio frame')

    def read(self, timeout: Optional[float] = None):
        try:
            return self._audio_queue.get(timeout=timeout)
        except Empty:
            self.logger.debug('Audio queue is empty')
            return None

    def stop(self):
        self._stop_event.set()
        self.stream.stop()

    def should_stop(self):
        return self._stop_event.is_set() or self._upstream_stop_event.is_set()

    def wait(self, timeout: Optional[float] = None):
        wait_for_either(self._stop_event, self._upstream_stop_event, timeout=timeout)
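Not part of the commit: the recorder can also be used standalone. A sketch; the 16000 Hz sample rate and 512-sample frame size are assumptions matching Porcupine's usual parameters, which is where the plugin takes these values from (porcupine.sample_rate and porcupine.frame_length):

from threading import Event

from platypush.plugins.picovoice._recorder import AudioRecorder

stop = Event()

with AudioRecorder(
    stop_event=stop,
    sample_rate=16000,
    frame_size=512,
    channels=1,
) as recorder:
    # read() returns an AudioFrame(data, timestamp) or None on timeout.
    frame = recorder.read(timeout=1)
    if frame is not None:
        print(f'Got {len(frame.data)} samples')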
platypush/plugins/picovoice/_state.py (new file, 14 lines)

@@ -0,0 +1,14 @@
from enum import Enum


class AssistantState(Enum):
    """
    Possible states of the assistant.
    """

    IDLE = 'idle'
    DETECTING_HOTWORD = 'detecting_hotword'
    DETECTING_SPEECH = 'detecting_speech'


# vim:sw=4:ts=4:et:
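The enum above is not referenced anywhere else in this WIP commit; the "TODO also check current state" comment in Assistant.__next__ hints at where it is meant to be used. A hypothetical sketch (an assumption, not the author's implementation) of how the state could gate which engine handles an incoming audio frame:

from platypush.plugins.picovoice._state import AssistantState


def wants_hotword_engine(state: AssistantState) -> bool:
    # Porcupine only needs to run while we are waiting for the wake word.
    return state == AssistantState.DETECTING_HOTWORD


def wants_speech_engine(state: AssistantState) -> bool:
    # Cheetah (or Rhino) would take over once the wake word has been detected.
    return state == AssistantState.DETECTING_SPEECH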
platypush/plugins/picovoice/manifest.yaml (new file, 22 lines)

@@ -0,0 +1,22 @@
manifest:
  package: platypush.plugins.picovoice
  type: plugin
  events:
    - platypush.message.event.assistant.ConversationEndEvent
    - platypush.message.event.assistant.ConversationStartEvent
    - platypush.message.event.assistant.ConversationTimeoutEvent
    - platypush.message.event.assistant.HotwordDetectedEvent
    - platypush.message.event.assistant.MicMutedEvent
    - platypush.message.event.assistant.MicUnmutedEvent
    - platypush.message.event.assistant.NoResponseEvent
    - platypush.message.event.assistant.ResponseEvent
    - platypush.message.event.assistant.SpeechRecognizedEvent
  install:
    pacman:
      - python-sounddevice
    pip:
      - pvcheetah
      - pvleopard
      - pvporcupine
      - pvrhino
      - sounddevice
@@ -247,10 +247,12 @@ class AudioManager:
            wait_start = time()
            for audio_thread in streams_to_stop:
                audio_thread.join(
                    timeout=max(0, timeout - (time() - wait_start))
                    timeout=(
                        max(0, timeout - (time() - wait_start))
                        if timeout is not None
                        else None
                    )
                )

            # Remove references
            for audio_thread in streams_to_stop:
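The hunk above makes the remaining-timeout computation tolerant of timeout=None: previously the unguarded subtraction would raise a TypeError whenever no overall timeout was given. A small sketch of the same shared-deadline join pattern in isolation (names are illustrative, not from the codebase):

from threading import Thread
from time import time
from typing import Iterable, Optional


def join_all(threads: Iterable[Thread], timeout: Optional[float] = None):
    wait_start = time()
    for t in threads:
        # Each thread gets whatever is left of the overall budget; when
        # timeout is None we fall through to an unbounded join instead of
        # subtracting from None.
        t.join(
            timeout=(
                max(0, timeout - (time() - wait_start))
                if timeout is not None
                else None
            )
        )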
@@ -83,7 +83,9 @@ mock_imports = [
    "pmw3901",
    "psutil",
    "pvcheetah",
    "pvporcupine ",
    "pvleopard",
    "pvporcupine",
    "pvrhino",
    "pyHS100",
    "pyaudio",
    "pychromecast",