[WIP] Added initial hotword integration.

2024-04-07 22:42:01 +02:00 · 2024-04-07 22:42:01 +02:00 · 01dec0b7a4
commit 01dec0b7a4
parent f0382c73ab
7 changed files with 462 additions and 4 deletions
--- a/platypush/plugins/picovoice/__init__.py
+++ b/platypush/plugins/picovoice/__init__.py
@ -0,0 +1,170 @@
 from typing import Optional, Sequence
 from platypush.context import get_bus
 from platypush.plugins import RunnablePlugin, action
 from platypush.plugins.assistant import AssistantPlugin
 from ._assistant import Assistant
 # pylint: disable=too-many-ancestors
 class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
    """
    A voice assistant that runs on your device, based on the `Picovoice
    <https://picovoice.ai/>`_ engine.
    .. note:: You will need a PicoVoice account and a personal access key to
        use this integration.
    You can get your personal access key by signing up at the `Picovoice
    console <https://console.picovoice.ai/>`_. You may be asked to submit a
    reason for using the service (feel free to mention a personal Platypush
    integration), and you will receive your personal access key.
    You may also be asked to select which products you want to use. The default
    configuration of this plugin requires the following:
        * **Porcupine**: wake-word engine, if you want the device to listen for
          a specific wake word in order to start the assistant.
        * **Cheetah**: speech-to-text engine, if you want your voice
          interactions to be transcribed into free text - either programmatically
          or when triggered by the wake word. Or:
        * **Rhino**: intent recognition engine, if you want to extract *intents*
          out of your voice commands - for instance, the phrase "set the living
          room temperature to 20 degrees" could be mapped to the intent with the
          following parameters: ``intent``: ``set_temperature``, ``room``:
          ``living_room``, ``temperature``: ``20``.
        * **Leopard**: speech-to-text engine aimed at offline transcription of
          audio files rather than real-time transcription.
    """
    def __init__(
        self,
        access_key: str,
        hotword_enabled: bool = True,
        stt_enabled: bool = True,
        intent_enabled: bool = False,
        keywords: Optional[Sequence[str]] = None,
        keyword_paths: Optional[Sequence[str]] = None,
        keyword_model_path: Optional[str] = None,
        **kwargs,
    ):
        """
        :param access_key: Your Picovoice access key. You can get it by signing
            up at the `Picovoice console <https://console.picovoice.ai/>`_.
        :param hotword_enabled: Enable the wake-word engine (default: True).

            .. note:: The wake-word engine requires you to add Porcupine to the
                products available in your Picovoice account.

        :param stt_enabled: Enable the speech-to-text engine (default: True).

            .. note:: The speech-to-text engine requires you to add Cheetah to
                the products available in your Picovoice account.

        :param intent_enabled: Enable the intent recognition engine (default:
            False).

            .. note:: The intent recognition engine requires you to add Rhino
                to the products available in your Picovoice account.

        :param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
            google``...). Either ``keywords`` or ``keyword_paths`` must be
            provided if the wake-word engine is enabled. This list can include
            any of the default Picovoice keywords (available on the `Picovoice
            repository
            <https://github.com/Picovoice/porcupine/tree/master/resources/keyword_files>`_).
        :param keyword_paths: List of paths to the keyword files to listen for.
            Custom keyword files can be created using the `Picovoice console
            <https://console.picovoice.ai/ppn>`_ and downloaded from the
            console itself.
        :param keyword_model_path: If you are using a keyword file in a
            non-English language, you can provide the path to the model file
            for its language. Model files are available for all the supported
            languages through the `Picovoice repository
            <https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
        :param kwargs: Any remaining keyword arguments are forwarded untouched
            to the parent constructor.
        """
        super().__init__(**kwargs)

        # Nothing is validated or opened here: the settings are only stored,
        # and are later unpacked into the ``Assistant`` constructor by
        # ``main()`` every time a new assistant session is created.
        self._assistant_args = dict(
            stop_event=self._should_stop,
            access_key=access_key,
            hotword_enabled=hotword_enabled,
            stt_enabled=stt_enabled,
            intent_enabled=intent_enabled,
            keywords=keywords,
            keyword_paths=keyword_paths,
            keyword_model_path=keyword_model_path,
        )
    @action
    def start_conversation(self, *_, **__):
        """
        Programmatically start a conversation with the assistant.

        Positional and keyword arguments are accepted and ignored.

        .. note:: Placeholder: the method body is currently empty, so no
            conversation logic is executed yet.
        """
    @action
    def stop_conversation(self, *_, **__):
        """
        Programmatically stop a running conversation with the assistant.

        Positional and keyword arguments are accepted and ignored.

        .. note:: Placeholder: the method body is currently empty, so no
            conversation logic is executed yet.
        """
    @action
    def mute(self, *_, **__):
        """
        Mute the microphone. Intended as an alias for :meth:`.set_mic_mute`
        with ``muted=True``.

        .. note:: Placeholder: the method body is currently empty, so the
            alias is not wired up yet.
        """
    @action
    def unmute(self, *_, **__):
        """
        Unmute the microphone. Intended as an alias for :meth:`.set_mic_mute`
        with ``muted=False``.

        .. note:: Placeholder: the method body is currently empty, so the
            alias is not wired up yet.
        """
    @action
    def set_mic_mute(self, muted: bool):
        """
        Programmatically mute/unmute the microphone.

        :param muted: Set to True to mute the microphone, False to unmute it.

        .. note:: Placeholder: the method body is currently empty, so the
            ``muted`` argument is not acted upon yet.
        """
    @action
    def toggle_mute(self, *_, **__):
        """
        Toggle the mic mute state.

        Positional and keyword arguments are accepted and ignored.

        .. note:: Placeholder: the method body is currently empty, so the
            mute state is not changed yet.
        """
    @action
    def send_text_query(self, *_, query: str, **__):
        """
        Send a text query to the assistant.

        This is meant to be equivalent to saying something to the assistant.

        :param query: Query to be sent (keyword-only).

        .. note:: Placeholder: the method body is currently empty, so the
            query is not processed yet.
        """
    def main(self):
        """
        Run the assistant until the plugin is asked to stop.

        Each iteration builds a fresh ``Assistant`` from the arguments stored
        by the constructor, iterates over the events it yields and posts every
        non-empty event on the application bus.

        If the event stream terminates while the plugin is still running -
        either because of an error, or because the assistant had nothing to
        iterate over (e.g. no recorder was opened) - the loop backs off for a
        few seconds before starting a new assistant. The back-off happens
        *after* the assistant context has been closed, so the audio stream and
        the engines are released while waiting.
        """
        restart_delay = 5  # Seconds to wait before restarting the assistant

        while not self.should_stop():
            self.logger.info('Starting Picovoice assistant')
            with Assistant(**self._assistant_args) as assistant:
                try:
                    for event in assistant:
                        if event:
                            get_bus().post(event)
                except KeyboardInterrupt:
                    break
                except Exception as e:
                    self.logger.error('Picovoice assistant error: %s', e, exc_info=True)

            # The stream ended but nobody asked us to stop: wait before the
            # next attempt instead of re-creating the assistant in a tight
            # loop (which would also flood the log with start messages).
            if not self.should_stop():
                self.wait_stop(restart_delay)
    def stop(self):
        """
        Stop the plugin.

        Any running conversation is stopped first on a best-effort basis: a
        ``RuntimeError`` raised by :meth:`.stop_conversation` is logged at
        debug level and does not prevent the parent ``stop()`` from running.
        """
        try:
            self.stop_conversation()
        except RuntimeError as e:
            # Deliberately non-fatal: shutting down must proceed regardless,
            # but keep a trace of the failure for troubleshooting.
            self.logger.debug('Could not stop the running conversation: %s', e)

        super().stop()
 # vim:sw=4:ts=4:et:
--- a/platypush/plugins/picovoice/_assistant.py
+++ b/platypush/plugins/picovoice/_assistant.py
@ -0,0 +1,171 @@
 import logging
 import os
 from threading import Event
 from time import time
 from typing import Any, Dict, Optional, Sequence
 import pvcheetah
 import pvleopard
 import pvporcupine
 import pvrhino
 from platypush.message.event.assistant import HotwordDetectedEvent
 from ._recorder import AudioRecorder
 class Assistant:
    """
    A facade class that wraps the Picovoice engines under an assistant API.
    """
    def __init__(
        self,
        access_key: str,
        stop_event: Event,
        hotword_enabled: bool = True,
        stt_enabled: bool = True,
        intent_enabled: bool = False,
        keywords: Optional[Sequence[str]] = None,
        keyword_paths: Optional[Sequence[str]] = None,
        keyword_model_path: Optional[str] = None,
        frame_expiration: float = 3.0,  # Don't process audio frames older than this
    ):
        self.logger = logging.getLogger(__name__)
        self._access_key = access_key
        self._stop_event = stop_event
        self.hotword_enabled = hotword_enabled
        self.stt_enabled = stt_enabled
        self.intent_enabled = intent_enabled
        self.keywords = list(keywords or [])
        self.keyword_paths = None
        self.keyword_model_path = None
        self.frame_expiration = frame_expiration
        self._recorder = None
        if hotword_enabled:
            if keyword_paths:
                keyword_paths = [os.path.expanduser(path) for path in keyword_paths]
                missing_paths = [
                    path for path in keyword_paths if not os.path.isfile(path)
                ]
                if missing_paths:
                    raise FileNotFoundError(f'Keyword files not found: {missing_paths}')
                self.keyword_paths = keyword_paths
            if keyword_model_path:
                keyword_model_path = os.path.expanduser(keyword_model_path)
                if not os.path.isfile(keyword_model_path):
                    raise FileNotFoundError(
                        f'Keyword model file not found: {keyword_model_path}'
                    )
                self.keyword_model_path = keyword_model_path
        self._cheetah: Optional[pvcheetah.Cheetah] = None
        self._leopard: Optional[pvleopard.Leopard] = None
        self._porcupine: Optional[pvporcupine.Porcupine] = None
        self._rhino: Optional[pvrhino.Rhino] = None
    def should_stop(self):
        """
        :return: True if the stop event passed to the constructor is set.
        """
        return self._stop_event.is_set()
    def wait_stop(self):
        """
        Block, without a timeout, until the stop event passed to the
        constructor is set.
        """
        self._stop_event.wait()
    def _create_porcupine(self):
        if not self.hotword_enabled:
            return None
        args: Dict[str, Any] = {'access_key': self._access_key}
        if not (self.keywords or self.keyword_paths):
            raise ValueError(
                'You need to provide either a list of keywords or a list of '
                'keyword paths if the wake-word engine is enabled'
            )
        if self.keywords:
            args['keywords'] = self.keywords
        if self.keyword_paths:
            args['keyword_paths'] = self.keyword_paths
        if self.keyword_model_path:
            args['model_path'] = self.keyword_model_path
        return pvporcupine.create(**args)
    @property
    def porcupine(self) -> Optional[pvporcupine.Porcupine]:
        if not self._porcupine:
            self._porcupine = self._create_porcupine()
        return self._porcupine
    def __enter__(self):
        if self._recorder:
            self.logger.info('A recording stream already exists')
        elif self.porcupine:
            self._recorder = AudioRecorder(
                stop_event=self._stop_event,
                sample_rate=self.porcupine.sample_rate,
                frame_size=self.porcupine.frame_length,
                channels=1,
            )
            self._recorder.__enter__()
        return self
    def __exit__(self, *_):
        if self._recorder:
            self._recorder.__exit__(*_)
            self._recorder = None
        if self._cheetah:
            self._cheetah.delete()
            self._cheetah = None
        if self._leopard:
            self._leopard.delete()
            self._leopard = None
        if self._porcupine:
            self._porcupine.delete()
            self._porcupine = None
        if self._rhino:
            self._rhino.delete()
            self._rhino = None
    def __iter__(self):
        """
        The assistant is its own iterator: events are produced by
        ``__next__``.
        """
        return self
    def __next__(self):
        has_data = False
        if self.should_stop() or not self._recorder:
            raise StopIteration
        while not (self.should_stop() or has_data):
            if self.porcupine:  # TODO also check current state
                data = self._recorder.read()
                if data is None:
                    continue
                frame, t = data
                if time() - t > self.frame_expiration:
                    self.logger.info(
                        'Skipping audio frame older than %ss', self.frame_expiration
                    )
                    continue  # The audio frame is too old
                keyword_index = self.porcupine.process(frame)
                if keyword_index is None:
                    continue  # No keyword detected
                if keyword_index >= 0 and self.keywords:
                    return HotwordDetectedEvent(hotword=self.keywords[keyword_index])
        raise StopIteration
 # vim:sw=4:ts=4:et:
--- a/platypush/plugins/picovoice/_recorder.py
+++ b/platypush/plugins/picovoice/_recorder.py
@ -0,0 +1,77 @@
 from collections import namedtuple
 from logging import getLogger
 from queue import Empty, Full, Queue
 from threading import Event
 from time import time
 from typing import Optional
 import sounddevice as sd
 from platypush.utils import wait_for_either
 # An audio frame paired with a timestamp. The recorder callback in this
 # module fills it with the flattened input buffer and the ``time()`` value
 # at which the frame was queued.
 AudioFrame = namedtuple('AudioFrame', ['data', 'timestamp'])
 class AudioRecorder:
    """
    Audio recorder component that uses the sounddevice library to record audio
    from the microphone.
    """
    def __init__(
        self,
        stop_event: Event,
        sample_rate: int,
        frame_size: int,
        channels: int,
        dtype: str = 'int16',
        queue_size: int = 20,
    ):
        """
        :param stop_event: Event owned by the caller ("upstream"): when it is
            set, the recorder considers itself stopped.
        :param sample_rate: Passed to the input stream as ``samplerate``.
        :param frame_size: Passed to the input stream as ``blocksize``.
        :param channels: Passed to the input stream as ``channels``.
        :param dtype: Passed to the input stream as ``dtype``.
        :param queue_size: Maximum number of frames buffered in the internal
            queue.
        """
        self.logger = getLogger(__name__)
        self.frame_size = frame_size

        # Two stop flags: one shared with the caller, one private to this
        # recorder (set by ``stop()``, cleared by ``__enter__()``).
        self._upstream_stop_event = stop_event
        self._stop_event = Event()

        self._audio_queue: Queue[AudioFrame] = Queue(maxsize=queue_size)

        # The stream is only created here; it is started by ``__enter__()``.
        stream_options = dict(
            samplerate=sample_rate,
            channels=channels,
            dtype=dtype,
            blocksize=frame_size,
            callback=self._audio_callback,
        )
        self.stream = sd.InputStream(**stream_options)
    def __enter__(self):
        """
        Clear the recorder's own stop flag and start the input stream.

        :return: This recorder.
        """
        self._stop_event.clear()
        self.stream.start()
        return self
    def __exit__(self, *_):
        """
        Stop the recorder through :meth:`stop`. The exception details passed
        by the ``with`` statement are ignored.
        """
        self.stop()
        # NOTE(review): the stream is only stopped here, never closed - the
        # close() call below is disabled. Confirm whether this is intentional.
        # self.stream.close()
    def _audio_callback(self, indata, *_):
        """
        Stream callback: queue the incoming audio buffer.

        The buffer is flattened to one dimension and stored together with the
        current ``time()``. Nothing is queued once the recorder should stop,
        and the frame is dropped (with a warning) if the queue is full.

        :param indata: Input buffer provided by the stream; any further
            callback arguments are ignored.
        """
        if not self.should_stop():
            frame = AudioFrame(indata.reshape(-1), time())
            try:
                self._audio_queue.put_nowait(frame)
            except Full:
                self.logger.warning('Audio queue is full, dropping audio frame')
    def read(self, timeout: Optional[float] = None):
        try:
            return self._audio_queue.get(timeout=timeout)
        except TimeoutError:
            self.logger.debug('Audio queue is empty')
            return None
    def stop(self):
        """
        Set the recorder's own stop flag, then stop the input stream. The
        upstream stop event is left untouched.
        """
        self._stop_event.set()
        self.stream.stop()
    def should_stop(self):
        """
        :return: True if either the recorder's own stop flag or the upstream
            stop event is set.
        """
        return self._stop_event.is_set() or self._upstream_stop_event.is_set()
    def wait(self, timeout: Optional[float] = None):
        """
        Wait on the recorder's own stop flag and on the upstream stop event.

        :param timeout: Forwarded as-is to ``wait_for_either``.
        """
        # NOTE(review): going by its name, the helper presumably returns as
        # soon as either event is set or the timeout expires - confirm
        # against ``platypush.utils.wait_for_either``.
        wait_for_either(self._stop_event, self._upstream_stop_event, timeout=timeout)
--- a/platypush/plugins/picovoice/_state.py
+++ b/platypush/plugins/picovoice/_state.py
@ -0,0 +1,14 @@
 from enum import Enum
 class AssistantState(Enum):
    """
    Possible states of the assistant.

    Each member's value is the lowercase form of its name.
    """
    # NOTE(review): the meanings below are inferred from the member names
    # only - confirm once the states are actually used.
    # Presumably: the assistant is not processing any audio.
    IDLE = 'idle'
    # Presumably: the assistant is listening for the wake word.
    DETECTING_HOTWORD = 'detecting_hotword'
    # Presumably: the assistant is listening for a spoken command.
    DETECTING_SPEECH = 'detecting_speech'
 # vim:sw=4:ts=4:et:
--- a/platypush/plugins/picovoice/manifest.yaml
+++ b/platypush/plugins/picovoice/manifest.yaml
@ -0,0 +1,22 @@
 manifest:
  package: platypush.plugins.picovoice
  type: plugin
  events:
    - platypush.message.event.assistant.ConversationEndEvent
    - platypush.message.event.assistant.ConversationStartEvent
    - platypush.message.event.assistant.ConversationTimeoutEvent
    - platypush.message.event.assistant.HotwordDetectedEvent
    - platypush.message.event.assistant.MicMutedEvent
    - platypush.message.event.assistant.MicUnmutedEvent
    - platypush.message.event.assistant.NoResponseEvent
    - platypush.message.event.assistant.ResponseEvent
    - platypush.message.event.assistant.SpeechRecognizedEvent
  install:
    pacman:
      - python-sounddevice
    pip:
      - pvcheetah
      - pvleopard
      - pvporcupine
      - pvrhino
      - sounddevice
--- a/platypush/plugins/sound/_manager/_main.py
+++ b/platypush/plugins/sound/_manager/_main.py
@ -247,10 +247,12 @@ class AudioManager:
        wait_start = time()
        for audio_thread in streams_to_stop:
            audio_thread.join(
-                timeout=max(0, timeout - (time() - wait_start))
+                timeout=(
                    max(0, timeout - (time() - wait_start))
                    if timeout is not None
                    else None
                )
            )
        # Remove references
        for audio_thread in streams_to_stop:
--- a/platypush/utils/mock/modules.py
+++ b/platypush/utils/mock/modules.py
@ -83,7 +83,9 @@ mock_imports = [
    "pmw3901",
    "psutil",
    "pvcheetah",
    "pvleopard",
    "pvporcupine",
    "pvrhino",
    "pyHS100",
    "pyaudio",
    "pychromecast",