From 01dec0b7a4a4c3c136c68513991e510f58c5ba2f Mon Sep 17 00:00:00 2001
From: Fabio Manganiello
Date: Sun, 7 Apr 2024 22:42:01 +0200
Subject: [PATCH] [WIP] Added initial hotword integration.

---
 platypush/plugins/picovoice/__init__.py   | 170 +++++++++++++++++++++
 platypush/plugins/picovoice/_assistant.py | 171 ++++++++++++++++++++++
 platypush/plugins/picovoice/_recorder.py  |  77 ++++++++++
 platypush/plugins/picovoice/_state.py     |  14 ++
 platypush/plugins/picovoice/manifest.yaml |  22 +++
 platypush/plugins/sound/_manager/_main.py |   8 +-
 platypush/utils/mock/modules.py           |   4 +-
 7 files changed, 462 insertions(+), 4 deletions(-)
 create mode 100644 platypush/plugins/picovoice/__init__.py
 create mode 100644 platypush/plugins/picovoice/_assistant.py
 create mode 100644 platypush/plugins/picovoice/_recorder.py
 create mode 100644 platypush/plugins/picovoice/_state.py
 create mode 100644 platypush/plugins/picovoice/manifest.yaml

diff --git a/platypush/plugins/picovoice/__init__.py b/platypush/plugins/picovoice/__init__.py
new file mode 100644
index 00000000..a861f66b
--- /dev/null
+++ b/platypush/plugins/picovoice/__init__.py
@@ -0,0 +1,170 @@
+from typing import Optional, Sequence
+
+from platypush.context import get_bus
+from platypush.plugins import RunnablePlugin, action
+from platypush.plugins.assistant import AssistantPlugin
+
+from ._assistant import Assistant
+
+
+# pylint: disable=too-many-ancestors
+class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
+    """
+    A voice assistant that runs on your device, based on the `Picovoice
+    <https://picovoice.ai/>`_ engine.
+
+    .. note:: You will need a Picovoice account and a personal access key to
+        use this integration.
+
+    You can get your personal access key by signing up at the `Picovoice
+    console <https://console.picovoice.ai/>`_. You may be asked to submit a
+    reason for using the service (feel free to mention a personal Platypush
+    integration), and you will receive your personal access key.
+
+    You may also be asked to select which products you want to use. The
+    default configuration of this plugin requires the following:
+
+        * **Porcupine**: wake-word engine, if you want the device to listen
+          for a specific wake word in order to start the assistant.
+
+        * **Cheetah**: speech-to-text engine, if you want your voice
+          interactions to be transcribed into free text - either
+          programmatically or when triggered by the wake word. Or:
+
+        * **Rhino**: intent recognition engine, if you want to extract
+          *intents* out of your voice commands - for instance, the phrase
+          "set the living room temperature to 20 degrees" could be mapped to
+          the intent with the following parameters: ``intent``:
+          ``set_temperature``, ``room``: ``living_room``, ``temperature``:
+          ``20``.
+
+        * **Leopard**: speech-to-text engine aimed at offline transcription
+          of audio files rather than real-time transcription.
+    """
+
+    def __init__(
+        self,
+        access_key: str,
+        hotword_enabled: bool = True,
+        stt_enabled: bool = True,
+        intent_enabled: bool = False,
+        keywords: Optional[Sequence[str]] = None,
+        keyword_paths: Optional[Sequence[str]] = None,
+        keyword_model_path: Optional[str] = None,
+        **kwargs,
+    ):
+        """
+        :param access_key: Your Picovoice access key. You can get it by
+            signing up at the `Picovoice console
+            <https://console.picovoice.ai/>`_.
+        :param hotword_enabled: Enable the wake-word engine (default: True).
+            .. note:: The wake-word engine requires you to add Porcupine to
+                the products available in your Picovoice account.
+        :param stt_enabled: Enable the speech-to-text engine (default: True).
+            .. note:: The speech-to-text engine requires you to add Cheetah
+                to the products available in your Picovoice account.
+        :param intent_enabled: Enable the intent recognition engine (default:
+            False).
+            .. note:: The intent recognition engine requires you to add Rhino
+                to the products available in your Picovoice account.
+        :param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
+            google``...). Either ``keywords`` or ``keyword_paths`` must be
+            provided if the wake-word engine is enabled. This list can include
+            any of the default Picovoice keywords (available on the `Picovoice
+            repository <https://github.com/Picovoice/porcupine>`_).
+        :param keyword_paths: List of paths to the keyword files to listen
+            for. Custom keyword files can be created using the `Picovoice
+            console <https://console.picovoice.ai/>`_ and downloaded from the
+            console itself.
+        :param keyword_model_path: If you are using a keyword file in a
+            non-English language, you can provide the path to the model file
+            for its language. Model files are available for all the supported
+            languages through the `Picovoice repository
+            <https://github.com/Picovoice/porcupine>`_.
+        """
+        super().__init__(**kwargs)
+        self._assistant_args = {
+            'stop_event': self._should_stop,
+            'access_key': access_key,
+            'hotword_enabled': hotword_enabled,
+            'stt_enabled': stt_enabled,
+            'intent_enabled': intent_enabled,
+            'keywords': keywords,
+            'keyword_paths': keyword_paths,
+            'keyword_model_path': keyword_model_path,
+        }
+
+    @action
+    def start_conversation(self, *_, **__):
+        """
+        Programmatically start a conversation with the assistant.
+        """
+
+    @action
+    def stop_conversation(self, *_, **__):
+        """
+        Programmatically stop a running conversation with the assistant.
+        """
+
+    @action
+    def mute(self, *_, **__):
+        """
+        Mute the microphone. Alias for :meth:`.set_mic_mute` with
+        ``muted=True``.
+        """
+
+    @action
+    def unmute(self, *_, **__):
+        """
+        Unmute the microphone. Alias for :meth:`.set_mic_mute` with
+        ``muted=False``.
+        """
+
+    @action
+    def set_mic_mute(self, muted: bool):
+        """
+        Programmatically mute/unmute the microphone.
+
+        :param muted: Set to True or False.
+        """
+
+    @action
+    def toggle_mute(self, *_, **__):
+        """
+        Toggle the mic mute state.
+        """
+
+    @action
+    def send_text_query(self, *_, query: str, **__):
+        """
+        Send a text query to the assistant.
+
+        This is equivalent to saying something to the assistant.
+
+        :param query: Query to be sent.
+        """
+
+    def main(self):
+        while not self.should_stop():
+            self.logger.info('Starting Picovoice assistant')
+            with Assistant(**self._assistant_args) as assistant:
+                try:
+                    for event in assistant:
+                        if event:
+                            get_bus().post(event)
+                except KeyboardInterrupt:
+                    break
+                except Exception as e:
+                    self.logger.error('Picovoice assistant error: %s', e, exc_info=True)
+                    self.wait_stop(5)
+
+    def stop(self):
+        try:
+            self.stop_conversation()
+        except RuntimeError:
+            pass
+
+        super().stop()
+
+
+# vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/picovoice/_assistant.py b/platypush/plugins/picovoice/_assistant.py
new file mode 100644
index 00000000..5181aa57
--- /dev/null
+++ b/platypush/plugins/picovoice/_assistant.py
@@ -0,0 +1,171 @@
+import logging
+import os
+from threading import Event
+from time import time
+from typing import Any, Dict, Optional, Sequence
+
+import pvcheetah
+import pvleopard
+import pvporcupine
+import pvrhino
+
+from platypush.message.event.assistant import HotwordDetectedEvent
+
+from ._recorder import AudioRecorder
+
+
+class Assistant:
+    """
+    A facade class that wraps the Picovoice engines under an assistant API.
+    """
+
+    def __init__(
+        self,
+        access_key: str,
+        stop_event: Event,
+        hotword_enabled: bool = True,
+        stt_enabled: bool = True,
+        intent_enabled: bool = False,
+        keywords: Optional[Sequence[str]] = None,
+        keyword_paths: Optional[Sequence[str]] = None,
+        keyword_model_path: Optional[str] = None,
+        frame_expiration: float = 3.0,  # Don't process audio frames older than this
+    ):
+        self.logger = logging.getLogger(__name__)
+        self._access_key = access_key
+        self._stop_event = stop_event
+        self.hotword_enabled = hotword_enabled
+        self.stt_enabled = stt_enabled
+        self.intent_enabled = intent_enabled
+        self.keywords = list(keywords or [])
+        self.keyword_paths = None
+        self.keyword_model_path = None
+        self.frame_expiration = frame_expiration
+        self._recorder = None
+
+        if hotword_enabled:
+            if keyword_paths:
+                keyword_paths = [os.path.expanduser(path) for path in keyword_paths]
+                missing_paths = [
+                    path for path in keyword_paths if not os.path.isfile(path)
+                ]
+                if missing_paths:
+                    raise FileNotFoundError(f'Keyword files not found: {missing_paths}')
+
+                self.keyword_paths = keyword_paths
+
+            if keyword_model_path:
+                keyword_model_path = os.path.expanduser(keyword_model_path)
+                if not os.path.isfile(keyword_model_path):
+                    raise FileNotFoundError(
+                        f'Keyword model file not found: {keyword_model_path}'
+                    )
+
+                self.keyword_model_path = keyword_model_path
+
+        self._cheetah: Optional[pvcheetah.Cheetah] = None
+        self._leopard: Optional[pvleopard.Leopard] = None
+        self._porcupine: Optional[pvporcupine.Porcupine] = None
+        self._rhino: Optional[pvrhino.Rhino] = None
+
+    def should_stop(self):
+        return self._stop_event.is_set()
+
+    def wait_stop(self):
+        self._stop_event.wait()
+
+    def _create_porcupine(self):
+        if not self.hotword_enabled:
+            return None
+
+        args: Dict[str, Any] = {'access_key': self._access_key}
+        if not (self.keywords or self.keyword_paths):
+            raise ValueError(
+                'You need to provide either a list of keywords or a list of '
+                'keyword paths if the wake-word engine is enabled'
+            )
+
+        if self.keywords:
+            args['keywords'] = self.keywords
+        if self.keyword_paths:
+            args['keyword_paths'] = self.keyword_paths
+        if self.keyword_model_path:
+            args['model_path'] = self.keyword_model_path
+
+        return pvporcupine.create(**args)
+
+    @property
+    def porcupine(self) -> Optional[pvporcupine.Porcupine]:
+        if not self._porcupine:
+            self._porcupine = self._create_porcupine()
+
+        return self._porcupine
+
+    def __enter__(self):
+        if self._recorder:
+            self.logger.info('A recording stream already exists')
+        elif self.porcupine:
+            self._recorder = AudioRecorder(
+                stop_event=self._stop_event,
+                sample_rate=self.porcupine.sample_rate,
+                frame_size=self.porcupine.frame_length,
+                channels=1,
+            )
+
+            self._recorder.__enter__()
+
+        return self
+
+    def __exit__(self, *_):
+        if self._recorder:
+            self._recorder.__exit__(*_)
+            self._recorder = None
+
+        if self._cheetah:
+            self._cheetah.delete()
+            self._cheetah = None
+
+        if self._leopard:
+            self._leopard.delete()
+            self._leopard = None
+
+        if self._porcupine:
+            self._porcupine.delete()
+            self._porcupine = None
+
+        if self._rhino:
+            self._rhino.delete()
+            self._rhino = None
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        has_data = False
+        if self.should_stop() or not self._recorder:
+            raise StopIteration
+
+        while not (self.should_stop() or has_data):
+            if self.porcupine:  # TODO also check current state
+                data = self._recorder.read()
+                if data is None:
+                    continue
+
+                frame, t = data
+                if time() - t > self.frame_expiration:
+                    self.logger.info(
+                        'Skipping audio frame older than %ss', self.frame_expiration
+                    )
+                    continue  # The audio frame is too old
+
+                keyword_index = self.porcupine.process(frame)
+                if keyword_index is None:
+                    continue  # No keyword detected
+
+                if keyword_index >= 0 and self.keywords:
+                    return HotwordDetectedEvent(hotword=self.keywords[keyword_index])
+
+        raise StopIteration
+
+
+# vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/picovoice/_recorder.py b/platypush/plugins/picovoice/_recorder.py
new file mode 100644
index 00000000..9df81e7c
--- /dev/null
+++ b/platypush/plugins/picovoice/_recorder.py
@@ -0,0 +1,77 @@
+from collections import namedtuple
+from logging import getLogger
+from queue import Empty, Full, Queue
+from threading import Event
+from time import time
+from typing import Optional
+
+import sounddevice as sd
+
+from platypush.utils import wait_for_either
+
+
+AudioFrame = namedtuple('AudioFrame', ['data', 'timestamp'])
+
+
+class AudioRecorder:
+    """
+    Audio recorder component that uses the sounddevice library to record audio
+    from the microphone.
+    """
+
+    def __init__(
+        self,
+        stop_event: Event,
+        sample_rate: int,
+        frame_size: int,
+        channels: int,
+        dtype: str = 'int16',
+        queue_size: int = 20,
+    ):
+        self.logger = getLogger(__name__)
+        self._audio_queue: Queue[AudioFrame] = Queue(maxsize=queue_size)
+        self.frame_size = frame_size
+        self._stop_event = Event()
+        self._upstream_stop_event = stop_event
+        self.stream = sd.InputStream(
+            samplerate=sample_rate,
+            channels=channels,
+            dtype=dtype,
+            blocksize=frame_size,
+            callback=self._audio_callback,
+        )
+
+    def __enter__(self):
+        self._stop_event.clear()
+        self.stream.start()
+        return self
+
+    def __exit__(self, *_):
+        self.stop()
+        # self.stream.close()
+
+    def _audio_callback(self, indata, *_):
+        if self.should_stop():
+            return
+
+        try:
+            self._audio_queue.put_nowait(AudioFrame(indata.reshape(-1), time()))
+        except Full:
+            self.logger.warning('Audio queue is full, dropping audio frame')
+
+    def read(self, timeout: Optional[float] = None):
+        try:
+            return self._audio_queue.get(timeout=timeout)
+        except Empty:  # Queue.get raises queue.Empty on timeout
+            self.logger.debug('Audio queue is empty')
+            return None
+
+    def stop(self):
+        self._stop_event.set()
+        self.stream.stop()
+
+    def should_stop(self):
+        return self._stop_event.is_set() or self._upstream_stop_event.is_set()
+
+    def wait(self, timeout: Optional[float] = None):
+        wait_for_either(self._stop_event, self._upstream_stop_event, timeout=timeout)
diff --git a/platypush/plugins/picovoice/_state.py b/platypush/plugins/picovoice/_state.py
new file mode 100644
index 00000000..e0eb7e71
--- /dev/null
+++ b/platypush/plugins/picovoice/_state.py
@@ -0,0 +1,14 @@
+from enum import Enum
+
+
+class AssistantState(Enum):
+    """
+    Possible states of the assistant.
+    """
+
+    IDLE = 'idle'
+    DETECTING_HOTWORD = 'detecting_hotword'
+    DETECTING_SPEECH = 'detecting_speech'
+
+
+# vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/picovoice/manifest.yaml b/platypush/plugins/picovoice/manifest.yaml
new file mode 100644
index 00000000..9e9afc06
--- /dev/null
+++ b/platypush/plugins/picovoice/manifest.yaml
@@ -0,0 +1,22 @@
+manifest:
+  package: platypush.plugins.picovoice
+  type: plugin
+  events:
+    - platypush.message.event.assistant.ConversationEndEvent
+    - platypush.message.event.assistant.ConversationStartEvent
+    - platypush.message.event.assistant.ConversationTimeoutEvent
+    - platypush.message.event.assistant.HotwordDetectedEvent
+    - platypush.message.event.assistant.MicMutedEvent
+    - platypush.message.event.assistant.MicUnmutedEvent
+    - platypush.message.event.assistant.NoResponseEvent
+    - platypush.message.event.assistant.ResponseEvent
+    - platypush.message.event.assistant.SpeechRecognizedEvent
+  install:
+    pacman:
+      - python-sounddevice
+    pip:
+      - pvcheetah
+      - pvleopard
+      - pvporcupine
+      - pvrhino
+      - sounddevice
diff --git a/platypush/plugins/sound/_manager/_main.py b/platypush/plugins/sound/_manager/_main.py
index 9a1d96cf..4dea95df 100644
--- a/platypush/plugins/sound/_manager/_main.py
+++ b/platypush/plugins/sound/_manager/_main.py
@@ -247,9 +247,11 @@ class AudioManager:
         wait_start = time()
         for audio_thread in streams_to_stop:
             audio_thread.join(
-                timeout=max(0, timeout - (time() - wait_start))
-                if timeout is not None
-                else None
+                timeout=(
+                    max(0, timeout - (time() - wait_start))
+                    if timeout is not None
+                    else None
+                )
             )
 
         # Remove references
diff --git a/platypush/utils/mock/modules.py b/platypush/utils/mock/modules.py
index 6af74355..eb6149f9 100644
--- a/platypush/utils/mock/modules.py
+++ b/platypush/utils/mock/modules.py
@@ -83,7 +83,9 @@ mock_imports = [
    "pmw3901",
    "psutil",
    "pvcheetah",
-    "pvporcupine ",
+    "pvleopard",
+    "pvporcupine",
+    "pvrhino",
    "pyHS100",
    "pyaudio",
    "pychromecast",