[WIP] Added initial hotword integration.

Fabio Manganiello 2024-04-07 22:42:01 +02:00
parent f0382c73ab
commit 01dec0b7a4
Signed by untrusted user: blacklight
GPG key ID: D90FBA7F76362774
7 changed files with 462 additions and 4 deletions

@@ -0,0 +1,170 @@
from typing import Optional, Sequence
from platypush.context import get_bus
from platypush.plugins import RunnablePlugin, action
from platypush.plugins.assistant import AssistantPlugin
from ._assistant import Assistant
# pylint: disable=too-many-ancestors
class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
"""
A voice assistant that runs on your device, based on the `Picovoice
<https://picovoice.ai/>`_ engine.
.. note:: You will need a Picovoice account and a personal access key to
use this integration.
You can get your personal access key by signing up at the `Picovoice
console <https://console.picovoice.ai/>`_. You may be asked to submit a
reason for using the service (feel free to mention a personal Platypush
integration), and you will receive your personal access key.
You may also be asked to select which products you want to use. The default
configuration of this plugin requires the following:
* **Porcupine**: wake-word engine, if you want the device to listen for
a specific wake word in order to start the assistant.
* **Cheetah**: speech-to-text engine, if you want your voice
interactions to be transcribed into free text - either programmatically
or when triggered by the wake word. Or:
* **Rhino**: intent recognition engine, if you want to extract *intents*
out of your voice commands - for instance, the phrase "set the living
room temperature to 20 degrees" could be mapped to the intent with the
following parameters: ``intent``: ``set_temperature``, ``room``:
``living_room``, ``temperature``: ``20``.
* **Leopard**: speech-to-text engine aimed at offline transcription of
audio files rather than real-time transcription.
"""
def __init__(
self,
access_key: str,
hotword_enabled: bool = True,
stt_enabled: bool = True,
intent_enabled: bool = False,
keywords: Optional[Sequence[str]] = None,
keyword_paths: Optional[Sequence[str]] = None,
keyword_model_path: Optional[str] = None,
**kwargs,
):
"""
:param access_key: Your Picovoice access key. You can get it by signing
up at the `Picovoice console <https://console.picovoice.ai/>`_.
:param hotword_enabled: Enable the wake-word engine (default: True).
.. note:: The wake-word engine requires you to add Porcupine to the
products available in your Picovoice account.
:param stt_enabled: Enable the speech-to-text engine (default: True).
.. note:: The speech-to-text engine requires you to add Cheetah to
the products available in your Picovoice account.
:param intent_enabled: Enable the intent recognition engine (default:
False).
.. note:: The intent recognition engine requires you to add Rhino
to the products available in your Picovoice account.
:param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
google``...). Either ``keywords`` or ``keyword_paths`` must be
provided if the wake-word engine is enabled. This list can include
any of the default Picovoice keywords (available on the `Picovoice
repository
<https://github.com/Picovoice/porcupine/tree/master/resources/keyword_files>`_).
:param keyword_paths: List of paths to the keyword files to listen for.
Custom keyword files can be created using the `Picovoice console
<https://console.picovoice.ai/ppn>`_ and downloaded from the
console itself.
:param keyword_model_path: If you are using a keyword file in a
non-English language, you can provide the path to the model file
for its language. Model files are available for all the supported
languages through the `Picovoice repository
<https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
"""
super().__init__(**kwargs)
self._assistant_args = {
'stop_event': self._should_stop,
'access_key': access_key,
'hotword_enabled': hotword_enabled,
'stt_enabled': stt_enabled,
'intent_enabled': intent_enabled,
'keywords': keywords,
'keyword_paths': keyword_paths,
'keyword_model_path': keyword_model_path,
}
@action
def start_conversation(self, *_, **__):
"""
Programmatically start a conversation with the assistant
"""
@action
def stop_conversation(self, *_, **__):
"""
Programmatically stop a running conversation with the assistant
"""
@action
def mute(self, *_, **__):
"""
Mute the microphone. Alias for :meth:`.set_mic_mute` with
``muted=True``.
"""
@action
def unmute(self, *_, **__):
"""
Unmute the microphone. Alias for :meth:`.set_mic_mute` with
``muted=False``.
"""
@action
def set_mic_mute(self, muted: bool):
"""
Programmatically mute/unmute the microphone.
:param muted: True to mute the microphone, False to unmute it.
"""
@action
def toggle_mute(self, *_, **__):
"""
Toggle the mic mute state.
"""
@action
def send_text_query(self, *_, query: str, **__):
"""
Send a text query to the assistant.
This is equivalent to saying something to the assistant.
:param query: Query to be sent.
"""
def main(self):
while not self.should_stop():
self.logger.info('Starting Picovoice assistant')
with Assistant(**self._assistant_args) as assistant:
try:
for event in assistant:
if event:
get_bus().post(event)
except KeyboardInterrupt:
break
except Exception as e:
self.logger.error('Picovoice assistant error: %s', e, exc_info=True)
self.wait_stop(5)
def stop(self):
try:
self.stop_conversation()
except RuntimeError:
pass
super().stop()
# vim:sw=4:ts=4:et:
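
For reference, a sketch of the keyword arguments that PicovoicePlugin.__init__ above accepts, with placeholder values (the access key comes from the Picovoice console, and the keywords are built-in Porcupine wake words); in a real deployment these would normally live in the plugin's configuration section rather than in code:

# Illustrative only; all values below are placeholders.
picovoice_args = {
    'access_key': 'YOUR_PICOVOICE_ACCESS_KEY',  # personal key from console.picovoice.ai
    'hotword_enabled': True,                    # requires Porcupine on the Picovoice account
    'stt_enabled': True,                        # requires Cheetah on the Picovoice account
    'intent_enabled': False,                    # enable together with Rhino for intent matching
    'keywords': ['porcupine', 'computer'],      # built-in Porcupine wake words
}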

@@ -0,0 +1,171 @@
import logging
import os
from threading import Event
from time import time
from typing import Any, Dict, Optional, Sequence
import pvcheetah
import pvleopard
import pvporcupine
import pvrhino
from platypush.message.event.assistant import HotwordDetectedEvent
from ._recorder import AudioRecorder
class Assistant:
"""
A facade class that wraps the Picovoice engines under an assistant API.
"""
def __init__(
self,
access_key: str,
stop_event: Event,
hotword_enabled: bool = True,
stt_enabled: bool = True,
intent_enabled: bool = False,
keywords: Optional[Sequence[str]] = None,
keyword_paths: Optional[Sequence[str]] = None,
keyword_model_path: Optional[str] = None,
frame_expiration: float = 3.0, # Don't process audio frames older than this
):
self.logger = logging.getLogger(__name__)
self._access_key = access_key
self._stop_event = stop_event
self.hotword_enabled = hotword_enabled
self.stt_enabled = stt_enabled
self.intent_enabled = intent_enabled
self.keywords = list(keywords or [])
self.keyword_paths = None
self.keyword_model_path = None
self.frame_expiration = frame_expiration
self._recorder = None
if hotword_enabled:
if keyword_paths:
keyword_paths = [os.path.expanduser(path) for path in keyword_paths]
missing_paths = [
path for path in keyword_paths if not os.path.isfile(path)
]
if missing_paths:
raise FileNotFoundError(f'Keyword files not found: {missing_paths}')
self.keyword_paths = keyword_paths
if keyword_model_path:
keyword_model_path = os.path.expanduser(keyword_model_path)
if not os.path.isfile(keyword_model_path):
raise FileNotFoundError(
f'Keyword model file not found: {keyword_model_path}'
)
self.keyword_model_path = keyword_model_path
self._cheetah: Optional[pvcheetah.Cheetah] = None
self._leopard: Optional[pvleopard.Leopard] = None
self._porcupine: Optional[pvporcupine.Porcupine] = None
self._rhino: Optional[pvrhino.Rhino] = None
def should_stop(self):
return self._stop_event.is_set()
def wait_stop(self):
self._stop_event.wait()
def _create_porcupine(self):
if not self.hotword_enabled:
return None
args: Dict[str, Any] = {'access_key': self._access_key}
if not (self.keywords or self.keyword_paths):
raise ValueError(
'You need to provide either a list of keywords or a list of '
'keyword paths if the wake-word engine is enabled'
)
if self.keywords:
args['keywords'] = self.keywords
if self.keyword_paths:
args['keyword_paths'] = self.keyword_paths
if self.keyword_model_path:
args['model_path'] = self.keyword_model_path
return pvporcupine.create(**args)
@property
def porcupine(self) -> Optional[pvporcupine.Porcupine]:
if not self._porcupine:
self._porcupine = self._create_porcupine()
return self._porcupine
def __enter__(self):
if self._recorder:
self.logger.info('A recording stream already exists')
elif self.porcupine:
self._recorder = AudioRecorder(
stop_event=self._stop_event,
sample_rate=self.porcupine.sample_rate,
frame_size=self.porcupine.frame_length,
channels=1,
)
self._recorder.__enter__()
return self
def __exit__(self, *_):
if self._recorder:
self._recorder.__exit__(*_)
self._recorder = None
if self._cheetah:
self._cheetah.delete()
self._cheetah = None
if self._leopard:
self._leopard.delete()
self._leopard = None
if self._porcupine:
self._porcupine.delete()
self._porcupine = None
if self._rhino:
self._rhino.delete()
self._rhino = None
def __iter__(self):
return self
def __next__(self):
has_data = False
if self.should_stop() or not self._recorder:
raise StopIteration
while not (self.should_stop() or has_data):
if self.porcupine: # TODO also check current state
data = self._recorder.read()
if data is None:
continue
frame, t = data
if time() - t > self.frame_expiration:
self.logger.info(
'Skipping audio frame older than %ss', self.frame_expiration
)
continue # The audio frame is too old
keyword_index = self.porcupine.process(frame)
if keyword_index is None:
continue # No keyword detected
if keyword_index >= 0 and self.keywords:
return HotwordDetectedEvent(hotword=self.keywords[keyword_index])
raise StopIteration
# vim:sw=4:ts=4:et:
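
A minimal sketch of driving the Assistant facade above on its own, assuming a valid access key and one of the built-in Porcupine keywords; the iterator blocks on the recorder and yields a HotwordDetectedEvent whenever the wake word is heard:

from threading import Event

stop = Event()
with Assistant(
    access_key='YOUR_PICOVOICE_ACCESS_KEY',  # placeholder
    stop_event=stop,
    keywords=['porcupine'],                  # built-in Porcupine wake word
) as assistant:
    for event in assistant:                  # blocks until the next detection
        if event:
            print(event)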

@@ -0,0 +1,77 @@
from collections import namedtuple
from logging import getLogger
from queue import Empty, Full, Queue
from threading import Event
from time import time
from typing import Optional
import sounddevice as sd
from platypush.utils import wait_for_either
AudioFrame = namedtuple('AudioFrame', ['data', 'timestamp'])
class AudioRecorder:
"""
Audio recorder component that uses the sounddevice library to record audio
from the microphone.
"""
def __init__(
self,
stop_event: Event,
sample_rate: int,
frame_size: int,
channels: int,
dtype: str = 'int16',
queue_size: int = 20,
):
self.logger = getLogger(__name__)
self._audio_queue: Queue[AudioFrame] = Queue(maxsize=queue_size)
self.frame_size = frame_size
self._stop_event = Event()
self._upstream_stop_event = stop_event
self.stream = sd.InputStream(
samplerate=sample_rate,
channels=channels,
dtype=dtype,
blocksize=frame_size,
callback=self._audio_callback,
)
def __enter__(self):
self._stop_event.clear()
self.stream.start()
return self
def __exit__(self, *_):
self.stop()
# self.stream.close()
def _audio_callback(self, indata, *_):
if self.should_stop():
return
try:
self._audio_queue.put_nowait(AudioFrame(indata.reshape(-1), time()))
except Full:
self.logger.warning('Audio queue is full, dropping audio frame')
def read(self, timeout: Optional[float] = None):
try:
return self._audio_queue.get(timeout=timeout)
except Empty:
self.logger.debug('Audio queue is empty')
return None
def stop(self):
self._stop_event.set()
self.stream.stop()
def should_stop(self):
return self._stop_event.is_set() or self._upstream_stop_event.is_set()
def wait(self, timeout: Optional[float] = None):
wait_for_either(self._stop_event, self._upstream_stop_event, timeout=timeout)
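
The recorder can also be exercised in isolation; a rough sketch, assuming 16 kHz mono audio in 512-sample frames (the values that Porcupine instances typically report through sample_rate and frame_length):

from threading import Event

stop = Event()
recorder = AudioRecorder(
    stop_event=stop,
    sample_rate=16000,  # assumed; the assistant takes this from porcupine.sample_rate
    frame_size=512,     # assumed; the assistant takes this from porcupine.frame_length
    channels=1,
)

with recorder:
    for _ in range(30):  # roughly one second of audio at 512 samples per frame
        frame = recorder.read(timeout=1)
        if frame is not None:
            print(len(frame.data), 'samples captured at', frame.timestamp)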

@@ -0,0 +1,14 @@
from enum import Enum
class AssistantState(Enum):
"""
Possible states of the assistant.
"""
IDLE = 'idle'
DETECTING_HOTWORD = 'detecting_hotword'
DETECTING_SPEECH = 'detecting_speech'
# vim:sw=4:ts=4:et:
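
The enum is not yet wired into the audio loop (see the TODO in Assistant.__next__); a hypothetical sketch of the transitions it appears intended to model:

# Hypothetical transition helper, for illustration only; not part of the plugin.
def next_state(
    current: AssistantState, hotword_detected: bool, conversation_over: bool
) -> AssistantState:
    if current == AssistantState.DETECTING_HOTWORD and hotword_detected:
        return AssistantState.DETECTING_SPEECH   # wake word heard: start speech detection
    if current == AssistantState.DETECTING_SPEECH and conversation_over:
        return AssistantState.DETECTING_HOTWORD  # conversation over: resume wake-word detection
    return current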

@@ -0,0 +1,22 @@
manifest:
package: platypush.plugins.picovoice
type: plugin
events:
- platypush.message.event.assistant.ConversationEndEvent
- platypush.message.event.assistant.ConversationStartEvent
- platypush.message.event.assistant.ConversationTimeoutEvent
- platypush.message.event.assistant.HotwordDetectedEvent
- platypush.message.event.assistant.MicMutedEvent
- platypush.message.event.assistant.MicUnmutedEvent
- platypush.message.event.assistant.NoResponseEvent
- platypush.message.event.assistant.ResponseEvent
- platypush.message.event.assistant.SpeechRecognizedEvent
install:
pacman:
- python-sounddevice
pip:
- pvcheetah
- pvleopard
- pvporcupine
- pvrhino
- sounddevice
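
The events listed in the manifest are what user hooks can subscribe to once the integration is loaded. A sketch of such a hook, assuming the @when decorator exported by recent Platypush releases (older setups use the platypush.event.hook.hook decorator instead):

from platypush import when
from platypush.message.event.assistant import HotwordDetectedEvent

@when(HotwordDetectedEvent)
def on_hotword(event, **_):
    # React to the wake word: pause the media player, turn on a light, etc.
    print('Hotword detected:', event.args.get('hotword'))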

@@ -247,9 +247,11 @@ class AudioManager:
 wait_start = time()
 for audio_thread in streams_to_stop:
     audio_thread.join(
-        timeout=max(0, timeout - (time() - wait_start))
-        if timeout is not None
-        else None
+        timeout=(
+            max(0, timeout - (time() - wait_start))
+            if timeout is not None
+            else None
+        )
     )

 # Remove references

@@ -83,7 +83,9 @@ mock_imports = [
"pmw3901",
"psutil",
"pvcheetah",
"pvporcupine ",
"pvleopard",
"pvporcupine",
"pvrhino",
"pyHS100",
"pyaudio",
"pychromecast",