forked from platypush/platypush
[WIP] Added initial hotword integration.
This commit is contained in:
parent
f0382c73ab
commit
01dec0b7a4
7 changed files with 462 additions and 4 deletions
170
platypush/plugins/picovoice/__init__.py
Normal file
170
platypush/plugins/picovoice/__init__.py
Normal file
|
@ -0,0 +1,170 @@
|
||||||
|
from typing import Optional, Sequence
|
||||||
|
|
||||||
|
from platypush.context import get_bus
|
||||||
|
from platypush.plugins import RunnablePlugin, action
|
||||||
|
from platypush.plugins.assistant import AssistantPlugin
|
||||||
|
|
||||||
|
from ._assistant import Assistant
|
||||||
|
|
||||||
|
|
||||||
|
# pylint: disable=too-many-ancestors
|
||||||
|
class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
    """
    A voice assistant that runs on your device, based on the `Picovoice
    <https://picovoice.ai/>`_ engine.

    .. note:: You will need a PicoVoice account and a personal access key to
        use this integration.

    You can get your personal access key by signing up at the `Picovoice
    console <https://console.picovoice.ai/>`_. You may be asked to submit a
    reason for using the service (feel free to mention a personal Platypush
    integration), and you will receive your personal access key.

    You may also be asked to select which products you want to use. The default
    configuration of this plugin requires the following:

        * **Porcupine**: wake-word engine, if you want the device to listen for
          a specific wake word in order to start the assistant.

        * **Cheetah**: speech-to-text engine, if you want your voice
          interactions to be transcribed into free text - either programmatically
          or when triggered by the wake word. Or:

        * **Rhino**: intent recognition engine, if you want to extract *intents*
          out of your voice commands - for instance, the phrase "set the living
          room temperature to 20 degrees" could be mapped to the intent with the
          following parameters: ``intent``: ``set_temperature``, ``room``:
          ``living_room``, ``temperature``: ``20``.

        * **Leopard**: speech-to-text engine aimed at offline transcription of
          audio files rather than real-time transcription.

    """

    def __init__(
        self,
        access_key: str,
        hotword_enabled: bool = True,
        stt_enabled: bool = True,
        intent_enabled: bool = False,
        keywords: Optional[Sequence[str]] = None,
        keyword_paths: Optional[Sequence[str]] = None,
        keyword_model_path: Optional[str] = None,
        **kwargs,
    ):
        """
        :param access_key: Your Picovoice access key. You can get it by signing
            up at the `Picovoice console <https://console.picovoice.ai/>`_.
        :param hotword_enabled: Enable the wake-word engine (default: True).
            .. note:: The wake-word engine requires you to add Porcupine to the
                products available in your Picovoice account.
        :param stt_enabled: Enable the speech-to-text engine (default: True).
            .. note:: The speech-to-text engine requires you to add Cheetah to
                the products available in your Picovoice account.
        :param intent_enabled: Enable the intent recognition engine (default:
            False).
            .. note:: The intent recognition engine requires you to add Rhino
                to the products available in your Picovoice account.
        :param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
            google``...). Either ``keywords`` or ``keyword_paths`` must be
            provided if the wake-word engine is enabled. This list can include
            any of the default Picovoice keywords (available on the `Picovoice
            repository
            <https://github.com/Picovoice/porcupine/tree/master/resources/keyword_files>`_).
        :param keyword_paths: List of paths to the keyword files to listen for.
            Custom keyword files can be created using the `Picovoice console
            <https://console.picovoice.ai/ppn>`_ and downloaded from the
            console itself.
        :param keyword_model_path: If you are using a keyword file in a
            non-English language, you can provide the path to the model file
            for its language. Model files are available for all the supported
            languages through the `Picovoice repository
            <https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
        """
        super().__init__(**kwargs)
        # Keyword arguments forwarded verbatim to every Assistant created by
        # main(). The plugin's stop event is shared so that the assistant
        # stops iterating when the plugin is stopped.
        self._assistant_args = {
            'stop_event': self._should_stop,
            'access_key': access_key,
            'hotword_enabled': hotword_enabled,
            'stt_enabled': stt_enabled,
            'intent_enabled': intent_enabled,
            'keywords': keywords,
            'keyword_paths': keyword_paths,
            'keyword_model_path': keyword_model_path,
        }

    # NOTE: the actions below only carry their docstrings in this revision;
    # their bodies are not implemented yet.

    @action
    def start_conversation(self, *_, **__):
        """
        Programmatically start a conversation with the assistant
        """

    @action
    def stop_conversation(self, *_, **__):
        """
        Programmatically stop a running conversation with the assistant
        """

    @action
    def mute(self, *_, **__):
        """
        Mute the microphone. Alias for :meth:`.set_mic_mute` with
        ``muted=True``.
        """

    @action
    def unmute(self, *_, **__):
        """
        Unmute the microphone. Alias for :meth:`.set_mic_mute` with
        ``muted=False``.
        """

    @action
    def set_mic_mute(self, muted: bool):
        """
        Programmatically mute/unmute the microphone.

        :param muted: Set to True or False.
        """

    @action
    def toggle_mute(self, *_, **__):
        """
        Toggle the mic mute state.
        """

    @action
    def send_text_query(self, *_, query: str, **__):
        """
        Send a text query to the assistant.

        This is equivalent to saying something to the assistant.

        :param query: Query to be sent.
        """

    def main(self):
        """
        Plugin loop: keep an :class:`Assistant` running until the plugin is
        asked to stop, and post every non-empty event it yields on the bus.

        Whenever the assistant's event stream terminates (either with an error
        or because it had nothing to iterate on) the loop waits a few seconds
        before starting a new one, so that a condition that makes the stream
        end immediately does not turn into a busy loop.
        """
        while not self.should_stop():
            self.logger.info('Starting Picovoice assistant')
            with Assistant(**self._assistant_args) as assistant:
                try:
                    for event in assistant:
                        if event:
                            get_bus().post(event)
                except KeyboardInterrupt:
                    break
                except Exception as e:
                    self.logger.error('Picovoice assistant error: %s', e, exc_info=True)

            # Back off before restarting, unless a stop was requested in the
            # meantime (in which case the loop condition ends the method).
            if not self.should_stop():
                self.wait_stop(5)

    def stop(self):
        """
        Stop any running conversation (best effort) and then stop the plugin.
        """
        try:
            self.stop_conversation()
        except RuntimeError:
            # Deliberately ignored: a failure to stop the conversation must
            # not prevent the plugin itself from being stopped.
            pass

        super().stop()
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
171
platypush/plugins/picovoice/_assistant.py
Normal file
171
platypush/plugins/picovoice/_assistant.py
Normal file
|
@ -0,0 +1,171 @@
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from threading import Event
|
||||||
|
from time import time
|
||||||
|
from typing import Any, Dict, Optional, Sequence
|
||||||
|
|
||||||
|
import pvcheetah
|
||||||
|
import pvleopard
|
||||||
|
import pvporcupine
|
||||||
|
import pvrhino
|
||||||
|
|
||||||
|
from platypush.message.event.assistant import HotwordDetectedEvent
|
||||||
|
|
||||||
|
from ._recorder import AudioRecorder
|
||||||
|
|
||||||
|
|
||||||
|
class Assistant:
    """
    A facade class that wraps the Picovoice engines under an assistant API.

    Instances are used both as a context manager (entering opens the audio
    recorder when the wake-word engine is available, exiting releases the
    recorder and any engine handle that was created) and as an iterator that
    yields the events detected on the recorded audio.
    """

    def __init__(
        self,
        access_key: str,
        stop_event: Event,
        hotword_enabled: bool = True,
        stt_enabled: bool = True,
        intent_enabled: bool = False,
        keywords: Optional[Sequence[str]] = None,
        keyword_paths: Optional[Sequence[str]] = None,
        keyword_model_path: Optional[str] = None,
        frame_expiration: float = 3.0,  # Don't process audio frames older than this
    ):
        """
        :param access_key: Picovoice access key passed to the engines.
        :param stop_event: Event that, once set, makes the assistant stop
            iterating.
        :param hotword_enabled: Whether the wake-word engine should be used.
        :param stt_enabled: Stored on the instance; not used by this class yet.
        :param intent_enabled: Stored on the instance; not used by this class
            yet.
        :param keywords: Keywords the wake-word engine should listen for.
        :param keyword_paths: Paths to keyword files. ``~`` is expanded.
        :param keyword_model_path: Path to the keyword model file. ``~`` is
            expanded.
        :param frame_expiration: Audio frames older than this many seconds are
            skipped instead of being processed.
        :raises FileNotFoundError: If the wake-word engine is enabled and any
            of the keyword files, or the keyword model file, does not exist.
        """
        self.logger = logging.getLogger(__name__)
        self._access_key = access_key
        self._stop_event = stop_event
        self.hotword_enabled = hotword_enabled
        self.stt_enabled = stt_enabled
        self.intent_enabled = intent_enabled
        self.keywords = list(keywords or [])
        self.keyword_paths = None
        self.keyword_model_path = None
        self.frame_expiration = frame_expiration
        self._recorder = None

        if hotword_enabled:
            if keyword_paths:
                keyword_paths = [os.path.expanduser(path) for path in keyword_paths]
                missing_paths = [
                    path for path in keyword_paths if not os.path.isfile(path)
                ]
                if missing_paths:
                    raise FileNotFoundError(f'Keyword files not found: {missing_paths}')

                self.keyword_paths = keyword_paths

            if keyword_model_path:
                keyword_model_path = os.path.expanduser(keyword_model_path)
                if not os.path.isfile(keyword_model_path):
                    raise FileNotFoundError(
                        f'Keyword model file not found: {keyword_model_path}'
                    )

                self.keyword_model_path = keyword_model_path

        # Engine handles. Only the Porcupine one is created by this class in
        # this revision (lazily, through the ``porcupine`` property).
        self._cheetah: Optional[pvcheetah.Cheetah] = None
        self._leopard: Optional[pvleopard.Leopard] = None
        self._porcupine: Optional[pvporcupine.Porcupine] = None
        self._rhino: Optional[pvrhino.Rhino] = None

    def should_stop(self):
        """
        :return: True if the stop event has been set.
        """
        return self._stop_event.is_set()

    def wait_stop(self):
        """
        Block until the stop event is set.
        """
        self._stop_event.wait()

    def _create_porcupine(self):
        """
        Create the wake-word engine from the configured keywords.

        :return: The new engine, or None if the wake-word engine is disabled.
        :raises ValueError: If the wake-word engine is enabled but neither
            keywords nor keyword paths have been configured.
        """
        if not self.hotword_enabled:
            return None

        args: Dict[str, Any] = {'access_key': self._access_key}
        if not (self.keywords or self.keyword_paths):
            raise ValueError(
                'You need to provide either a list of keywords or a list of '
                'keyword paths if the wake-word engine is enabled'
            )

        if self.keywords:
            args['keywords'] = self.keywords
        if self.keyword_paths:
            args['keyword_paths'] = self.keyword_paths
        if self.keyword_model_path:
            args['model_path'] = self.keyword_model_path

        return pvporcupine.create(**args)

    @property
    def porcupine(self) -> Optional[pvporcupine.Porcupine]:
        """
        The wake-word engine, created on first access.

        It is None if the wake-word engine is disabled.
        """
        if not self._porcupine:
            self._porcupine = self._create_porcupine()

        return self._porcupine

    def _get_hotword(self, keyword_index: int) -> Optional[str]:
        """
        Map the index reported by the wake-word engine to a hotword label.

        The label is taken from ``keywords`` when the index falls within that
        list. Otherwise it falls back to the name (without extension) of the
        matching keyword file, so that configurations that only provide
        ``keyword_paths`` still produce events and an index outside of
        ``keywords`` doesn't raise an ``IndexError``.

        :param keyword_index: Index returned by the engine. A negative value
            means that no keyword was detected.
        :return: The hotword label, or None if nothing was detected or the
            index can't be mapped to any configured keyword.
        """
        if keyword_index is None or keyword_index < 0:
            return None

        # NOTE(review): when both keywords and keyword_paths are configured,
        # confirm which of the two lists the engine's index refers to.
        if keyword_index < len(self.keywords):
            return self.keywords[keyword_index]

        if self.keyword_paths and keyword_index < len(self.keyword_paths):
            file_name = os.path.basename(self.keyword_paths[keyword_index])
            return os.path.splitext(file_name)[0]

        return None

    def __enter__(self):
        """
        Open the audio recorder, if the wake-word engine is available and no
        recorder is open yet.
        """
        if self._recorder:
            self.logger.info('A recording stream already exists')
        elif self.porcupine:
            # The recorder is configured with the sample rate and frame length
            # exposed by the engine, on a single channel.
            self._recorder = AudioRecorder(
                stop_event=self._stop_event,
                sample_rate=self.porcupine.sample_rate,
                frame_size=self.porcupine.frame_length,
                channels=1,
            )

            self._recorder.__enter__()

        return self

    def __exit__(self, *_):
        """
        Release the audio recorder and delete any engine handle that was
        created.
        """
        if self._recorder:
            self._recorder.__exit__(*_)
            self._recorder = None

        if self._cheetah:
            self._cheetah.delete()
            self._cheetah = None

        if self._leopard:
            self._leopard.delete()
            self._leopard = None

        if self._porcupine:
            self._porcupine.delete()
            self._porcupine = None

        if self._rhino:
            self._rhino.delete()
            self._rhino = None

    def __iter__(self):
        """
        The assistant is its own iterator.
        """
        return self

    def __next__(self):
        """
        Process the recorded audio frames until an event is detected.

        :return: A ``HotwordDetectedEvent`` as soon as the wake-word engine
            recognizes one of the configured keywords.
        :raises StopIteration: If a stop has been requested or if there is no
            open recorder.
        """
        if self.should_stop() or not self._recorder:
            raise StopIteration

        while not self.should_stop():
            if self.porcupine:  # TODO also check current state
                data = self._recorder.read()
                if data is None:
                    continue

                frame, t = data
                if time() - t > self.frame_expiration:
                    self.logger.info(
                        'Skipping audio frame older than %ss', self.frame_expiration
                    )
                    continue  # The audio frame is too old

                hotword = self._get_hotword(self.porcupine.process(frame))
                if hotword is None:
                    continue  # No keyword detected

                return HotwordDetectedEvent(hotword=hotword)

        raise StopIteration
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
77
platypush/plugins/picovoice/_recorder.py
Normal file
77
platypush/plugins/picovoice/_recorder.py
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
from collections import namedtuple
|
||||||
|
from logging import getLogger
|
||||||
|
from queue import Full, Queue
|
||||||
|
from threading import Event
|
||||||
|
from time import time
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import sounddevice as sd
|
||||||
|
|
||||||
|
from platypush.utils import wait_for_either
|
||||||
|
|
||||||
|
|
||||||
|
# A chunk of recorded audio (``data``) paired with a ``timestamp`` field.
AudioFrame = namedtuple('AudioFrame', ['data', 'timestamp'])
|
||||||
|
|
||||||
|
|
||||||
|
class AudioRecorder:
    """
    Audio recorder component that uses the sounddevice library to record audio
    from the microphone.

    Recorded frames are pushed by the stream callback to a bounded queue and
    consumed through :meth:`read`. The recorder can be used as a context
    manager: entering starts the stream, exiting stops it.
    """

    def __init__(
        self,
        stop_event: Event,
        sample_rate: int,
        frame_size: int,
        channels: int,
        dtype: str = 'int16',
        queue_size: int = 20,
    ):
        """
        :param stop_event: Upstream event that, once set, makes the recorder
            stop queueing frames.
        :param sample_rate: Sample rate of the input stream.
        :param frame_size: Block size of the input stream.
        :param channels: Number of input channels.
        :param dtype: Sample type of the input stream.
        :param queue_size: Maximum number of frames kept in the queue. Frames
            recorded while the queue is full are dropped.
        """
        self.logger = getLogger(__name__)
        self._audio_queue: Queue[AudioFrame] = Queue(maxsize=queue_size)
        self.frame_size = frame_size
        # Local stop flag, set by stop(), checked together with the upstream one
        self._stop_event = Event()
        self._upstream_stop_event = stop_event
        self.stream = sd.InputStream(
            samplerate=sample_rate,
            channels=channels,
            dtype=dtype,
            blocksize=frame_size,
            callback=self._audio_callback,
        )

    def __enter__(self):
        """
        Clear the local stop flag and start the input stream.
        """
        self._stop_event.clear()
        self.stream.start()
        return self

    def __exit__(self, *_):
        """
        Stop the recorder.
        """
        self.stop()
        # NOTE(review): the stream is only stopped here, not closed (the
        # close() call is commented out in this revision) - confirm whether
        # it should be released at this point.
        # self.stream.close()

    def _audio_callback(self, indata, *_):
        """
        Stream callback: queue the recorded block, flattened to one dimension,
        together with the current time.

        The block is dropped (with a warning) if the queue is full, and ignored
        if the recorder should stop.
        """
        if self.should_stop():
            return

        try:
            # The input buffer handed to the callback is only valid for the
            # duration of the callback, so the frame must be copied before
            # being queued for a consumer running on another thread.
            self._audio_queue.put_nowait(
                AudioFrame(indata.reshape(-1).copy(), time())
            )
        except Full:
            self.logger.warning('Audio queue is full, dropping audio frame')

    def read(self, timeout: Optional[float] = None):
        """
        Get the next recorded frame from the queue.

        :param timeout: How long to wait for a frame, in seconds. If None, the
            call blocks until a frame is available.
        :return: The next queued frame, or None if no frame became available
            within the timeout.
        """
        # Imported here because the module only imports Full and Queue
        from queue import Empty

        try:
            return self._audio_queue.get(timeout=timeout)
        except (Empty, TimeoutError):
            # Queue.get signals an expired timeout with queue.Empty
            self.logger.debug('Audio queue is empty')
            return None

    def stop(self):
        """
        Set the local stop flag and stop the input stream.
        """
        self._stop_event.set()
        self.stream.stop()

    def should_stop(self):
        """
        :return: True if either the local or the upstream stop event is set.
        """
        return self._stop_event.is_set() or self._upstream_stop_event.is_set()

    def wait(self, timeout: Optional[float] = None):
        """
        Wait on the local and upstream stop events through ``wait_for_either``.

        :param timeout: Timeout forwarded to ``wait_for_either``.
        """
        wait_for_either(self._stop_event, self._upstream_stop_event, timeout=timeout)
|
14
platypush/plugins/picovoice/_state.py
Normal file
14
platypush/plugins/picovoice/_state.py
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class AssistantState(Enum):
    """
    Possible states of the assistant.

    Each member's value is the lowercase string form of its name.
    """

    IDLE = 'idle'
    DETECTING_HOTWORD = 'detecting_hotword'
    DETECTING_SPEECH = 'detecting_speech'
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
22
platypush/plugins/picovoice/manifest.yaml
Normal file
22
platypush/plugins/picovoice/manifest.yaml
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
manifest:
|
||||||
|
package: platypush.plugins.picovoice
|
||||||
|
type: plugin
|
||||||
|
events:
|
||||||
|
- platypush.message.event.assistant.ConversationEndEvent
|
||||||
|
- platypush.message.event.assistant.ConversationStartEvent
|
||||||
|
- platypush.message.event.assistant.ConversationTimeoutEvent
|
||||||
|
- platypush.message.event.assistant.HotwordDetectedEvent
|
||||||
|
- platypush.message.event.assistant.MicMutedEvent
|
||||||
|
- platypush.message.event.assistant.MicUnmutedEvent
|
||||||
|
- platypush.message.event.assistant.NoResponseEvent
|
||||||
|
- platypush.message.event.assistant.ResponseEvent
|
||||||
|
- platypush.message.event.assistant.SpeechRecognizedEvent
|
||||||
|
install:
|
||||||
|
pacman:
|
||||||
|
- python-sounddevice
|
||||||
|
pip:
|
||||||
|
- pvcheetah
|
||||||
|
- pvleopard
|
||||||
|
- pvporcupine
|
||||||
|
- pvrhino
|
||||||
|
- sounddevice
|
|
@ -247,10 +247,12 @@ class AudioManager:
|
||||||
wait_start = time()
|
wait_start = time()
|
||||||
for audio_thread in streams_to_stop:
|
for audio_thread in streams_to_stop:
|
||||||
audio_thread.join(
|
audio_thread.join(
|
||||||
timeout=max(0, timeout - (time() - wait_start))
|
timeout=(
|
||||||
|
max(0, timeout - (time() - wait_start))
|
||||||
if timeout is not None
|
if timeout is not None
|
||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Remove references
|
# Remove references
|
||||||
for audio_thread in streams_to_stop:
|
for audio_thread in streams_to_stop:
|
||||||
|
|
|
@ -83,7 +83,9 @@ mock_imports = [
|
||||||
"pmw3901",
|
"pmw3901",
|
||||||
"psutil",
|
"psutil",
|
||||||
"pvcheetah",
|
"pvcheetah",
|
||||||
|
"pvleopard",
|
||||||
"pvporcupine",
|
"pvporcupine",
|
||||||
|
"pvrhino",
|
||||||
"pyHS100",
|
"pyHS100",
|
||||||
"pyaudio",
|
"pyaudio",
|
||||||
"pychromecast",
|
"pychromecast",
|
||||||
|
|
Loading…
Reference in a new issue