forked from platypush/platypush

[WIP] Added initial hotword integration.

commit a9498ea191 (parent 44f9c03bf3)
7 changed files with 462 additions and 4 deletions

platypush/plugins/picovoice/__init__.py (new file, 170 lines)

@@ -0,0 +1,170 @@
from typing import Optional, Sequence

from platypush.context import get_bus
from platypush.plugins import RunnablePlugin, action
from platypush.plugins.assistant import AssistantPlugin

from ._assistant import Assistant


# pylint: disable=too-many-ancestors
class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
    """
    A voice assistant that runs on your device, based on the `Picovoice
    <https://picovoice.ai/>`_ engine.

    .. note:: You will need a Picovoice account and a personal access key to
        use this integration.

    You can get your personal access key by signing up at the `Picovoice
    console <https://console.picovoice.ai/>`_. You may be asked to submit a
    reason for using the service (feel free to mention a personal Platypush
    integration), and you will receive your personal access key.

    You may also be asked to select which products you want to use. The
    default configuration of this plugin requires the following:

        * **Porcupine**: wake-word engine, if you want the device to listen
          for a specific wake word in order to start the assistant.

        * **Cheetah**: speech-to-text engine, if you want your voice
          interactions to be transcribed into free text - either
          programmatically or when triggered by the wake word. Or:

        * **Rhino**: intent recognition engine, if you want to extract
          *intents* out of your voice commands - for instance, the phrase
          "set the living room temperature to 20 degrees" could be mapped to
          the intent with the following parameters: ``intent``:
          ``set_temperature``, ``room``: ``living_room``, ``temperature``:
          ``20``.

        * **Leopard**: speech-to-text engine aimed at offline transcription of
          audio files rather than real-time transcription.

    """

    def __init__(
        self,
        access_key: str,
        hotword_enabled: bool = True,
        stt_enabled: bool = True,
        intent_enabled: bool = False,
        keywords: Optional[Sequence[str]] = None,
        keyword_paths: Optional[Sequence[str]] = None,
        keyword_model_path: Optional[str] = None,
        **kwargs,
    ):
        """
        :param access_key: Your Picovoice access key. You can get it by
            signing up at the `Picovoice console
            <https://console.picovoice.ai/>`_.
        :param hotword_enabled: Enable the wake-word engine (default: True).

            .. note:: The wake-word engine requires you to add Porcupine to
                the products available in your Picovoice account.

        :param stt_enabled: Enable the speech-to-text engine (default: True).

            .. note:: The speech-to-text engine requires you to add Cheetah to
                the products available in your Picovoice account.

        :param intent_enabled: Enable the intent recognition engine (default:
            False).

            .. note:: The intent recognition engine requires you to add Rhino
                to the products available in your Picovoice account.

        :param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
            google``...). Either ``keywords`` or ``keyword_paths`` must be
            provided if the wake-word engine is enabled. This list can include
            any of the default Picovoice keywords (available on the `Picovoice
            repository
            <https://github.com/Picovoice/porcupine/tree/master/resources/keyword_files>`_).
        :param keyword_paths: List of paths to the keyword files to listen
            for. Custom keyword files can be created using the `Picovoice
            console <https://console.picovoice.ai/ppn>`_ and downloaded from
            the console itself.
        :param keyword_model_path: If you are using a keyword file in a
            non-English language, you can provide the path to the model file
            for its language. Model files are available for all the supported
            languages through the `Picovoice repository
            <https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
        """
        super().__init__(**kwargs)
        self._assistant_args = {
            'stop_event': self._should_stop,
            'access_key': access_key,
            'hotword_enabled': hotword_enabled,
            'stt_enabled': stt_enabled,
            'intent_enabled': intent_enabled,
            'keywords': keywords,
            'keyword_paths': keyword_paths,
            'keyword_model_path': keyword_model_path,
        }

    @action
    def start_conversation(self, *_, **__):
        """
        Programmatically start a conversation with the assistant
        """

    @action
    def stop_conversation(self, *_, **__):
        """
        Programmatically stop a running conversation with the assistant
        """

    @action
    def mute(self, *_, **__):
        """
        Mute the microphone. Alias for :meth:`.set_mic_mute` with
        ``muted=True``.
        """

    @action
    def unmute(self, *_, **__):
        """
        Unmute the microphone. Alias for :meth:`.set_mic_mute` with
        ``muted=False``.
        """

    @action
    def set_mic_mute(self, muted: bool):
        """
        Programmatically mute/unmute the microphone.

        :param muted: Set to True or False.
        """

    @action
    def toggle_mute(self, *_, **__):
        """
        Toggle the mic mute state.
        """

    @action
    def send_text_query(self, *_, query: str, **__):
        """
        Send a text query to the assistant.

        This is equivalent to saying something to the assistant.

        :param query: Query to be sent.
        """

    def main(self):
        while not self.should_stop():
            self.logger.info('Starting Picovoice assistant')
            with Assistant(**self._assistant_args) as assistant:
                try:
                    for event in assistant:
                        if event:
                            get_bus().post(event)
                except KeyboardInterrupt:
                    break
                except Exception as e:
                    self.logger.error(
                        'Picovoice assistant error: %s', e, exc_info=True
                    )
                    self.wait_stop(5)

    def stop(self):
        try:
            self.stop_conversation()
        except RuntimeError:
            pass

        super().stop()


# vim:sw=4:ts=4:et:
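
For context, a hedged sketch (not part of this commit) of how a user script might react to the HotwordDetectedEvent that main() posts on the bus; it assumes the @when user-script hook helper available in recent Platypush releases:

    from platypush import when

    from platypush.message.event.assistant import HotwordDetectedEvent


    @when(HotwordDetectedEvent)
    def on_hotword(event, **_):
        # Platypush events carry their keyword arguments in event.args,
        # e.g. {'hotword': 'porcupine'} for the event built in Assistant.__next__
        print(f"Hotword detected: {event.args.get('hotword')}")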

platypush/plugins/picovoice/_assistant.py (new file, 171 lines)

@@ -0,0 +1,171 @@
import logging
import os
from threading import Event
from time import time
from typing import Any, Dict, Optional, Sequence

import pvcheetah
import pvleopard
import pvporcupine
import pvrhino

from platypush.message.event.assistant import HotwordDetectedEvent

from ._recorder import AudioRecorder


class Assistant:
    """
    A facade class that wraps the Picovoice engines under an assistant API.
    """

    def __init__(
        self,
        access_key: str,
        stop_event: Event,
        hotword_enabled: bool = True,
        stt_enabled: bool = True,
        intent_enabled: bool = False,
        keywords: Optional[Sequence[str]] = None,
        keyword_paths: Optional[Sequence[str]] = None,
        keyword_model_path: Optional[str] = None,
        frame_expiration: float = 3.0,  # Don't process audio frames older than this
    ):
        self.logger = logging.getLogger(__name__)
        self._access_key = access_key
        self._stop_event = stop_event
        self.hotword_enabled = hotword_enabled
        self.stt_enabled = stt_enabled
        self.intent_enabled = intent_enabled
        self.keywords = list(keywords or [])
        self.keyword_paths = None
        self.keyword_model_path = None
        self.frame_expiration = frame_expiration
        self._recorder = None

        if hotword_enabled:
            if keyword_paths:
                keyword_paths = [os.path.expanduser(path) for path in keyword_paths]
                missing_paths = [
                    path for path in keyword_paths if not os.path.isfile(path)
                ]
                if missing_paths:
                    raise FileNotFoundError(f'Keyword files not found: {missing_paths}')

                self.keyword_paths = keyword_paths

            if keyword_model_path:
                keyword_model_path = os.path.expanduser(keyword_model_path)
                if not os.path.isfile(keyword_model_path):
                    raise FileNotFoundError(
                        f'Keyword model file not found: {keyword_model_path}'
                    )

                self.keyword_model_path = keyword_model_path

        self._cheetah: Optional[pvcheetah.Cheetah] = None
        self._leopard: Optional[pvleopard.Leopard] = None
        self._porcupine: Optional[pvporcupine.Porcupine] = None
        self._rhino: Optional[pvrhino.Rhino] = None

    def should_stop(self):
        return self._stop_event.is_set()

    def wait_stop(self):
        self._stop_event.wait()

    def _create_porcupine(self):
        if not self.hotword_enabled:
            return None

        args: Dict[str, Any] = {'access_key': self._access_key}
        if not (self.keywords or self.keyword_paths):
            raise ValueError(
                'You need to provide either a list of keywords or a list of '
                'keyword paths if the wake-word engine is enabled'
            )

        if self.keywords:
            args['keywords'] = self.keywords
        if self.keyword_paths:
            args['keyword_paths'] = self.keyword_paths
        if self.keyword_model_path:
            args['model_path'] = self.keyword_model_path

        return pvporcupine.create(**args)

    @property
    def porcupine(self) -> Optional[pvporcupine.Porcupine]:
        if not self._porcupine:
            self._porcupine = self._create_porcupine()

        return self._porcupine

    def __enter__(self):
        if self._recorder:
            self.logger.info('A recording stream already exists')
        elif self.porcupine:
            self._recorder = AudioRecorder(
                stop_event=self._stop_event,
                sample_rate=self.porcupine.sample_rate,
                frame_size=self.porcupine.frame_length,
                channels=1,
            )
        # Only start the audio stream if a recorder was actually created
        # (the hotword engine may be disabled, in which case there is
        # nothing to record yet)
        if self._recorder:
            self._recorder.__enter__()

        return self
    def __exit__(self, *_):
        if self._recorder:
            self._recorder.__exit__(*_)
            self._recorder = None

        if self._cheetah:
            self._cheetah.delete()
            self._cheetah = None

        if self._leopard:
            self._leopard.delete()
            self._leopard = None

        if self._porcupine:
            self._porcupine.delete()
            self._porcupine = None

        if self._rhino:
            self._rhino.delete()
            self._rhino = None

    def __iter__(self):
        return self

    def __next__(self):
        has_data = False
        if self.should_stop() or not self._recorder:
            raise StopIteration

        while not (self.should_stop() or has_data):
            if self.porcupine:  # TODO also check current state
                data = self._recorder.read()
                if data is None:
                    continue

                frame, t = data
                if time() - t > self.frame_expiration:
                    self.logger.info(
                        'Skipping audio frame older than %ss', self.frame_expiration
                    )
                    continue  # The audio frame is too old

                keyword_index = self.porcupine.process(frame)
                if keyword_index is None:
                    continue  # No keyword detected

                if keyword_index >= 0 and self.keywords:
                    return HotwordDetectedEvent(hotword=self.keywords[keyword_index])

        raise StopIteration


# vim:sw=4:ts=4:et:
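
A minimal usage sketch of the facade on its own, mirroring PicovoicePlugin.main(); the access key value is a placeholder, and at this WIP stage the iterator only ever yields HotwordDetectedEvent objects:

    from threading import Event

    from platypush.plugins.picovoice._assistant import Assistant

    with Assistant(
        access_key='YOUR_PICOVOICE_ACCESS_KEY',  # placeholder
        stop_event=Event(),
        keywords=['porcupine'],  # one of the built-in Porcupine keywords
    ) as assistant:
        for event in assistant:
            print(f"Hotword detected: {event.args.get('hotword')}")
            break  # stop after the first detection in this sketch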

platypush/plugins/picovoice/_recorder.py (new file, 77 lines)

@@ -0,0 +1,77 @@
from collections import namedtuple
from logging import getLogger
from queue import Empty, Full, Queue
from threading import Event
from time import time
from typing import Optional

import sounddevice as sd

from platypush.utils import wait_for_either


AudioFrame = namedtuple('AudioFrame', ['data', 'timestamp'])


class AudioRecorder:
    """
    Audio recorder component that uses the sounddevice library to record audio
    from the microphone.
    """

    def __init__(
        self,
        stop_event: Event,
        sample_rate: int,
        frame_size: int,
        channels: int,
        dtype: str = 'int16',
        queue_size: int = 20,
    ):
        self.logger = getLogger(__name__)
        self._audio_queue: Queue[AudioFrame] = Queue(maxsize=queue_size)
        self.frame_size = frame_size
        self._stop_event = Event()
        self._upstream_stop_event = stop_event
        self.stream = sd.InputStream(
            samplerate=sample_rate,
            channels=channels,
            dtype=dtype,
            blocksize=frame_size,
            callback=self._audio_callback,
        )

    def __enter__(self):
        self._stop_event.clear()
        self.stream.start()
        return self

    def __exit__(self, *_):
        self.stop()
        # self.stream.close()

    def _audio_callback(self, indata, *_):
        if self.should_stop():
            return

        try:
            self._audio_queue.put_nowait(AudioFrame(indata.reshape(-1), time()))
        except Full:
            self.logger.warning('Audio queue is full, dropping audio frame')

    def read(self, timeout: Optional[float] = None):
        try:
            return self._audio_queue.get(timeout=timeout)
        except Empty:
            # Queue.get raises queue.Empty on timeout, not TimeoutError
            self.logger.debug('Audio queue is empty')
            return None
    def stop(self):
        self._stop_event.set()
        self.stream.stop()

    def should_stop(self):
        return self._stop_event.is_set() or self._upstream_stop_event.is_set()

    def wait(self, timeout: Optional[float] = None):
        wait_for_either(self._stop_event, self._upstream_stop_event, timeout=timeout)
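
A quick standalone sketch of the recorder; the 16 kHz / 512-sample / mono parameters are placeholders picked to match what Porcupine typically expects:

    from threading import Event
    from time import time

    from platypush.plugins.picovoice._recorder import AudioRecorder

    frames = 0
    deadline = time() + 3  # record for about three seconds
    with AudioRecorder(
        stop_event=Event(), sample_rate=16000, frame_size=512, channels=1
    ) as recorder:
        while time() < deadline:
            frame = recorder.read(timeout=0.5)  # None if no audio arrived in time
            if frame is not None:
                frames += 1  # frame is an AudioFrame(data, timestamp) namedtuple

    print(f'Captured {frames} audio frames')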

platypush/plugins/picovoice/_state.py (new file, 14 lines)

@@ -0,0 +1,14 @@
from enum import Enum


class AssistantState(Enum):
    """
    Possible states of the assistant.
    """

    IDLE = 'idle'
    DETECTING_HOTWORD = 'detecting_hotword'
    DETECTING_SPEECH = 'detecting_speech'


# vim:sw=4:ts=4:et:
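
The enum is not referenced elsewhere in this commit yet; presumably it will gate the processing branches in Assistant.__next__ (see the TODO there). A rough sketch of that intended use:

    from platypush.plugins.picovoice._state import AssistantState

    state = AssistantState.DETECTING_HOTWORD

    if state == AssistantState.DETECTING_HOTWORD:
        ...  # feed audio frames to Porcupine and wait for the wake word
    elif state == AssistantState.DETECTING_SPEECH:
        ...  # feed audio frames to Cheetah/Rhino for transcription or intents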

platypush/plugins/picovoice/manifest.yaml (new file, 22 lines)

@@ -0,0 +1,22 @@
manifest:
  package: platypush.plugins.picovoice
  type: plugin
  events:
    - platypush.message.event.assistant.ConversationEndEvent
    - platypush.message.event.assistant.ConversationStartEvent
    - platypush.message.event.assistant.ConversationTimeoutEvent
    - platypush.message.event.assistant.HotwordDetectedEvent
    - platypush.message.event.assistant.MicMutedEvent
    - platypush.message.event.assistant.MicUnmutedEvent
    - platypush.message.event.assistant.NoResponseEvent
    - platypush.message.event.assistant.ResponseEvent
    - platypush.message.event.assistant.SpeechRecognizedEvent
  install:
    pacman:
      - python-sounddevice
    pip:
      - pvcheetah
      - pvleopard
      - pvporcupine
      - pvrhino
      - sounddevice

@@ -247,10 +247,12 @@ class AudioManager:
         wait_start = time()
         for audio_thread in streams_to_stop:
             audio_thread.join(
-                timeout=max(0, timeout - (time() - wait_start))
+                timeout=(
+                    max(0, timeout - (time() - wait_start))
+                    if timeout is not None
+                    else None
+                )
             )

         # Remove references
         for audio_thread in streams_to_stop:
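
The hunk above makes the per-thread join timeout safe when no overall timeout is given (arithmetic on None would raise a TypeError). A standalone sketch of the same shared-deadline pattern, with a hypothetical join_all helper:

    from threading import Thread
    from time import time

    def join_all(threads, timeout=None):
        # Give the whole group of threads a shared deadline. Thread.join()
        # accepts timeout=None ("wait forever"), so None is passed through
        # untouched instead of being used in the subtraction.
        wait_start = time()
        for t in threads:
            t.join(
                timeout=(
                    max(0, timeout - (time() - wait_start))
                    if timeout is not None
                    else None
                )
            )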

@@ -83,7 +83,9 @@ mock_imports = [
"pmw3901",
|
||||
"psutil",
|
||||
"pvcheetah",
|
||||
"pvleopard",
|
||||
"pvporcupine",
|
||||
"pvrhino",
|
||||
"pyHS100",
|
||||
"pyaudio",
|
||||
"pychromecast",