forked from platypush/platypush
[assistant.picovoice] Extended documentation.
parent b2c07a31f2
commit 72bc697122
1 changed file with 314 additions and 13 deletions

@@ -12,27 +12,18 @@ from ._state import AssistantState

# pylint: disable=too-many-ancestors
class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
r"""
A voice assistant that runs on your device, based on the `Picovoice
<https://picovoice.ai/>`_ engine.

Picovoice is a suite of on-device voice technologies that include:

* **Porcupine**: wake-word engine, if you want the device to listen for
  a specific wake word in order to start the assistant.

* **Cheetah**: speech-to-text engine, if you want your voice
  interactions to be transcribed into free text - either
  programmatically or when triggered by the wake word. Or:

* **Rhino**: intent recognition engine, if you want to extract *intents*
  out of your voice commands - for instance, the phrase "set the living

@@ -47,6 +38,316 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):

logic to respond to user's voice commands and render the responses as
audio.

This plugin is a wrapper around the Picovoice engine that allows you to
run your custom voice-based conversational flows on your device.

Getting a Picovoice account and access key
------------------------------------------

You can get your personal access key by signing up at the `Picovoice
console <https://console.picovoice.ai/>`_. You may be asked to submit a
reason for using the service (feel free to mention a personal Platypush
integration), and you will receive your personal access key.

If prompted to select the products you want to use, make sure to select
the ones from the Picovoice suite that you want to use with this plugin.

Hotword detection
-----------------

The hotword detection engine is based on `Porcupine
<https://picovoice.ai/platform/porcupine/>`_.

If enabled through the ``hotword_enabled`` parameter (default: True), the
assistant will listen for a specific wake word before starting the
speech-to-text or intent recognition engines. You can specify custom models
for your hotword (e.g. on the same device you may use "Alexa" to trigger the
speech-to-text engine in English, "Computer" to trigger the speech-to-text
engine in Italian, and "Ok Google" to trigger the intent recognition engine).

You can also create your custom hotword models using the `Porcupine console
<https://console.picovoice.ai/ppn>`_.

If ``hotword_enabled`` is set to True, you must also specify the
``keywords`` parameter with the list of keywords that you want to listen
for, and optionally the ``keyword_paths`` parameter with the paths to any
custom hotword models that you want to use. If ``hotword_enabled`` is
set to False, then the assistant won't start listening for speech after the
plugin is started, and you will need to programmatically start the
conversation by calling the :meth:`.start_conversation` action, or trigger
it from the UI.

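For example, a minimal sketch of how the conversation could be started
programmatically (e.g. from a procedure, a cron hook or another event hook),
assuming the plugin is registered as ``assistant.picovoice``:

.. code-block:: python

    from platypush import run

    # Start listening for speech without waiting for a hotword. This simply
    # calls the plugin's start_conversation action through the run() helper.
    run("assistant.picovoice.start_conversation")
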
When a wake-word is detected, the assistant will emit a
:class:`platypush.message.event.assistant.HotwordDetectedEvent` event that
you can use to build your custom logic. For example:

.. code-block:: python

    import time

    from platypush import hook, run
    from platypush.message.event.assistant import HotwordDetectedEvent

    # Turn on a light for 5 seconds when the hotword "Alexa" is detected
    @hook(HotwordDetectedEvent, hotword='Alexa')
    def on_hotword_detected(event: HotwordDetectedEvent, **context):
        run("light.hue.on", lights=["Living Room"])
        time.sleep(5)
        run("light.hue.off", lights=["Living Room"])

By default, the assistant will start listening for speech after the hotword
if either ``stt_enabled`` or ``intent_model_path`` is set. If you don't
want the assistant to start listening for speech after the hotword is
detected (for example because you want to build your custom response flows,
or trigger the speech detection using different models depending on the
hotword that is used, or because you just want to detect hotwords but not
speech), then you can also set the ``start_conversation_on_hotword``
parameter to ``False``. If that is the case, then you can programmatically
start the conversation by calling the :meth:`.start_conversation` method in
your event hooks:

.. code-block:: python

    from platypush import hook, run
    from platypush.message.event.assistant import HotwordDetectedEvent

    # Start a conversation using the Italian language model when the
    # "Buongiorno" hotword is detected
    @hook(HotwordDetectedEvent, hotword='Buongiorno')
    def on_it_hotword_detected(event: HotwordDetectedEvent, **context):
        event.assistant.start_conversation(model_file='path/to/it.pv')

Speech-to-text
--------------

The speech-to-text engine is based on `Cheetah
<https://picovoice.ai/docs/cheetah/>`_.

If enabled through the ``stt_enabled`` parameter (default: True), the
assistant will transcribe the voice commands into text when a conversation
is started either programmatically through :meth:`.start_conversation` or
when the hotword is detected.

It will emit a
:class:`platypush.message.event.assistant.SpeechRecognizedEvent` when some
speech is detected, and you can hook to that event to build your custom
logic:

.. code-block:: python

    from platypush import hook, run
    from platypush.message.event.assistant import SpeechRecognizedEvent

    # Turn on a light when the phrase "turn on the lights" is detected.
    # Note that we can leverage regex-based pattern matching to be more
    # flexible when matching the phrases. For example, the following hook
    # will be matched when the user says "turn on the lights", "turn on
    # lights", "lights on", "lights on please", "turn on light" etc.
    @hook(SpeechRecognizedEvent, phrase='turn on (the)? lights?')
    def on_turn_on_lights(event: SpeechRecognizedEvent, **context):
        run("light.hue.on")

You can also leverage context extraction through the ``${}`` syntax on the
hook to extract specific tokens from the event that can be passed to your
event hook. For example:

.. code-block:: python

    from platypush import hook, run
    from platypush.message.event.assistant import SpeechRecognizedEvent

    @hook(SpeechRecognizedEvent, phrase='play ${title} by ${artist}')
    def on_play_track_command(
        event: SpeechRecognizedEvent, title: str, artist: str, **context
    ):
        results = run(
            "music.mopidy.search",
            filter={"title": title, "artist": artist}
        )

        if not results:
            event.assistant.render_response(f"Couldn't find {title} by {artist}")
            return

        run("music.mopidy.play", resource=results[0]["uri"])

Speech-to-intent
----------------

The intent recognition engine is based on `Rhino
<https://picovoice.ai/docs/rhino/>`_.

*Intents* are snippets of unstructured transcribed speech that can be
matched to structured actions.

Unlike with hotword and speech-to-text detection, you need to provide a
custom model for intent detection. You can create your custom model using
the `Rhino console <https://console.picovoice.ai/rhn>`_.

When an intent is detected, the assistant will emit a
:class:`platypush.message.event.assistant.IntentRecognizedEvent` that can
be listened to.

For example, you can train a model to control groups of smart lights by
defining the following slots on the Rhino console:

- ``device_state``: The new state of the device (e.g. with ``on`` or
  ``off`` as supported values)

- ``room``: The name of the room associated with the group of lights to
  be controlled (e.g. ``living room``, ``kitchen``, ``bedroom``)

You can then define a ``lights_ctrl`` intent with the following expressions:

- "turn ``$device_state:state`` the lights"
|
||||||
|
- "turn ``$device_state:state`` the ``$room:room`` lights"
|
||||||
|
- "turn the lights ``$device_state:state``"
|
||||||
|
- "turn the ``$room:room`` lights ``$device_state:state``"
|
||||||
|
- "turn ``$room:room`` lights ``$device_state:state``"
|
||||||
|
|
||||||
|
This intent will match any of the following phrases:
|
||||||
|
|
||||||
|
- "*turn on the lights*"
|
||||||
|
- "*turn off the lights*"
|
||||||
|
- "*turn the lights on*"
|
||||||
|
- "*turn the lights off*"
|
||||||
|
- "*turn on the living room lights*"
|
||||||
|
- "*turn off the living room lights*"
|
||||||
|
- "*turn the living room lights on*"
|
||||||
|
- "*turn the living room lights off*"
|
||||||
|
|
||||||
|
And it will extract any slots that are matched in the phrases in the
|
||||||
|
:class:`platypush.message.event.assistant.IntentRecognizedEvent` event.
|
||||||
|
|
||||||
|
Train the model, download the context file, and pass its path via the
``intent_model_path`` parameter.

You can then register a hook to listen to a specific intent:

.. code-block:: python

    from platypush import hook, run
    from platypush.message.event.assistant import IntentRecognizedEvent

    @hook(IntentRecognizedEvent, intent='lights_ctrl', slots={'state': 'on'})
    def on_turn_on_lights(event: IntentRecognizedEvent, **context):
        room = event.slots.get('room')
        if room:
            run("light.hue.on", groups=[room])
        else:
            run("light.hue.on")

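If you just want to inspect what the engine extracts before wiring up your
logic, here is a minimal sketch of a catch-all hook that logs every
recognized intent. It assumes that the event exposes the matched intent name
as ``event.intent``, alongside the ``event.slots`` attribute used above:

.. code-block:: python

    from logging import getLogger

    from platypush import hook
    from platypush.message.event.assistant import IntentRecognizedEvent

    logger = getLogger(__name__)

    # Log every recognized intent together with its extracted slots.
    # ``event.intent`` is assumed here; ``event.slots`` is used in the
    # example above.
    @hook(IntentRecognizedEvent)
    def log_intents(event: IntentRecognizedEvent, **context):
        logger.info("Recognized intent %s with slots %s", event.intent, event.slots)
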
Note that if both ``stt_enabled`` and ``intent_model_path`` are set, then
both the speech-to-text and intent recognition engines will run in parallel
when a conversation is started.

The intent engine is usually faster, as it has a smaller set of intents to
match and doesn't have to run a full speech-to-text transcription. This
means that, if an utterance matches both a speech-to-text phrase and an
intent, the :class:`platypush.message.event.assistant.IntentRecognizedEvent`
event is emitted (and not
:class:`platypush.message.event.assistant.SpeechRecognizedEvent`).

This may not always be the case though, so it may be a good practice to
also provide a fallback
:class:`platypush.message.event.assistant.SpeechRecognizedEvent` hook to
catch the text if the speech is not recognized as an intent:

.. code-block:: python

    from platypush import hook, run
    from platypush.message.event.assistant import SpeechRecognizedEvent

    @hook(SpeechRecognizedEvent, phrase='turn ${state} (the)? ${room} lights?')
    def on_lights_command(
        event: SpeechRecognizedEvent, state: str, room: str, **context
    ):
        # Use the extracted ``state`` token to pick the action, and the
        # ``room`` token (if any) to select the light group
        action = "light.hue.on" if state == "on" else "light.hue.off"
        if room:
            run(action, groups=[room])
        else:
            run(action)

Text-to-speech
--------------

The text-to-speech engine is based on `Orca
<https://picovoice.ai/docs/orca/>`_.

It is not directly implemented by this plugin, but the implementation is
provided in the :class:`platypush.plugins.tts.picovoice.TtsPicovoicePlugin`
plugin.

You can however leverage the :meth:`.render_response` action to render some
text as speech in response to a user command, and that in turn will leverage
the Picovoice TTS plugin to render the response.

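As a minimal sketch (the trigger phrase and the response text below are just
illustrative placeholders), a hook can call the action directly on a
recognized phrase:

.. code-block:: python

    from platypush import hook, run
    from platypush.message.event.assistant import SpeechRecognizedEvent

    # Reply with a spoken response when the (placeholder) phrase is matched.
    @hook(SpeechRecognizedEvent, phrase='good morning')
    def on_good_morning(event: SpeechRecognizedEvent, **context):
        run("assistant.picovoice.render_response", text="Good morning to you too!")
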
For a more complete example, the following snippet provides a hook that:

- Listens for
  :class:`platypush.message.event.assistant.SpeechRecognizedEvent`.

- Matches the phrase against a list of predefined commands that
  shouldn't require a response.

- Has fallback logic that leverages the
  :class:`platypush.plugins.openai.OpenaiPlugin` to generate a response
  for the given text and renders it as speech.

- Has logic for follow-on turns if the response from ChatGPT is a question.

.. code-block:: python

    import re
    from logging import getLogger

    from platypush import hook, run
    from platypush.message.event.assistant import (
        SpeechRecognizedEvent,
        ResponseEndEvent,
    )

    logger = getLogger(__name__)

    def play_music(*_, **__):
        run("music.mopidy.play")

    def stop_music(*_, **__):
        run("music.mopidy.stop")

    def ai_assist(event: SpeechRecognizedEvent, **__):
        response = run("openai.get_response", prompt=event.phrase)
        if not response:
            return

        run("assistant.picovoice.render_response", text=response)

    # List of commands to match, as pairs of regex patterns and the
    # corresponding actions
    hooks = (
        (re.compile(r"play (the )?music", re.IGNORECASE), play_music),
        (re.compile(r"stop (the )?music", re.IGNORECASE), stop_music),
        # Fallback to the AI assistant
        (re.compile(r".*"), ai_assist),
    )

    @hook(SpeechRecognizedEvent)
    def on_speech_recognized(event, **kwargs):
        for pattern, command in hooks:
            if pattern.search(event.phrase):
                logger.info("Running voice command %s", command.__name__)
                command(event, **kwargs)
                break

    @hook(ResponseEndEvent)
    def on_response_end(event: ResponseEndEvent, **__):
        # Check if the response is a question and start a follow-on turn if so.
        # Note that the ``openai`` plugin by default is configured to keep
        # the past interaction in a context window of ~10 minutes, so you
        # can follow up like in a real conversation.
        if event.assistant and event.response_text and event.response_text.endswith("?"):
            event.assistant.start_conversation()

"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|