[assistant.picovoice] Extended documentation.

2024-05-02 02:46:32 +02:00 · 2024-05-02 02:46:32 +02:00 · 72bc697122
parent b2c07a31f2
commit 72bc697122
1 changed files with 314 additions and 13 deletions
--- a/platypush/plugins/assistant/picovoice/init.py
+++ b/platypush/plugins/assistant/picovoice/init.py
@ -12,27 +12,18 @@ from ._state import AssistantState

 # pylint: disable=too-many-ancestors
 class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
-    """
+    r"""
    A voice assistant that runs on your device, based on the `Picovoice
    <https://picovoice.ai/>`_ engine.

-    .. note:: You will need a PicoVoice account and a personal access key to
-        use this integration.
-
-    You can get your personal access key by signing up at the `Picovoice
-    console <https://console.picovoice.ai/>`_. You may be asked to submit a
-    reason for using the service (feel free to mention a personal Platypush
-    integration), and you will receive your personal access key.
-
-    You may also be asked to select which products you want to use. The default
-    configuration of this plugin requires the following:
+    Picovoice is a suite of on-device voice technologies that include:

        * **Porcupine**: wake-word engine, if you want the device to listen for
          a specific wake word in order to start the assistant.

        * **Cheetah**: speech-to-text engine, if you want your voice
-          interactions to be transcribed into free text - either programmatically
-          or when triggered by the wake word. Or:
+          interactions to be transcribed into free text - either
+          programmatically or when triggered by the wake word. Or:

        * **Rhino**: intent recognition engine, if you want to extract *intents*
          out of your voice commands - for instance, the phrase "set the living
@ -47,6 +38,316 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
          logic to respond to user's voice commands and render the responses as
          audio.

+    This plugin is a wrapper around the Picovoice engine that allows you to
+    run your custom voice-based conversational flows on your device.
+
+    Getting a Picovoice account and access key
+    -------------------------------------------
+
+    You can get your personal access key by signing up at the `Picovoice
+    console <https://console.picovoice.ai/>`_. You may be asked to submit a
+    reason for using the service (feel free to mention a personal Platypush
+    integration), and you will receive your personal access key.
+
+    If prompted to select the products you want to use, make sure to select
+    the ones from the Picovoice suite that you want to use with this plugin.
+
+
+    Hotword detection
+    -----------------
+
+    The hotword detection engine is based on `Porcupine
+    <https://picovoice.ai/platform/porcupine/>`_.
+
+    If enabled through the ``hotword_enabled`` parameter (default: True), the
+    assistant will listen for a specific wake word before starting the
+    speech-to-text or intent recognition engines. You can specify custom models
+    for your hotword (e.g. on the same device you may use "Alexa" to trigger the
+    speech-to-text engine in English, "Computer" to trigger the speech-to-text
+    engine in Italian, and "Ok Google" to trigger the intent recognition
+    engine).
+
+    You can also create your custom hotword models using the `Porcupine console
+    <https://console.picovoice.ai/ppn>`_.
+
+    If ``hotword_enabled`` is set to True, you must also specify the
+    ``keywords`` parameter with the list of keywords that you want to listen
+    for, and optionally the ``keyword_paths`` parameter with the paths to
+    any custom hotword models that you want to use. If ``hotword_enabled`` is
+    set to False, then the assistant won't start listening for speech after the
+    plugin is started, and you will need to programmatically start the
+    conversation by calling the :meth:`.start_conversation` action, or trigger
+    it from the UI.
+
+    When a wake-word is detected, the assistant will emit a
+    :class:`platypush.message.event.assistant.HotwordDetectedEvent` event that
+    you can use to build your custom logic. For example:
+
+      .. code-block:: python
+
+        import time
+
+        from platypush import hook, run
+        from platypush.message.event.assistant import HotwordDetectedEvent
+
+        # Turn on a light for 5 seconds when the hotword "Alexa" is detected
+        @hook(HotwordDetectedEvent, hotword='Alexa')
+        def on_hotword_detected(event: HotwordDetectedEvent, **context):
+            run("light.hue.on", lights=["Living Room"])
+            time.sleep(5)
+            run("light.hue.off", lights=["Living Room"])
+
+    By default, the assistant will start listening for speech after the hotword
+    if either ``stt_enabled`` or ``intent_model_path`` is set. If you don't
+    want the assistant to start listening for speech after the hotword is
+    detected (for example because you want to build your custom response flows,
+    or trigger the speech detection using different models depending on the
+    hotword that is used, or because you just want to detect hotwords but not
+    speech), then you can also set the ``start_conversation_on_hotword``
+    parameter to ``False``. If that is the case, then you can programmatically
+    start the conversation by calling the :meth:`.start_conversation` method in
+    your event hooks:
+
+      .. code-block:: python
+
+        from platypush import hook, run
+        from platypush.message.event.assistant import HotwordDetectedEvent
+
+        # Start a conversation using the Italian language model when the
+        # "Buongiorno" hotword is detected
+        @hook(HotwordDetectedEvent, hotword='Buongiorno')
+        def on_it_hotword_detected(event: HotwordDetectedEvent, **context):
+            event.assistant.start_conversation(model_file='path/to/it.pv')
+
+    Speech-to-text
+    --------------
+
+    The speech-to-text engine is based on `Cheetah
+    <https://picovoice.ai/docs/cheetah/>`_.
+
+    If enabled through the ``stt_enabled`` parameter (default: True), the
+    assistant will transcribe the voice commands into text when a conversation
+    is started either programmatically through :meth:`.start_conversation` or
+    when the hotword is detected.
+
+    It will emit a
+    :class:`platypush.message.event.assistant.SpeechRecognizedEvent` when some
+    speech is detected, and you can hook to that event to build your custom
+    logic:
+
+      .. code-block:: python
+
+        from platypush import hook, run
+        from platypush.message.event.assistant import SpeechRecognizedEvent
+
+        # Turn on a light when the phrase "turn on the lights" is detected.
+        # Note that we can leverage regex-based pattern matching to be more
+        # flexible when matching the phrases. For example, the following hook
+        # will be matched when the user says "turn on the lights", "turn on
+        # lights", "lights on", "lights on please", "turn on light" etc.
+        @hook(SpeechRecognizedEvent, phrase='turn on (the)? lights?')
+        def on_turn_on_lights(event: SpeechRecognizedEvent, **context):
+            run("light.hue.on")
+
+    You can also leverage context extraction through the ``${}`` syntax on the
+    hook to extract specific tokens from the event that can be passed to your
+    event hook. For example:
+
+      .. code-block:: python
+
+        from platypush import hook, run
+        from platypush.message.event.assistant import SpeechRecognizedEvent
+
+        @hook(SpeechRecognizedEvent, phrase='play ${title} by ${artist}')
+        def on_play_track_command(
+            event: SpeechRecognizedEvent, title: str, artist: str, **context
+        ):
+            results = run(
+                "music.mopidy.search",
+                filter={"title": title, "artist": artist}
+            )
+
+            if not results:
+                event.assistant.render_response(f"Couldn't find {title} by {artist}")
+                return
+
+            run("music.mopidy.play", resource=results[0]["uri"])
+
+    Speech-to-intent
+    ----------------
+
+    The intent recognition engine is based on `Rhino
+    <https://picovoice.ai/docs/rhino/>`_.
+
+    *Intents* are snippets of unstructured transcribed speech that can be
+    matched to structured actions.
+
+    Unlike with hotword and speech-to-text detection, you need to provide a
+    custom model for intent detection. You can create your custom model using
+    the `Rhino console <https://console.picovoice.ai/rhn>`_.
+
+    When an intent is detected, the assistant will emit a
+    :class:`platypush.message.event.assistant.IntentRecognizedEvent` that you
+    can listen for.
+
+    For example, you can train a model to control groups of smart lights by
+    defining the following slots on the Rhino console:
+
+        - ``device_state``: The new state of the device (e.g. with ``on`` or
+          ``off`` as supported values)
+
+        - ``room``: The name of the room associated with the group of lights to
+          be controlled (e.g. ``living room``, ``kitchen``, ``bedroom``)
+
+    You can then define a ``lights_ctrl`` intent with the following expressions:
+
+        - "turn ``$device_state:state`` the lights"
+        - "turn ``$device_state:state`` the ``$room:room`` lights"
+        - "turn the lights ``$device_state:state``"
+        - "turn the ``$room:room`` lights ``$device_state:state``"
+        - "turn ``$room:room`` lights ``$device_state:state``"
+
+    This intent will match any of the following phrases:
+
+        - "*turn on the lights*"
+        - "*turn off the lights*"
+        - "*turn the lights on*"
+        - "*turn the lights off*"
+        - "*turn on the living room lights*"
+        - "*turn off the living room lights*"
+        - "*turn the living room lights on*"
+        - "*turn the living room lights off*"
+
+    And it will extract any slots that are matched in the phrases in the
+    :class:`platypush.message.event.assistant.IntentRecognizedEvent` event.
+
+    Train the model, download the context file, and pass the path on the
+    ``intent_model_path`` parameter.
+
+    You can then register a hook to listen to a specific intent:
+
+      .. code-block:: python
+
+        from platypush import hook, run
+        from platypush.message.event.assistant import IntentRecognizedEvent
+
+        @hook(IntentRecognizedEvent, intent='lights_ctrl', slots={'state': 'on'})
+        def on_turn_on_lights(event: IntentRecognizedEvent, **context):
+            room = event.slots.get('room')
+            if room:
+                run("light.hue.on", groups=[room])
+            else:
+                run("light.hue.on")
+
+    Note that if both ``stt_enabled`` and ``intent_model_path`` are set, then
+    both the speech-to-text and intent recognition engines will run in parallel
+    when a conversation is started.
+
+    The intent engine is usually faster, as it has a smaller set of intents to
+    match and doesn't have to run a full speech-to-text transcription. This means that,
+    if an utterance matches both a speech-to-text phrase and an intent, the
+    :class:`platypush.message.event.assistant.IntentRecognizedEvent` event is emitted
+    (and not :class:`platypush.message.event.assistant.SpeechRecognizedEvent`).
+
+    This may not always be the case, though. So it may be a good practice to
+    also provide a fallback
+    :class:`platypush.message.event.assistant.SpeechRecognizedEvent` hook to
+    catch the text if the speech is not recognized as an intent:
+
+      .. code-block:: python
+
+        from platypush import hook, run
+        from platypush.message.event.assistant import SpeechRecognizedEvent
+
+        @hook(SpeechRecognizedEvent, phrase='turn ${state} (the)? ${room} lights?')
+        def on_turn_on_lights(event: SpeechRecognizedEvent, phrase, room, **context):
+            if room:
+                run("light.hue.on", groups=[room])
+            else:
+                run("light.hue.on")
+
+    Text-to-speech
+    --------------
+
+    The text-to-speech engine is based on `Orca
+    <https://picovoice.ai/docs/orca/>`_.
+
+    It is not directly implemented by this plugin, but the implementation is
+    provided in the :class:`platypush.plugins.tts.picovoice.TtsPicovoicePlugin`
+    plugin.
+
+    You can however leverage the :meth:`.render_response` action to render some
+    text as speech in response to a user command, and that in turn will leverage
+    the Picovoice TTS plugin to render the response.
+
+    For example, the following snippet provides a hook that:
+
+        - Listens for
+          :class:`platypush.message.event.assistant.SpeechRecognizedEvent`.
+
+        - Matches the phrase against a list of predefined commands that
+          shouldn't require a response.
+
+        - Has fallback logic that leverages the
+          :class:`platypush.plugins.openai.OpenaiPlugin` to generate a response
+          for the given text and renders it as speech.
+
+        - Has logic for follow-on turns if the response from ChatGPT is a question.
+
+      .. code-block:: python
+
+        import re
+        from collections import defaultdict
+        from datetime import datetime as dt, timedelta
+        from dateutil.parser import isoparse
+        from logging import getLogger
+
+        from platypush import hook, run
+        from platypush.message.event.assistant import (
+            SpeechRecognizedEvent,
+            ResponseEndEvent,
+        )
+
+        logger = getLogger(__name__)
+
+        def play_music(*_, **__):
+            run("music.mopidy.play")
+
+        def stop_music(*_, **__):
+            run("music.mopidy.stop")
+
+        def ai_assist(event: SpeechRecognizedEvent, **__):
+            response = run("openai.get_response", prompt=event.phrase)
+            if not response:
+                return
+
+            run("assistant.picovoice.render_response", text=response)
+
+        # List of commands to match, as pairs of regex patterns and the
+        # corresponding actions
+        hooks = (
+            (re.compile(r"play (the )?music", re.IGNORECASE), play_music),
+            (re.compile(r"stop (the )?music", re.IGNORECASE), stop_music),
+            # Fallback to the AI assistant
+            (re.compile(r".*"), ai_assist),
+        )
+
+        @hook(SpeechRecognizedEvent)
+        def on_speech_recognized(event, **kwargs):
+            for pattern, command in hooks:
+                if pattern.search(event.phrase):
+                    logger.info("Running voice command %s", command.__name__)
+                    command(event, **kwargs)
+                    break
+
+        @hook(ResponseEndEvent)
+        def on_response_end(event: ResponseEndEvent, **__):
+            # Check if the response is a question and start a follow-on turn if so.
+            # Note that the ``openai`` plugin by default is configured to keep
+            # the past interaction in a context window of ~10 minutes, so you
+            # can follow up like in a real conversation.
+            if event.assistant and event.response_text and event.response_text.endswith("?"):
+                event.assistant.start_conversation()
+
    """

    def __init__(