Added assistant.picovoice.transcribe and assistant.picovoice.say.

This commit is contained in:
Fabio Manganiello 2024-04-14 01:07:17 +02:00
parent f04f7ce9d7
commit f0a780b759
5 changed files with 101 additions and 11 deletions

View file

@@ -0,0 +1,5 @@
``assistant.picovoice``
=======================
.. automodule:: platypush.plugins.assistant.picovoice
:members:

View file

@@ -1,5 +0,0 @@
``picovoice``
=============
.. automodule:: platypush.plugins.picovoice
:members:

View file

@@ -0,0 +1,5 @@
``tts.picovoice``
=================
.. automodule:: platypush.plugins.tts.picovoice
:members:

View file

@@ -11,6 +11,7 @@ Plugins
platypush/plugins/application.rst
platypush/plugins/arduino.rst
platypush/plugins/assistant.google.rst
platypush/plugins/assistant.picovoice.rst
platypush/plugins/autoremote.rst
platypush/plugins/bluetooth.rst
platypush/plugins/calendar.rst
@@ -95,7 +96,6 @@ Plugins
platypush/plugins/nmap.rst
platypush/plugins/ntfy.rst
platypush/plugins/otp.rst
platypush/plugins/picovoice.rst
platypush/plugins/pihole.rst
platypush/plugins/ping.rst
platypush/plugins/printer.cups.rst
@@ -134,6 +134,7 @@ Plugins
platypush/plugins/tts.rst
platypush/plugins/tts.google.rst
platypush/plugins/tts.mimic3.rst
platypush/plugins/tts.picovoice.rst
platypush/plugins/tv.samsung.ws.rst
platypush/plugins/twilio.rst
platypush/plugins/udp.rst

View file

@@ -82,26 +82,26 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
to the products available in your Picovoice account.
:param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
google``...). This is required if the wake-word engine is enabled.
See the `Picovoice repository
See the `Porcupine keywords repository
<https://github.com/Picovoice/porcupine/tree/master/resources/keyword_files>`_).
for a list of the stock keywords available. If you have a custom
model, you can pass its path to the ``keyword_paths`` parameter and
its filename (without the path and the platform extension) here.
:param keyword_paths: List of paths to the keyword files to listen for.
Custom keyword files can be created using the `Picovoice console
Custom keyword files can be created using the `Porcupine console
<https://console.picovoice.ai/ppn>`_ and downloaded from the
console itself.
:param keyword_model_path: If you are using a keyword file in a
non-English language, you can provide the path to the model file
for its language. Model files are available for all the supported
languages through the `Picovoice repository
languages through the `Porcupine lib repository
<https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
:param speech_model_path: Path to the speech model file. If you are
using a language other than English, you can provide the path to the
model file for that language. Model files are available for all the
supported languages through the `Picovoice repository
supported languages through the `Cheetah repository
<https://github.com/Picovoice/cheetah/tree/master/lib/common>`_.
You can also use the `Picovoice console
You can also use the `Speech console
<https://console.picovoice.ai/cat>`_
to train your custom models. You can use a base model and fine-tune
it by boosting the detection of your own words and phrases and edit
@@ -215,6 +215,90 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
else:
self._assistant.state = AssistantState.IDLE
    @action
    def say(self, text: str, *args, **kwargs):
        """
        Proxy to
        :meth:`platypush.plugins.tts.picovoice.TtsPicovoicePlugin.say` to
        render some text as speech through the Picovoice TTS engine.

        Extra arguments to
        :meth:`platypush.plugins.tts.picovoice.TtsPicovoicePlugin.say` can be
        passed over ``args`` and ``kwargs``.

        :param text: Text to be rendered as speech.
        """
        # Delegates entirely to the TTS plugin instance exposed as self.tts
        # (defined elsewhere in this class/plugin — not visible in this hunk).
        return self.tts.say(text, *args, **kwargs)
@action
def transcribe(self, audio_file: str, *_, model_file: Optional[str] = None, **__):
"""
Transcribe an audio file to text using the `Leopard
<https://picovoice.ai/docs/leopard/>`_ engine.
:param text: Text to be transcribed.
:param model_file: Override the model file to be used to detect speech
in this conversation. If not set, the configured
``speech_model_path`` will be used.
:return: dict
.. code-block:: json
{
"transcription": "This is a test",
"words": [
{
"word": "this",
"start": 0.06400000303983688,
"end": 0.19200000166893005,
"confidence": 0.9626294374465942
},
{
"word": "is",
"start": 0.2879999876022339,
"end": 0.35199999809265137,
"confidence": 0.9781675934791565
},
{
"word": "a",
"start": 0.41600000858306885,
"end": 0.41600000858306885,
"confidence": 0.9764975309371948
},
{
"word": "test",
"start": 0.5120000243186951,
"end": 0.8320000171661377,
"confidence": 0.9511580467224121
}
]
}
"""
import pvleopard
audio_file = os.path.expanduser(audio_file)
if model_file:
model_file = os.path.expanduser(model_file)
leopard = pvleopard.create(
access_key=self._assistant_args['access_key'], model_path=model_file
)
transcript, words = leopard.process_file(audio_file)
return {
'transcription': transcript,
'words': [
{
'word': word.word,
'start': word.start_sec,
'end': word.end_sec,
'confidence': word.confidence,
}
for word in words
],
}
@action
def mute(self, *_, **__):
"""