From f0a780b75927d28be0079a6f19f8f7ac2238a33b Mon Sep 17 00:00:00 2001 From: Fabio Manganiello Date: Sun, 14 Apr 2024 01:07:17 +0200 Subject: [PATCH] Added `assistant.picovoice.transcribe` and `assistant.picovoice.say`. --- .../platypush/plugins/assistant.picovoice.rst | 5 + docs/source/platypush/plugins/picovoice.rst | 5 - .../platypush/plugins/tts.picovoice.rst | 5 + docs/source/plugins.rst | 3 +- .../plugins/assistant/picovoice/__init__.py | 94 ++++++++++++++++++- 5 files changed, 101 insertions(+), 11 deletions(-) create mode 100644 docs/source/platypush/plugins/assistant.picovoice.rst delete mode 100644 docs/source/platypush/plugins/picovoice.rst create mode 100644 docs/source/platypush/plugins/tts.picovoice.rst diff --git a/docs/source/platypush/plugins/assistant.picovoice.rst b/docs/source/platypush/plugins/assistant.picovoice.rst new file mode 100644 index 000000000..33f39e988 --- /dev/null +++ b/docs/source/platypush/plugins/assistant.picovoice.rst @@ -0,0 +1,5 @@ +``assistant.picovoice`` +======================= + +.. automodule:: platypush.plugins.assistant.picovoice + :members: diff --git a/docs/source/platypush/plugins/picovoice.rst b/docs/source/platypush/plugins/picovoice.rst deleted file mode 100644 index f1f8acded..000000000 --- a/docs/source/platypush/plugins/picovoice.rst +++ /dev/null @@ -1,5 +0,0 @@ -``picovoice`` -============= - -.. automodule:: platypush.plugins.picovoice - :members: diff --git a/docs/source/platypush/plugins/tts.picovoice.rst b/docs/source/platypush/plugins/tts.picovoice.rst new file mode 100644 index 000000000..afc4def6f --- /dev/null +++ b/docs/source/platypush/plugins/tts.picovoice.rst @@ -0,0 +1,5 @@ +``tts.picovoice`` +================= + +.. 
automodule:: platypush.plugins.tts.picovoice + :members: diff --git a/docs/source/plugins.rst b/docs/source/plugins.rst index 783cb841e..8394eafce 100644 --- a/docs/source/plugins.rst +++ b/docs/source/plugins.rst @@ -11,6 +11,7 @@ Plugins platypush/plugins/application.rst platypush/plugins/arduino.rst platypush/plugins/assistant.google.rst + platypush/plugins/assistant.picovoice.rst platypush/plugins/autoremote.rst platypush/plugins/bluetooth.rst platypush/plugins/calendar.rst @@ -95,7 +96,6 @@ Plugins platypush/plugins/nmap.rst platypush/plugins/ntfy.rst platypush/plugins/otp.rst - platypush/plugins/picovoice.rst platypush/plugins/pihole.rst platypush/plugins/ping.rst platypush/plugins/printer.cups.rst @@ -134,6 +134,7 @@ Plugins platypush/plugins/tts.rst platypush/plugins/tts.google.rst platypush/plugins/tts.mimic3.rst + platypush/plugins/tts.picovoice.rst platypush/plugins/tv.samsung.ws.rst platypush/plugins/twilio.rst platypush/plugins/udp.rst diff --git a/platypush/plugins/assistant/picovoice/__init__.py b/platypush/plugins/assistant/picovoice/__init__.py index 3c7948d2d..d63edbdb2 100644 --- a/platypush/plugins/assistant/picovoice/__init__.py +++ b/platypush/plugins/assistant/picovoice/__init__.py @@ -82,26 +82,26 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin): to the products available in your Picovoice account. :param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok google``...). This is required if the wake-word engine is enabled. - See the `Picovoice repository + See the `Porcupine keywords repository `_). for a list of the stock keywords available. If you have a custom model, you can pass its path to the ``keyword_paths`` parameter and its filename (without the path and the platform extension) here. :param keyword_paths: List of paths to the keyword files to listen for. 
- Custom keyword files can be created using the `Porcupine console + `https://console.picovoice.ai/ppn>`_ and downloaded from the console itself. :param keyword_model_path: If you are using a keyword file in a non-English language, you can provide the path to the model file for its language. Model files are available for all the supported - languages through the `Picovoice repository + languages through the `Porcupine lib repository `_. :param speech_model_path: Path to the speech model file. If you are using a language other than English, you can provide the path to the model file for that language. Model files are available for all the - supported languages through the `Picovoice repository + supported languages through the `Cheetah repository `_. - You can also use the `Picovoice console + You can also use the `Speech console `_ to train your custom models. You can use a base model and fine-tune it by boosting the detection of your own words and phrases and edit @@ -215,6 +215,90 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin): else: self._assistant.state = AssistantState.IDLE + @action + def say(self, text: str, *args, **kwargs): + """ + Proxy to + :class:`platypush.plugins.tts.picovoice.TtsPicovoicePlugin.say` to + render some text as speech through the Picovoice TTS engine. + + Extra arguments to + :class:`platypush.plugins.tts.picovoice.TtsPicovoicePlugin.say` can be + passed over ``args`` and ``kwargs``. + + :param text: Text to be rendered as speech. + """ + return self.tts.say(text, *args, **kwargs) + + @action + def transcribe(self, audio_file: str, *_, model_file: Optional[str] = None, **__): + """ + Transcribe an audio file to text using the `Leopard + `_ engine. + + :param audio_file: Path to the audio file to be transcribed. + :param model_file: Override the model file to be used to detect speech + in this conversation. If not set, the configured + ``speech_model_path`` will be used. + :return: dict + + .. 
code-block:: json + + { + "transcription": "This is a test", + "words": [ + { + "word": "this", + "start": 0.06400000303983688, + "end": 0.19200000166893005, + "confidence": 0.9626294374465942 + }, + { + "word": "is", + "start": 0.2879999876022339, + "end": 0.35199999809265137, + "confidence": 0.9781675934791565 + }, + { + "word": "a", + "start": 0.41600000858306885, + "end": 0.41600000858306885, + "confidence": 0.9764975309371948 + }, + { + "word": "test", + "start": 0.5120000243186951, + "end": 0.8320000171661377, + "confidence": 0.9511580467224121 + } + ] + } + + """ + import pvleopard + + audio_file = os.path.expanduser(audio_file) + if model_file: + model_file = os.path.expanduser(model_file) + + leopard = pvleopard.create( + access_key=self._assistant_args['access_key'], model_path=model_file + ) + + transcript, words = leopard.process_file(audio_file) + return { + 'transcription': transcript, + 'words': [ + { + 'word': word.word, + 'start': word.start_sec, + 'end': word.end_sec, + 'confidence': word.confidence, + } + for word in words + ], + } + @action def mute(self, *_, **__): """