Added tts.picovoice plugin.

2024-04-10 20:32:32 +02:00 · 2024-04-10 20:32:32 +02:00 · af875c996e
commit af875c996e
parent a4c911a5d7
2 changed files with 160 additions and 0 deletions
--- a/platypush/plugins/tts/picovoice/__init__.py
+++ b/platypush/plugins/tts/picovoice/__init__.py
@ -0,0 +1,138 @@
+import os
+from threading import RLock
+from typing import Optional
+
+import numpy as np
+import pvorca
+import sounddevice as sd
+
+from platypush.config import Config
+from platypush.plugins import action
+from platypush.plugins.tts import TtsPlugin
+
+
+class TtsPicovoicePlugin(TtsPlugin):
+    """
+    This TTS plugin enables you to render text as audio using `Picovoice
+    <https://picovoice.ai>`_'s (still experimental) `Orca TTS engine
+    <https://github.com/Picovoice/orca>`_.
+
+    Take a look at
+    :class:`platypush.plugins.assistant.picovoice.AssistantPicovoicePlugin`
+    for details on how to sign up for a Picovoice account and get the API key.
+
+    Also note that using the TTS features requires you to select Orca from the
+    list of products available for your account on the `Picovoice console
+    <https://console.picovoice.ai>`_.
+    """
+
+    def __init__(
+        self,
+        access_key: Optional[str] = None,
+        model_path: Optional[str] = None,
+        **kwargs,
+    ):
+        """
+        :param access_key: Picovoice access key. If it's not specified here,
+            then it must be specified on the configuration of
+            :class:`platypush.plugins.assistant.picovoice.AssistantPicovoicePlugin`.
+        :param model_path: Path of the TTS model file (default: use the default
+            English model).
+        """
+        super().__init__(**kwargs)
+        if not access_key:
+            access_key = Config.get('assistant.picovoice', {}).get('access_key')
+            assert (
+                access_key
+            ), 'No access key specified and no assistant.picovoice plugin found'
+
+        self.model_path = model_path
+        self.access_key = access_key
+        if model_path:
+            model_path = os.path.expanduser(model_path)
+
+        self._stream: Optional[sd.OutputStream] = None
+        self._stream_lock = RLock()
+
+    def _play_audio(self, orca: pvorca.Orca, pcm: np.ndarray):
+        with self._stream_lock:
+            self.stop()
+            self._stream = sd.OutputStream(
+                samplerate=orca.sample_rate,
+                channels=1,
+                dtype='int16',
+            )
+
+        try:
+            self._stream.start()
+            self._stream.write(pcm)
+        except Exception as e:
+            self.logger.warning('Error playing audio: %s: %s', type(e), str(e))
+        finally:
+            try:
+                self.stop()
+                self._stream.close()
+            except Exception as e:
+                self.logger.warning(
+                    'Error stopping audio stream: %s: %s', type(e), str(e)
+                )
+            finally:
+                if self._stream:
+                    self._stream = None
+
+    def get_orca(self, model_path: Optional[str] = None):
+        if not model_path:
+            model_path = self.model_path
+        if model_path:
+            model_path = os.path.expanduser(model_path)
+
+        return pvorca.create(access_key=self.access_key, model_path=model_path)
+
+    @action
+    def say(
+        self,
+        text: str,
+        *_,
+        output_file: Optional[str] = None,
+        speech_rate: Optional[float] = None,
+        model_path: Optional[str] = None,
+        **__,
+    ):
+        """
+        Say some text.
+
+        :param text: Text to say.
+        :param output_file: If set, save the audio to the specified file.
+            Otherwise play it.
+        :param speech_rate: Speech rate (default: None).
+        :param model_path: Path of the TTS model file (default: use the default
+            configured model).
+        """
+        orca = self.get_orca(model_path=model_path)
+        if output_file:
+            orca.synthesize_to_file(
+                text, os.path.expanduser(output_file), speech_rate=speech_rate
+            )
+            return
+
+        self._play_audio(
+            orca=orca,
+            pcm=np.array(
+                orca.synthesize(text, speech_rate=speech_rate),
+                dtype='int16',
+            ),
+        )
+
+    @action
+    def stop(self):
+        """
+        Stop the currently playing audio.
+        """
+        with self._stream_lock:
+            if not self._stream:
+                return
+
+            self._stream.stop()
+
+
+# vim:sw=4:ts=4:et:
--- a/platypush/plugins/tts/picovoice/manifest.yaml
+++ b/platypush/plugins/tts/picovoice/manifest.yaml
@ -0,0 +1,22 @@
+manifest:
+  events: {}
+  install:
+    apk:
+      - ffmpeg
+      - py3-numpy
+    apt:
+      - ffmpeg
+      - python3-numpy
+    dnf:
+      - ffmpeg
+      - python-numpy
+    pacman:
+      - ffmpeg
+      - python-numpy
+      - python-sounddevice
+    pip:
+      - numpy
+      - pvorca
+      - sounddevice
+  package: platypush.plugins.tts.picovoice
+  type: plugin