From ce0f3227ec7eb2c46f8fa433617f16f4c02caab4 Mon Sep 17 00:00:00 2001
From: Fabio Manganiello <blacklight86@gmail.com>
Date: Sat, 7 Mar 2020 20:21:32 +0100
Subject: [PATCH] Implemented PicoVoice speech-to-text integration [closes
 #130]

---
 docs/source/conf.py                           |   1 +
 platypush/backend/stt/picovoice.py            |  21 ---
 platypush/backend/stt/picovoice/__init__.py   |   0
 platypush/backend/stt/picovoice/hotword.py    |  21 +++
 platypush/backend/stt/picovoice/speech.py     |  21 +++
 platypush/plugins/stt/picovoice/__init__.py   |   0
 .../{picovoice.py => picovoice/hotword.py}    |   6 +-
 platypush/plugins/stt/picovoice/speech.py     | 135 ++++++++++++++++++
 requirements.txt                              |   5 +-
 setup.py                                      |   4 +-
 10 files changed, 188 insertions(+), 26 deletions(-)
 delete mode 100644 platypush/backend/stt/picovoice.py
 create mode 100644 platypush/backend/stt/picovoice/__init__.py
 create mode 100644 platypush/backend/stt/picovoice/hotword.py
 create mode 100644 platypush/backend/stt/picovoice/speech.py
 create mode 100644 platypush/plugins/stt/picovoice/__init__.py
 rename platypush/plugins/stt/{picovoice.py => picovoice/hotword.py} (95%)
 create mode 100644 platypush/plugins/stt/picovoice/speech.py

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 9f8051075..29381ecec 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -243,6 +243,7 @@ autodoc_mock_imports = ['googlesamples.assistant.grpc.audio_helpers',
                         'deepspeech',
                         'wave',
                         'pvporcupine ',
+                        'pvcheetah',
                         ]
 
 sys.path.insert(0, os.path.abspath('../..'))
diff --git a/platypush/backend/stt/picovoice.py b/platypush/backend/stt/picovoice.py
deleted file mode 100644
index f39f2552b..000000000
--- a/platypush/backend/stt/picovoice.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from platypush.backend.stt import SttBackend
-
-
-class SttPicovoiceBackend(SttBackend):
-    """
-    Backend for the PicoVoice speech-to-text engine plugin. Set this plugin to ``enabled`` if you
-    want to run the speech-to-text engine continuously instead of programmatically using
-    ``start_detection`` and ``stop_detection``.
-
-    Requires:
-
-        - The :class:`platypush.plugins.stt.deepspeech.SttPicovoicePlugin` plugin configured and its dependencies
-          installed.
-
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__('stt.picovoice', *args, **kwargs)
-
-
-# vim:sw=4:ts=4:et:
diff --git a/platypush/backend/stt/picovoice/__init__.py b/platypush/backend/stt/picovoice/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/platypush/backend/stt/picovoice/hotword.py b/platypush/backend/stt/picovoice/hotword.py
new file mode 100644
index 000000000..9dc6ae63a
--- /dev/null
+++ b/platypush/backend/stt/picovoice/hotword.py
@@ -0,0 +1,21 @@
+from platypush.backend.stt import SttBackend
+
+
+class SttPicovoiceHotwordBackend(SttBackend):
+    """
+    Backend for the PicoVoice hotword detection plugin. Set this plugin to ``enabled`` if you
+    want to run the hotword engine continuously instead of programmatically using
+    ``start_detection`` and ``stop_detection``.
+
+    Requires:
+
+        - The :class:`platypush.plugins.stt.picovoice.hotword.SttPicovoiceHotwordPlugin` plugin configured and its dependencies
+          installed.
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('stt.picovoice.hotword', *args, **kwargs)
+
+
+# vim:sw=4:ts=4:et:
diff --git a/platypush/backend/stt/picovoice/speech.py b/platypush/backend/stt/picovoice/speech.py
new file mode 100644
index 000000000..28a4b0b1a
--- /dev/null
+++ b/platypush/backend/stt/picovoice/speech.py
@@ -0,0 +1,21 @@
+from platypush.backend.stt import SttBackend
+
+
+class SttPicovoiceSpeechBackend(SttBackend):
+    """
+    Backend for the PicoVoice speech detection plugin. Set this plugin to ``enabled`` if you
+    want to run the speech engine continuously instead of programmatically using
+    ``start_detection`` and ``stop_detection``.
+
+    Requires:
+
+        - The :class:`platypush.plugins.stt.picovoice.speech.SttPicovoiceSpeechPlugin` plugin configured and its dependencies
+          installed.
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('stt.picovoice.speech', *args, **kwargs)
+
+
+# vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/stt/picovoice/__init__.py b/platypush/plugins/stt/picovoice/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/platypush/plugins/stt/picovoice.py b/platypush/plugins/stt/picovoice/hotword.py
similarity index 95%
rename from platypush/plugins/stt/picovoice.py
rename to platypush/plugins/stt/picovoice/hotword.py
index 04388b165..8e0d0c756 100644
--- a/platypush/plugins/stt/picovoice.py
+++ b/platypush/plugins/stt/picovoice/hotword.py
@@ -7,10 +7,10 @@ from platypush.plugins import action
 from platypush.plugins.stt import SttPlugin
 
 
-class SttPicovoicePlugin(SttPlugin):
+class SttPicovoiceHotwordPlugin(SttPlugin):
     """
-    This plugin performs speech-to-text and speech detection using the
-    `PicoVoice <https://github.com/Picovoice>`_ speech-to-text integrations.
+    This plugin performs hotword detection using
+    `PicoVoice <https://github.com/Picovoice>`_.
 
     Requires:
 
diff --git a/platypush/plugins/stt/picovoice/speech.py b/platypush/plugins/stt/picovoice/speech.py
new file mode 100644
index 000000000..5b63f9491
--- /dev/null
+++ b/platypush/plugins/stt/picovoice/speech.py
@@ -0,0 +1,135 @@
+import inspect
+import os
+import platform
+import struct
+import threading
+from typing import Optional
+
+from platypush.message.event.stt import SpeechStartedEvent
+
+from platypush.context import get_bus
+from platypush.message.response.stt import SpeechDetectedResponse
+from platypush.plugins import action
+from platypush.plugins.stt import SttPlugin
+
+
+class SttPicovoiceSpeechPlugin(SttPlugin):
+    """
+    This plugin performs speech detection using `PicoVoice <https://github.com/Picovoice>`_.
+
+    Requires:
+
+        * **cheetah** (``pip install git+https://github.com/BlackLight/cheetah``)
+
+    """
+
+    def __init__(self,
+                 library_path: Optional[str] = None,
+                 acoustic_model_path: Optional[str] = None,
+                 language_model_path: Optional[str] = None,
+                 license_path: Optional[str] = None,
+                 end_of_speech_timeout: int = 1,
+                 *args, **kwargs):
+        """
+        :param library_path: Path to the Cheetah binary library for your OS
+            (default: ``CHEETAH_INSTALL_DIR/lib/OS/ARCH/libpv_cheetah.EXT``).
+        :param acoustic_model_path: Path to the acoustic speech model
+            (default: ``CHEETAH_INSTALL_DIR/lib/common/acoustic_model.pv``).
+        :param language_model_path: Path to the language model
+            (default: ``CHEETAH_INSTALL_DIR/lib/common/language_model.pv``).
+        :param license_path: Path to your PicoVoice license
+            (default: ``CHEETAH_INSTALL_DIR/resources/license/cheetah_eval_linux_public.lic``).
+        :param end_of_speech_timeout: Number of seconds of silence during speech recognition before considering
+            a phrase over (default: 1).
+        """
+        from pvcheetah import Cheetah
+        super().__init__(*args, **kwargs)
+
+        self._basedir = os.path.abspath(os.path.join(inspect.getfile(Cheetah), '..', '..', '..'))
+        if not library_path:
+            library_path = self._get_library_path()
+        if not language_model_path:
+            language_model_path = os.path.join(self._basedir, 'lib', 'common', 'language_model.pv')
+        if not acoustic_model_path:
+            acoustic_model_path = os.path.join(self._basedir, 'lib', 'common', 'acoustic_model.pv')
+        if not license_path:
+            license_path = os.path.join(self._basedir, 'resources', 'license', 'cheetah_eval_linux_public.lic')
+
+        self._library_path = library_path
+        self._language_model_path = language_model_path
+        self._acoustic_model_path = acoustic_model_path
+        self._license_path = license_path
+        self._end_of_speech_timeout = end_of_speech_timeout
+        self._stt_engine: Optional[Cheetah] = None
+        self._speech_in_progress = threading.Event()
+
+    def _get_library_path(self) -> str:
+        path = os.path.join(self._basedir, 'lib', platform.system().lower(), platform.machine())
+        return os.path.join(path, [f for f in os.listdir(path) if f.startswith('libpv_cheetah.')][0])
+
+    def convert_frames(self, frames: bytes) -> tuple:
+        assert self._stt_engine, 'The speech engine is not running'
+        return struct.unpack_from("h" * self._stt_engine.frame_length, frames)
+
+    def on_detection_ended(self) -> None:
+        if self._stt_engine:
+            self._stt_engine.delete()
+        self._stt_engine = None
+
+    def detect_speech(self, frames: tuple) -> str:
+        text, is_endpoint = self._stt_engine.process(frames)
+        text = text.strip()
+
+        if text:
+            if not self._speech_in_progress.is_set():
+                self._speech_in_progress.set()
+                get_bus().post(SpeechStartedEvent())
+
+            self._current_text += ' ' + text.strip()
+
+        if is_endpoint:
+            text = self._stt_engine.flush().strip().strip()
+            if text:
+                self._current_text += ' ' + text
+
+            self._speech_in_progress.clear()
+            if self._current_text:
+                self.on_speech_detected(self._current_text)
+
+            self._current_text = ''
+
+        return self._current_text
+
+    def process_text(self, text: str) -> None:
+        pass
+
+    @action
+    def detect(self, audio_file: str) -> SpeechDetectedResponse:
+        """
+        Perform speech-to-text analysis on an audio file.
+
+        :param audio_file: Path to the audio file.
+        """
+        pass
+
+    def recording_thread(self, input_device: Optional[str] = None, *args, **kwargs) -> None:
+        assert self._stt_engine, 'The hotword engine has not yet been initialized'
+        super().recording_thread(block_size=self._stt_engine.frame_length, input_device=input_device)
+
+    @action
+    def start_detection(self, *args, **kwargs) -> None:
+        from pvcheetah import Cheetah
+        self._stt_engine = Cheetah(
+            library_path=self._library_path,
+            acoustic_model_path=self._acoustic_model_path,
+            language_model_path=self._language_model_path,
+            license_path=self._license_path,
+            endpoint_duration_sec=self._end_of_speech_timeout,
+        )
+
+        self.rate = self._stt_engine.sample_rate
+        self._speech_in_progress.clear()
+        super().start_detection(*args, **kwargs)
+
+
+# vim:sw=4:ts=4:et:
diff --git a/requirements.txt b/requirements.txt
index eabe15ec1..464d622b5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -235,5 +235,8 @@ croniter
 # numpy
 # sounddevice
 
-# Support for PicoVoice speech-to-text engine
+# Support for PicoVoice hotword engine
 # pvporcupine
+
+# Support for PicoVoice speech-to-text engine
+# pvcheetah
diff --git a/setup.py b/setup.py
index 5aeb0b552..45f2fab58 100755
--- a/setup.py
+++ b/setup.py
@@ -285,7 +285,9 @@ setup(
         'zwave': ['python-openzwave'],
         # Support for Mozilla DeepSpeech speech-to-text engine
         'deepspeech': ['deepspeech', 'numpy','sounddevice'],
+        # Support for PicoVoice hotword detection engine
+        'picovoice-hotword': ['pvporcupine'],
         # Support for PicoVoice speech-to-text engine
-        'picovoice': ['pvporcupine'],
+        'picovoice-speech': ['pvcheetah @ git+https://github.com/BlackLight/cheetah'],
     },
 )