From ce0f3227ec7eb2c46f8fa433617f16f4c02caab4 Mon Sep 17 00:00:00 2001
From: Fabio Manganiello
Date: Sat, 7 Mar 2020 20:21:32 +0100
Subject: [PATCH] Implemented PicoVoice speech-to-text integration [closes #130]

---
Review notes (commentary placed after the "---" separator is not part of the
commit message):

 * backend/stt/picovoice/{hotword,speech}.py: the "Requires" cross-references
   now point at platypush.plugins.stt.picovoice.{hotword,speech}, the modules
   created by this patch, instead of platypush.plugins.stt.deepspeech.
 * plugins/stt/picovoice/speech.py: recording_thread() asserted on the
   "hotword engine"; the message now names the speech engine, consistently
   with convert_frames().
 * plugins/stt/picovoice/speech.py: detect_speech() no longer strips the
   same string twice.
 * Every fix is an in-place edit of an added line, so the hunk sizes and the
   diffstat below are unchanged.
 * Not changed here, to be confirmed by the author: the 'pvporcupine '
   context line in docs/source/conf.py appears to carry a trailing space
   inside the quotes, and setup.py removes the existing 'picovoice' extra
   name without leaving an alias.

 docs/source/conf.py | 1 +
 platypush/backend/stt/picovoice.py | 21 ---
 platypush/backend/stt/picovoice/__init__.py | 0
 platypush/backend/stt/picovoice/hotword.py | 21 +++
 platypush/backend/stt/picovoice/speech.py | 21 +++
 platypush/plugins/stt/picovoice/__init__.py | 0
 .../{picovoice.py => picovoice/hotword.py} | 6 +-
 platypush/plugins/stt/picovoice/speech.py | 135 ++++++++++++++++++
 requirements.txt | 5 +-
 setup.py | 4 +-
 10 files changed, 188 insertions(+), 26 deletions(-)
 delete mode 100644 platypush/backend/stt/picovoice.py
 create mode 100644 platypush/backend/stt/picovoice/__init__.py
 create mode 100644 platypush/backend/stt/picovoice/hotword.py
 create mode 100644 platypush/backend/stt/picovoice/speech.py
 create mode 100644 platypush/plugins/stt/picovoice/__init__.py
 rename platypush/plugins/stt/{picovoice.py => picovoice/hotword.py} (95%)
 create mode 100644 platypush/plugins/stt/picovoice/speech.py

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 9f8051075..29381ecec 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -243,6 +243,7 @@ autodoc_mock_imports = ['googlesamples.assistant.grpc.audio_helpers',
                         'deepspeech',
                         'wave',
                         'pvporcupine ',
+                        'pvcheetah',
                         ]
 
 sys.path.insert(0, os.path.abspath('../..'))
diff --git a/platypush/backend/stt/picovoice.py b/platypush/backend/stt/picovoice.py
deleted file mode 100644
index f39f2552b..000000000
--- a/platypush/backend/stt/picovoice.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from platypush.backend.stt import SttBackend
-
-
-class SttPicovoiceBackend(SttBackend):
-    """
-    Backend for the PicoVoice speech-to-text engine plugin. Set this plugin to ``enabled`` if you
-    want to run the speech-to-text engine continuously instead of programmatically using
-    ``start_detection`` and ``stop_detection``.
-
-    Requires:
-
-    - The :class:`platypush.plugins.stt.deepspeech.SttPicovoicePlugin` plugin configured and its dependencies
-      installed.
-
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__('stt.picovoice', *args, **kwargs)
-
-
-# vim:sw=4:ts=4:et:
diff --git a/platypush/backend/stt/picovoice/__init__.py b/platypush/backend/stt/picovoice/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/platypush/backend/stt/picovoice/hotword.py b/platypush/backend/stt/picovoice/hotword.py
new file mode 100644
index 000000000..9dc6ae63a
--- /dev/null
+++ b/platypush/backend/stt/picovoice/hotword.py
@@ -0,0 +1,21 @@
+from platypush.backend.stt import SttBackend
+
+
+class SttPicovoiceHotwordBackend(SttBackend):
+    """
+    Backend for the PicoVoice hotword detection plugin. Set this plugin to ``enabled`` if you
+    want to run the hotword engine continuously instead of programmatically using
+    ``start_detection`` and ``stop_detection``.
+
+    Requires:
+
+    - The :class:`platypush.plugins.stt.picovoice.hotword.SttPicovoiceHotwordPlugin` plugin configured and
+      its dependencies installed.
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('stt.picovoice.hotword', *args, **kwargs)
+
+
+# vim:sw=4:ts=4:et:
diff --git a/platypush/backend/stt/picovoice/speech.py b/platypush/backend/stt/picovoice/speech.py
new file mode 100644
index 000000000..28a4b0b1a
--- /dev/null
+++ b/platypush/backend/stt/picovoice/speech.py
@@ -0,0 +1,21 @@
+from platypush.backend.stt import SttBackend
+
+
+class SttPicovoiceSpeechBackend(SttBackend):
+    """
+    Backend for the PicoVoice speech detection plugin. Set this plugin to ``enabled`` if you
+    want to run the speech engine continuously instead of programmatically using
+    ``start_detection`` and ``stop_detection``.
+
+    Requires:
+
+    - The :class:`platypush.plugins.stt.picovoice.speech.SttPicovoiceSpeechPlugin` plugin configured and
+      its dependencies installed.
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('stt.picovoice.speech', *args, **kwargs)
+
+
+# vim:sw=4:ts=4:et:
diff --git a/platypush/plugins/stt/picovoice/__init__.py b/platypush/plugins/stt/picovoice/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/platypush/plugins/stt/picovoice.py b/platypush/plugins/stt/picovoice/hotword.py
similarity index 95%
rename from platypush/plugins/stt/picovoice.py
rename to platypush/plugins/stt/picovoice/hotword.py
index 04388b165..8e0d0c756 100644
--- a/platypush/plugins/stt/picovoice.py
+++ b/platypush/plugins/stt/picovoice/hotword.py
@@ -7,10 +7,10 @@ from platypush.plugins import action
 from platypush.plugins.stt import SttPlugin
 
 
-class SttPicovoicePlugin(SttPlugin):
+class SttPicovoiceHotwordPlugin(SttPlugin):
     """
-    This plugin performs speech-to-text and speech detection using the
-    `PicoVoice `_ speech-to-text integrations.
+    This plugin performs hotword detection using
+    `PicoVoice `_.
 
     Requires:
 
diff --git a/platypush/plugins/stt/picovoice/speech.py b/platypush/plugins/stt/picovoice/speech.py
new file mode 100644
index 000000000..5b63f9491
--- /dev/null
+++ b/platypush/plugins/stt/picovoice/speech.py
@@ -0,0 +1,135 @@
+import inspect
+import os
+import platform
+import struct
+import threading
+from typing import Optional
+
+from platypush.message.event.stt import SpeechStartedEvent
+
+from platypush.context import get_bus
+from platypush.message.response.stt import SpeechDetectedResponse
+from platypush.plugins import action
+from platypush.plugins.stt import SttPlugin
+
+
+class SttPicovoiceSpeechPlugin(SttPlugin):
+    """
+    This plugin performs speech detection using `PicoVoice `_.
+
+    Requires:
+
+        * **cheetah** (``pip install git+https://github.com/BlackLight/cheetah``)
+
+    """
+
+    def __init__(self,
+                 library_path: Optional[str] = None,
+                 acoustic_model_path: Optional[str] = None,
+                 language_model_path: Optional[str] = None,
+                 license_path: Optional[str] = None,
+                 end_of_speech_timeout: int = 1,
+                 *args, **kwargs):
+        """
+        :param library_path: Path to the Cheetah binary library for your OS
+            (default: ``CHEETAH_INSTALL_DIR/lib/OS/ARCH/libpv_cheetah.EXT``).
+        :param acoustic_model_path: Path to the acoustic speech model
+            (default: ``CHEETAH_INSTALL_DIR/lib/common/acoustic_model.pv``).
+        :param language_model_path: Path to the language model
+            (default: ``CHEETAH_INSTALL_DIR/lib/common/language_model.pv``).
+        :param license_path: Path to your PicoVoice license
+            (default: ``CHEETAH_INSTALL_DIR/resources/license/cheetah_eval_linux_public.lic``).
+        :param end_of_speech_timeout: Number of seconds of silence during speech recognition before considering
+            a phrase over (default: 1).
+        """
+        from pvcheetah import Cheetah
+        super().__init__(*args, **kwargs)
+
+        self._basedir = os.path.abspath(os.path.join(inspect.getfile(Cheetah), '..', '..', '..'))
+        if not library_path:
+            library_path = self._get_library_path()
+        if not language_model_path:
+            language_model_path = os.path.join(self._basedir, 'lib', 'common', 'language_model.pv')
+        if not acoustic_model_path:
+            acoustic_model_path = os.path.join(self._basedir, 'lib', 'common', 'acoustic_model.pv')
+        if not license_path:
+            license_path = os.path.join(self._basedir, 'resources', 'license', 'cheetah_eval_linux_public.lic')
+
+        self._library_path = library_path
+        self._language_model_path = language_model_path
+        self._acoustic_model_path = acoustic_model_path
+        self._license_path = license_path
+        self._end_of_speech_timeout = end_of_speech_timeout
+        self._stt_engine: Optional[Cheetah] = None
+        self._speech_in_progress = threading.Event()
+
+    def _get_library_path(self) -> str:
+        path = os.path.join(self._basedir, 'lib', platform.system().lower(), platform.machine())
+        return os.path.join(path, [f for f in os.listdir(path) if f.startswith('libpv_cheetah.')][0])
+
+    def convert_frames(self, frames: bytes) -> tuple:
+        assert self._stt_engine, 'The speech engine is not running'
+        return struct.unpack_from("h" * self._stt_engine.frame_length, frames)
+
+    def on_detection_ended(self) -> None:
+        if self._stt_engine:
+            self._stt_engine.delete()
+        self._stt_engine = None
+
+    def detect_speech(self, frames: tuple) -> str:
+        text, is_endpoint = self._stt_engine.process(frames)
+        text = text.strip()
+
+        if text:
+            if not self._speech_in_progress.is_set():
+                self._speech_in_progress.set()
+                get_bus().post(SpeechStartedEvent())
+
+            self._current_text += ' ' + text
+
+        if is_endpoint:
+            text = self._stt_engine.flush().strip()
+            if text:
+                self._current_text += ' ' + text
+
+            self._speech_in_progress.clear()
+            if self._current_text:
+                self.on_speech_detected(self._current_text)
+
+            self._current_text = ''
+
+        return self._current_text
+
+    def process_text(self, text: str) -> None:
+        pass
+
+    @action
+    def detect(self, audio_file: str) -> SpeechDetectedResponse:
+        """
+        Perform speech-to-text analysis on an audio file.
+
+        :param audio_file: Path to the audio file.
+        """
+        pass
+
+    def recording_thread(self, input_device: Optional[str] = None, *args, **kwargs) -> None:
+        assert self._stt_engine, 'The speech engine has not yet been initialized'
+        super().recording_thread(block_size=self._stt_engine.frame_length, input_device=input_device)
+
+    @action
+    def start_detection(self, *args, **kwargs) -> None:
+        from pvcheetah import Cheetah
+        self._stt_engine = Cheetah(
+            library_path=self._library_path,
+            acoustic_model_path=self._acoustic_model_path,
+            language_model_path=self._language_model_path,
+            license_path=self._license_path,
+            endpoint_duration_sec=self._end_of_speech_timeout,
+        )
+
+        self.rate = self._stt_engine.sample_rate
+        self._speech_in_progress.clear()
+        super().start_detection(*args, **kwargs)
+
+
+# vim:sw=4:ts=4:et:
diff --git a/requirements.txt b/requirements.txt
index eabe15ec1..464d622b5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -235,5 +235,8 @@ croniter
 # numpy
 # sounddevice
 
-# Support for PicoVoice speech-to-text engine
+# Support for PicoVoice hotword engine
 # pvporcupine
+
+# Support for PicoVoice speech-to-text engine
+# pvcheetah
diff --git a/setup.py b/setup.py
index 5aeb0b552..45f2fab58 100755
--- a/setup.py
+++ b/setup.py
@@ -285,7 +285,9 @@ setup(
         'zwave': ['python-openzwave'],
         # Support for Mozilla DeepSpeech speech-to-text engine
         'deepspeech': ['deepspeech', 'numpy','sounddevice'],
+        # Support for PicoVoice hotword detection engine
+        'picovoice-hotword': ['pvporcupine'],
         # Support for PicoVoice speech-to-text engine
-        'picovoice': ['pvporcupine'],
+        'picovoice-speech': ['pvcheetah @ git+https://github.com/BlackLight/cheetah'],
     },
 )