forked from platypush/platypush
[#304] Removed old Picovoice integrations
This commit is contained in:
parent
dba0acb82e
commit
f0382c73ab
12 changed files with 0 additions and 718 deletions
|
@ -1,40 +0,0 @@
|
||||||
import time
|
|
||||||
|
|
||||||
from platypush.backend import Backend
|
|
||||||
from platypush.context import get_plugin
|
|
||||||
from platypush.plugins.stt import SttPlugin
|
|
||||||
|
|
||||||
|
|
||||||
class SttBackend(Backend):
    """
    Base class for speech-to-text backends.

    The backend keeps the detection engine of the configured speech-to-text plugin running: it enters the plugin
    as a context manager, waits on the plugin's detection thread and, if anything fails, retries after
    ``retry_sleep`` seconds.
    """

    def __init__(self, plugin_name: str, retry_sleep: float = 5.0, *args, **kwargs):
        """
        :param plugin_name: Plugin name of the class that will be used for speech detection. Must be an instance of
            :class:`platypush.plugins.stt.SttPlugin`.
        :param retry_sleep: Number of seconds the backend will wait on failure before re-initializing the plugin
            (default: 5 seconds).
        """
        super().__init__(*args, **kwargs)
        self.plugin_name = plugin_name
        self.retry_sleep = retry_sleep

    def run(self):
        """
        Run the detection loop until the backend is asked to stop.
        """
        super().run()
        self.logger.info('Starting {} speech-to-text backend'.format(self.__class__.__name__))

        while not self.should_stop():
            try:
                plugin: SttPlugin = get_plugin(self.plugin_name)
                # Entering the plugin starts the detection, leaving it stops it.
                with plugin:
                    # noinspection PyProtectedMember
                    plugin._detection_thread.join()
            except Exception as e:
                self.logger.exception(e)

                # Don't hold back the shutdown for a retry that will never happen.
                if self.should_stop():
                    break

                self.logger.warning('Encountered an unexpected error, retrying in {} seconds'.format(self.retry_sleep))
                time.sleep(self.retry_sleep)
|
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
|
|
@ -1,21 +0,0 @@
|
||||||
from platypush.backend.stt import SttBackend
|
|
||||||
|
|
||||||
|
|
||||||
class SttPicovoiceHotwordBackend(SttBackend):
    """
    Backend that keeps the PicoVoice hotword detection plugin running. Set this backend to ``enabled`` if you
    want the hotword engine to run continuously, instead of driving it programmatically through the plugin's
    ``start_detection`` and ``stop_detection`` actions.

    Requires:

        - The :class:`platypush.plugins.stt.picovoice.hotword.SttPicovoiceHotwordPlugin` plugin configured and its
          dependencies installed.

    """

    def __init__(self, *args, **kwargs):
        # Name of the plugin that the base backend will look up and run.
        plugin_name = 'stt.picovoice.hotword'
        super().__init__(plugin_name, *args, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
|
|
@ -1,6 +0,0 @@
|
||||||
manifest:
|
|
||||||
events: {}
|
|
||||||
install:
|
|
||||||
pip: []
|
|
||||||
package: platypush.backend.stt.picovoice.hotword
|
|
||||||
type: backend
|
|
|
@ -1,21 +0,0 @@
|
||||||
from platypush.backend.stt import SttBackend
|
|
||||||
|
|
||||||
|
|
||||||
class SttPicovoiceSpeechBackend(SttBackend):
    """
    Backend that keeps the PicoVoice speech detection plugin running. Set this backend to ``enabled`` if you
    want the speech engine to run continuously, instead of driving it programmatically through the plugin's
    ``start_detection`` and ``stop_detection`` actions.

    Requires:

        - The :class:`platypush.plugins.stt.picovoice.speech.SttPicovoiceSpeechPlugin` plugin configured and its
          dependencies installed.

    """

    def __init__(self, *args, **kwargs):
        # Name of the plugin that the base backend will look up and run.
        plugin_name = 'stt.picovoice.speech'
        super().__init__(plugin_name, *args, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
|
|
@ -1,6 +0,0 @@
|
||||||
manifest:
|
|
||||||
events: {}
|
|
||||||
install:
|
|
||||||
pip: []
|
|
||||||
package: platypush.backend.stt.picovoice.speech
|
|
||||||
type: backend
|
|
|
@ -1,336 +0,0 @@
|
||||||
import queue
|
|
||||||
import threading
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from typing import Optional, Union, List
|
|
||||||
|
|
||||||
import sounddevice as sd
|
|
||||||
|
|
||||||
from platypush.context import get_bus
|
|
||||||
from platypush.message.event.stt import (
|
|
||||||
SpeechDetectionStartedEvent,
|
|
||||||
SpeechDetectionStoppedEvent,
|
|
||||||
SpeechStartedEvent,
|
|
||||||
SpeechDetectedEvent,
|
|
||||||
HotwordDetectedEvent,
|
|
||||||
ConversationDetectedEvent,
|
|
||||||
)
|
|
||||||
from platypush.message.response.stt import SpeechDetectedResponse
|
|
||||||
from platypush.plugins import Plugin, action
|
|
||||||
|
|
||||||
|
|
||||||
class SttPlugin(ABC, Plugin):
    """
    Abstract class for speech-to-text plugins.

    Detection runs on two threads: ``recording_thread`` reads raw audio blocks from the input device and pushes
    them to an internal queue, while ``detection_thread`` pulls those blocks, converts them through
    ``convert_frames`` and feeds them to ``detect_speech``.
    """

    # Maximum time (in seconds) that ``stop_detection`` waits for each worker thread to terminate.
    _thread_stop_timeout = 10.0
    # Maximum time (in seconds) that the detection thread blocks waiting for new frames before checking again
    # whether the detection has been stopped.
    _queue_poll_timeout = 0.5
    # Sample rate and number of channels used to open the audio input stream.
    rate = 16000
    channels = 1

    def __init__(
        self,
        input_device: Optional[Union[int, str]] = None,
        hotword: Optional[str] = None,
        hotwords: Optional[List[str]] = None,
        conversation_timeout: Optional[float] = 10.0,
        block_duration: float = 1.0,
    ):
        """
        :param input_device: PortAudio device index or name that will be used for recording speech (default: default
            system audio input device).
        :param hotword: When this word is detected, the plugin will trigger a
            :class:`platypush.message.event.stt.HotwordDetectedEvent` instead of a
            :class:`platypush.message.event.stt.SpeechDetectedEvent` event. You can use these events for hooking other
            assistants. If set, it takes precedence over ``hotwords``.
        :param hotwords: Use a list of hotwords instead of a single one.
        :param conversation_timeout: If ``hotword`` or ``hotwords`` are set and ``conversation_timeout`` is set,
            the next speech detected event will trigger a :class:`platypush.message.event.stt.ConversationDetectedEvent`
            instead of a :class:`platypush.message.event.stt.SpeechDetectedEvent` event. You can hook custom hooks
            here to run any logic depending on the detected speech - it can emulate a kind of
            "OK, Google. Turn on the lights" interaction without using an external assistant (default: 10 seconds).
        :param block_duration: Duration of the acquired audio blocks (default: 1 second).
        """

        super().__init__()
        self.input_device = input_device
        self.conversation_timeout = conversation_timeout
        self.block_duration = block_duration

        self.hotwords = set(hotwords or [])
        if hotword:
            self.hotwords = {hotword}

        self._conversation_event = threading.Event()
        # Timer that closes the currently open conversation window, if any.
        self._conversation_timer: Optional[threading.Timer] = None
        self._input_stream: Optional[sd.InputStream] = None
        self._recording_thread: Optional[threading.Thread] = None
        self._detection_thread: Optional[threading.Thread] = None
        self._audio_queue: Optional[queue.Queue] = None
        self._current_text = ''

    def _get_input_device(self, device: Optional[Union[int, str]] = None) -> int:
        """
        Get the index of the input device by index or name.

        :param device: Device index or name. If None is set then the function will return the index of the
            default audio input device.
        :return: Index of the audio input device.
        :raises AssertionError: If the index is out of range or no device with the given name exists.
        """
        # Compare against None/'' explicitly: 0 is a valid device index and must not be treated as "not set".
        if device is None or device == '':
            device = self.input_device
        if device is None or device == '':
            return sd.query_hostapis()[0].get('default_input_device')

        if isinstance(device, int):
            # Valid indices stop at len - 1.
            assert device < len(sd.query_devices()), 'Device index {} is out of range'.format(device)
            return device

        for i, dev in enumerate(sd.query_devices()):
            if dev['name'] == device:
                return i

        raise AssertionError('Device {} not found'.format(device))

    def _open_conversation_window(self) -> None:
        """
        Flag the conversation as open and schedule its expiration after ``conversation_timeout`` seconds,
        replacing any previously scheduled expiration.
        """
        previous = self._conversation_timer
        if previous:
            previous.cancel()

        def expire() -> None:
            # A stale timer that could not be cancelled in time must not close a newer window.
            if self._conversation_timer is timer:
                self._conversation_event.clear()

        timer = threading.Timer(self.conversation_timeout, expire)
        self._conversation_timer = timer
        self._conversation_event.set()
        timer.start()

    def on_speech_detected(self, speech: str) -> None:
        """
        Hook called when speech is detected. Triggers the right event depending on the current context.

        :param speech: Detected speech.
        """
        speech = speech.strip()

        if speech in self.hotwords:
            event = HotwordDetectedEvent(hotword=speech)
            if self.conversation_timeout:
                self._open_conversation_window()
        elif self._conversation_event.is_set():
            event = ConversationDetectedEvent(speech=speech)
        else:
            event = SpeechDetectedEvent(speech=speech)

        get_bus().post(event)

    @staticmethod
    def convert_frames(frames: bytes) -> bytes:
        """
        Conversion method for raw audio frames. It just returns the input frames as bytes. Override it if required
        by your logic.

        :param frames: Input audio frames, as bytes.
        :return: The audio frames as passed on the input. Override if required.
        """
        return frames

    def on_detection_started(self) -> None:
        """
        Method called when the ``detection_thread`` starts. Initialize your context variables and models here if
        required.
        """
        pass

    def on_detection_ended(self) -> None:
        """
        Method called when the ``detection_thread`` stops. Clean up your context variables and models here.
        """
        pass

    def before_recording(self) -> None:
        """
        Method called when the ``recording_thread`` starts. Put here any logic that you may want to run before the
        recording thread starts.
        """
        pass

    def on_recording_started(self) -> None:
        """
        Method called after the ``recording_thread`` opens the audio device. Put here any logic that you may want to
        run after the recording starts.
        """
        pass

    def on_recording_ended(self) -> None:
        """
        Method called when the ``recording_thread`` stops. Put here any logic that you want to run after the audio
        device is closed.
        """
        pass

    @abstractmethod
    def detect_speech(self, frames) -> str:
        """
        Method called within the ``detection_thread`` when new audio frames have been captured. Must be implemented
        by the derived classes.

        :param frames: Audio frames, as returned by ``convert_frames``.
        :return: Detected text, as a string. Returns an empty string if no text has been detected.
        """
        raise NotImplementedError

    def process_text(self, text: str) -> None:
        """
        Process the text returned by ``detect_speech`` for the latest audio block.

        A phrase is considered complete, and ``on_speech_detected`` is called, when either no text follows some
        previously detected text or the detected text didn't change since the previous block. Otherwise the text
        is stored as an intermediate result.

        :param text: Text detected on the latest audio block (empty if nothing was detected).
        """
        if (not text and self._current_text) or (text and text == self._current_text):
            self.on_speech_detected(self._current_text)
            self._current_text = ''
        else:
            if text:
                if not self._current_text:
                    get_bus().post(SpeechStartedEvent())
                self.logger.info('Intermediate speech results: [{}]'.format(text))

            self._current_text = text

    def detection_thread(self) -> None:
        """
        This thread reads frames from ``_audio_queue``, performs the speech-to-text detection and calls
        ``process_text`` with the result. It terminates once ``_audio_queue`` is reset or replaced.
        """
        self._current_text = ''
        self.logger.debug('Detection thread started')
        self.on_detection_started()

        # Stick to the queue of this detection session, so a stopped session never consumes frames of a new one.
        audio_queue = self._audio_queue

        try:
            while audio_queue is not None and self._audio_queue is audio_queue:
                try:
                    # Use a timeout: a plain get() would block forever once the recording has stopped.
                    frames = audio_queue.get(timeout=self._queue_poll_timeout)
                except queue.Empty:
                    continue

                try:
                    frames = self.convert_frames(frames)
                except Exception as e:
                    self.logger.warning(
                        'Error while feeding audio to the model: {}'.format(str(e))
                    )
                    continue

                text = self.detect_speech(frames).strip()
                self.process_text(text)
        finally:
            # Run the cleanup hook even if the detection logic raised.
            self.on_detection_ended()
            self.logger.debug('Detection thread terminated')

    def recording_thread(
        self,
        block_duration: Optional[float] = None,
        block_size: Optional[int] = None,
        input_device: Optional[str] = None,
    ) -> None:
        """
        Recording thread. It reads raw frames from the audio device and dispatches them to ``detection_thread``.

        :param block_duration: Audio blocks duration. Specify either ``block_duration`` or ``block_size``.
        :param block_size: Size of the audio blocks. Specify either ``block_duration`` or ``block_size``.
        :param input_device: Input device
        """
        assert (block_duration or block_size) and not (
            block_duration and block_size
        ), 'Please specify either block_duration or block_size'

        if not block_size:
            block_size = int(self.rate * self.channels * block_duration)

        self.before_recording()
        self.logger.debug('Recording thread started')
        device = self._get_input_device(input_device)
        stream = sd.InputStream(
            samplerate=self.rate,
            device=device,
            channels=self.channels,
            dtype='int16',
            latency=0,
            blocksize=block_size,
        )
        self._input_stream = stream
        stream.start()
        self.on_recording_started()
        get_bus().post(SpeechDetectionStartedEvent())

        audio_queue = self._audio_queue

        # ``stop_detection`` resets ``_input_stream``: keep recording only while our stream is the active one.
        while self._input_stream is stream:
            try:
                frames = stream.read(block_size)[0]
            except Exception as e:
                if self._input_stream is not stream:
                    # The stream was closed by stop_detection while reading: not an error.
                    break

                self.logger.warning(
                    'Error while reading from the audio input: {}'.format(str(e))
                )
                continue

            audio_queue.put(frames)

        get_bus().post(SpeechDetectionStoppedEvent())
        self.on_recording_ended()
        self.logger.debug('Recording thread terminated')

    @abstractmethod
    @action
    def detect(self, audio_file: str) -> SpeechDetectedResponse:
        """
        Perform speech-to-text analysis on an audio file. Must be implemented by the derived classes.

        :param audio_file: Path to the audio file.
        """
        raise NotImplementedError

    def __enter__(self):
        """
        Context manager enter. Starts detection and returns self.
        """
        self.start_detection()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context manager exit. Stops detection.
        """
        self.stop_detection()

    @action
    def start_detection(
        self,
        input_device: Optional[str] = None,
        seconds: Optional[float] = None,
        block_duration: Optional[float] = None,
    ) -> None:
        """
        Start the speech detection engine.

        :param input_device: Audio input device name/index override
        :param seconds: If set, then the detection engine will stop after this many seconds, otherwise it'll
            start running until ``stop_detection`` is called or application stop.
        :param block_duration: ``block_duration`` override.
        """
        assert (
            not self._input_stream and not self._recording_thread
        ), 'Speech detection is already running'
        block_duration = block_duration or self.block_duration
        input_device = input_device if input_device is not None else self.input_device
        # The queue must exist before the threads start, since both of them pick it up on startup.
        self._audio_queue = queue.Queue()
        self._recording_thread = threading.Thread(
            target=self.recording_thread,
            kwargs={'block_duration': block_duration, 'input_device': input_device},
        )

        self._recording_thread.start()
        self._detection_thread = threading.Thread(target=self.detection_thread)
        self._detection_thread.start()

        if seconds:
            threading.Timer(seconds, lambda: self.stop_detection()).start()

    @action
    def stop_detection(self) -> None:
        """
        Stop the speech detection engine.
        """
        stream = self._input_stream
        assert stream, 'Speech detection is not running'
        # Reset the reference first, so the recording thread knows that the upcoming read failure is expected.
        self._input_stream = None
        stream.stop(ignore_errors=True)
        stream.close(ignore_errors=True)

        if self._recording_thread:
            self._recording_thread.join(timeout=self._thread_stop_timeout)
            self._recording_thread = None

        # Resetting the queue tells the detection thread to terminate.
        self._audio_queue = None
        if self._detection_thread:
            self._detection_thread.join(timeout=self._thread_stop_timeout)
            self._detection_thread = None
|
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
|
|
@ -1,120 +0,0 @@
|
||||||
import os
|
|
||||||
import struct
|
|
||||||
from typing import Optional, List
|
|
||||||
|
|
||||||
from platypush.message.response.stt import SpeechDetectedResponse
|
|
||||||
from platypush.plugins import action
|
|
||||||
from platypush.plugins.stt import SttPlugin
|
|
||||||
|
|
||||||
|
|
||||||
class SttPicovoiceHotwordPlugin(SttPlugin):
    """
    This plugin performs hotword detection using `PicoVoice <https://github.com/Picovoice>`_.
    """

    def __init__(
        self,
        library_path: Optional[str] = None,
        model_file_path: Optional[str] = None,
        keyword_file_paths: Optional[List[str]] = None,
        sensitivity: float = 0.5,
        sensitivities: Optional[List[float]] = None,
        *args,
        **kwargs
    ):
        """
        :param library_path: Path to the Porcupine library (default: the ``LIBRARY_PATH`` bundled with
            ``pvporcupine``).
        :param model_file_path: Path to the Porcupine model file (default: the ``MODEL_FILE_PATH`` bundled with
            ``pvporcupine``).
        :param keyword_file_paths: Paths to the keyword files to be used. If not set, the keyword files are looked
            up by hotword name among the ``KEYWORD_FILE_PATHS`` bundled with ``pvporcupine``.
        :param sensitivity: Sensitivity applied to all the keywords when ``sensitivities`` is not set
            (default: 0.5).
        :param sensitivities: Sensitivity of each keyword. If set, it must contain as many items as the keyword
            files.
        """
        from pvporcupine import Porcupine
        from pvporcupine.resources.util.python.util import (
            LIBRARY_PATH,
            MODEL_FILE_PATH,
            KEYWORD_FILE_PATHS,
        )

        super().__init__(*args, **kwargs)

        # The engine reports detections by index: the hotwords need a stable order.
        self.hotwords = list(self.hotwords)
        self._hotword_engine: Optional[Porcupine] = None
        self._library_path = os.path.abspath(
            os.path.expanduser(library_path or LIBRARY_PATH)
        )
        self._model_file_path = os.path.abspath(
            os.path.expanduser(model_file_path or MODEL_FILE_PATH)
        )

        if not keyword_file_paths:
            hotwords = KEYWORD_FILE_PATHS
            assert all(
                hotword in hotwords for hotword in self.hotwords
            ), 'Not all the hotwords could be found. Available hotwords: {}'.format(
                list(hotwords.keys())
            )

            self._keyword_file_paths = [
                os.path.abspath(os.path.expanduser(hotwords[hotword]))
                for hotword in self.hotwords
            ]
        else:
            # NOTE(review): nothing here checks that these files match ``self.hotwords`` in number and order,
            # while ``detect_speech`` indexes ``self.hotwords`` by keyword position - confirm with the callers.
            self._keyword_file_paths = [
                os.path.abspath(os.path.expanduser(p)) for p in keyword_file_paths
            ]

        self._sensitivities = []
        if sensitivities:
            assert len(self._keyword_file_paths) == len(
                sensitivities
            ), 'Please specify as many sensitivities as the number of configured hotwords'

            self._sensitivities = sensitivities
        else:
            self._sensitivities = [sensitivity] * len(self._keyword_file_paths)

    def convert_frames(self, frames: bytes) -> tuple:
        """
        Unpack one engine frame of raw audio into a tuple of 16-bit integer samples.

        :param frames: Raw audio frames.
        :return: ``frame_length`` samples, as a tuple of integers.
        """
        assert self._hotword_engine, 'The hotword engine is not running'
        return struct.unpack_from("h" * self._hotword_engine.frame_length, frames)

    def on_detection_ended(self) -> None:
        """
        Release the hotword engine when the detection thread terminates.
        """
        if self._hotword_engine:
            self._hotword_engine.delete()
            self._hotword_engine = None

    def detect_speech(self, frames: tuple) -> str:
        """
        Feed the audio samples to the hotword engine.

        :param frames: Audio samples, as returned by ``convert_frames``.
        :return: The detected hotword, or an empty string if no hotword was detected.
        """
        index = self._hotword_engine.process(frames)

        # The engine may report a boolean instead of an index (presumably when a single keyword is configured -
        # verify against the installed pvporcupine version). ``False`` has to be checked explicitly, because
        # ``False < 0`` doesn't hold and ``False`` would otherwise select the first hotword.
        if index is False or index < 0:
            return ''

        if index is True:
            index = 0
        return self.hotwords[index]

    @action
    def detect(self, audio_file: str) -> SpeechDetectedResponse:
        """
        Perform speech-to-text analysis on an audio file.

        NOTE: not implemented by this plugin - it currently returns nothing.

        :param audio_file: Path to the audio file.
        """
        pass

    def recording_thread(
        self, input_device: Optional[str] = None, *args, **kwargs
    ) -> None:
        """
        Recording thread. The size of the audio blocks is dictated by the frame length of the hotword engine, so
        any other argument except ``input_device`` is ignored.

        :param input_device: Input device
        """
        assert self._hotword_engine, 'The hotword engine has not yet been initialized'
        super().recording_thread(
            block_size=self._hotword_engine.frame_length, input_device=input_device
        )

    @action
    def start_detection(self, *args, **kwargs) -> None:
        """
        Initialize the hotword engine and start the detection. The arguments are passed to the
        ``start_detection`` of the parent class.
        """
        from pvporcupine import Porcupine

        self._hotword_engine = Porcupine(
            library_path=self._library_path,
            model_file_path=self._model_file_path,
            keyword_file_paths=self._keyword_file_paths,
            sensitivities=self._sensitivities,
        )

        # Record at the sample rate required by the engine.
        self.rate = self._hotword_engine.sample_rate
        super().start_detection(*args, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
|
|
@ -1,7 +0,0 @@
|
||||||
manifest:
|
|
||||||
events: {}
|
|
||||||
install:
|
|
||||||
pip:
|
|
||||||
- pvporcupine
|
|
||||||
package: platypush.plugins.stt.picovoice.hotword
|
|
||||||
type: plugin
|
|
|
@ -1,154 +0,0 @@
|
||||||
import inspect
|
|
||||||
import os
|
|
||||||
import platform
|
|
||||||
import struct
|
|
||||||
import threading
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from platypush.message.event.stt import SpeechStartedEvent
|
|
||||||
|
|
||||||
from platypush.context import get_bus
|
|
||||||
from platypush.message.response.stt import SpeechDetectedResponse
|
|
||||||
from platypush.plugins import action
|
|
||||||
from platypush.plugins.stt import SttPlugin
|
|
||||||
|
|
||||||
|
|
||||||
class SttPicovoiceSpeechPlugin(SttPlugin):
    """
    This plugin performs speech detection using `PicoVoice <https://github.com/Picovoice>`_.
    NOTE: The PicoVoice product used for real-time speech-to-text (Cheetah) can be used freely for
    personal applications on x86_64 Linux. Other architectures and operating systems require a commercial license.
    You can ask for a license `here <https://picovoice.ai/contact.html>`_.
    """

    def __init__(
        self,
        library_path: Optional[str] = None,
        acoustic_model_path: Optional[str] = None,
        language_model_path: Optional[str] = None,
        license_path: Optional[str] = None,
        end_of_speech_timeout: int = 1,
        *args,
        **kwargs
    ):
        """
        :param library_path: Path to the Cheetah binary library for your OS
            (default: ``CHEETAH_INSTALL_DIR/lib/OS/ARCH/libpv_cheetah.EXT``).
        :param acoustic_model_path: Path to the acoustic speech model
            (default: ``CHEETAH_INSTALL_DIR/lib/common/acoustic_model.pv``).
        :param language_model_path: Path to the language model
            (default: ``CHEETAH_INSTALL_DIR/lib/common/language_model.pv``).
        :param license_path: Path to your PicoVoice license
            (default: ``CHEETAH_INSTALL_DIR/resources/license/cheetah_eval_linux_public.lic``).
        :param end_of_speech_timeout: Number of seconds of silence during speech recognition before considering
            a phrase over (default: 1).
        """
        from pvcheetah import Cheetah

        super().__init__(*args, **kwargs)

        # Installation directory of Cheetah, resolved relatively to the file that defines the ``Cheetah`` class.
        self._basedir = os.path.abspath(
            os.path.join(inspect.getfile(Cheetah), '..', '..', '..')
        )
        if not library_path:
            library_path = self._get_library_path()
        if not language_model_path:
            language_model_path = os.path.join(
                self._basedir, 'lib', 'common', 'language_model.pv'
            )
        if not acoustic_model_path:
            acoustic_model_path = os.path.join(
                self._basedir, 'lib', 'common', 'acoustic_model.pv'
            )
        if not license_path:
            license_path = os.path.join(
                self._basedir, 'resources', 'license', 'cheetah_eval_linux_public.lic'
            )

        self._library_path = library_path
        self._language_model_path = language_model_path
        self._acoustic_model_path = acoustic_model_path
        self._license_path = license_path
        self._end_of_speech_timeout = end_of_speech_timeout
        self._stt_engine: Optional[Cheetah] = None
        self._speech_in_progress = threading.Event()

    def _get_library_path(self) -> str:
        """
        Get the default path of the Cheetah library for the current operating system and architecture.

        :return: Path of the first ``libpv_cheetah.*`` file found under ``CHEETAH_INSTALL_DIR/lib/OS/ARCH``.
        :raises FileNotFoundError: If the directory doesn't exist or it contains no Cheetah library.
        """
        path = os.path.join(
            self._basedir, 'lib', platform.system().lower(), platform.machine()
        )
        libraries = [f for f in os.listdir(path) if f.startswith('libpv_cheetah.')]
        if not libraries:
            # Fail with a meaningful error instead of a bare IndexError.
            raise FileNotFoundError(
                'No libpv_cheetah.* library found under {}'.format(path)
            )

        return os.path.join(path, libraries[0])

    def convert_frames(self, frames: bytes) -> tuple:
        """
        Unpack one engine frame of raw audio into a tuple of 16-bit integer samples.

        :param frames: Raw audio frames.
        :return: ``frame_length`` samples, as a tuple of integers.
        """
        assert self._stt_engine, 'The speech engine is not running'
        return struct.unpack_from("h" * self._stt_engine.frame_length, frames)

    def on_detection_ended(self) -> None:
        """
        Release the speech engine when the detection thread terminates.
        """
        if self._stt_engine:
            self._stt_engine.delete()
            self._stt_engine = None

    def detect_speech(self, frames: tuple) -> str:
        """
        Feed the audio samples to the speech engine and accumulate the transcribed text. When the engine reports
        the end of a phrase the accumulated text is passed to ``on_speech_detected`` and then reset.

        :param frames: Audio samples, as returned by ``convert_frames``.
        :return: The text accumulated so far for the current phrase (empty right after a phrase is over).
        """
        text, is_endpoint = self._stt_engine.process(frames)
        text = text.strip()

        if text:
            if not self._speech_in_progress.is_set():
                self._speech_in_progress.set()
                get_bus().post(SpeechStartedEvent())

            self._current_text += ' ' + text

        if is_endpoint:
            # Collect whatever text is still buffered by the engine.
            text = self._stt_engine.flush().strip()
            if text:
                self._current_text += ' ' + text

            self._speech_in_progress.clear()
            if self._current_text:
                self.on_speech_detected(self._current_text)

            self._current_text = ''

        return self._current_text

    def process_text(self, text: str) -> None:
        """
        No-op: the detected phrases are already handled within ``detect_speech``.
        """
        pass

    @action
    def detect(self, audio_file: str) -> SpeechDetectedResponse:
        """
        Perform speech-to-text analysis on an audio file.

        NOTE: not implemented by this plugin - it currently returns nothing.

        :param audio_file: Path to the audio file.
        """
        pass

    def recording_thread(
        self, input_device: Optional[str] = None, *args, **kwargs
    ) -> None:
        """
        Recording thread. The size of the audio blocks is dictated by the frame length of the speech engine, so
        any other argument except ``input_device`` is ignored.

        :param input_device: Input device
        """
        assert self._stt_engine, 'The speech engine has not yet been initialized'
        super().recording_thread(
            block_size=self._stt_engine.frame_length, input_device=input_device
        )

    @action
    def start_detection(self, *args, **kwargs) -> None:
        """
        Initialize the speech engine and start the detection. The arguments are passed to the
        ``start_detection`` of the parent class.
        """
        from pvcheetah import Cheetah

        self._stt_engine = Cheetah(
            library_path=self._library_path,
            acoustic_model_path=self._acoustic_model_path,
            language_model_path=self._language_model_path,
            license_path=self._license_path,
            endpoint_duration_sec=self._end_of_speech_timeout,
        )

        # Record at the sample rate required by the engine.
        self.rate = self._stt_engine.sample_rate
        self._speech_in_progress.clear()
        super().start_detection(*args, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
|
|
@ -1,7 +0,0 @@
|
||||||
manifest:
|
|
||||||
events: {}
|
|
||||||
install:
|
|
||||||
pip:
|
|
||||||
- cheetah
|
|
||||||
package: platypush.plugins.stt.picovoice.speech
|
|
||||||
type: plugin
|
|
Loading…
Reference in a new issue