platypush/platypush/plugins/stt/picovoice.py

104 lines
3.8 KiB
Python
Raw Normal View History

import os
import struct
from typing import Optional, List
from platypush.message.response.stt import SpeechDetectedResponse
from platypush.plugins import action
from platypush.plugins.stt import SttPlugin
class SttPicovoicePlugin(SttPlugin):
    """
    This plugin performs speech-to-text and speech detection using the
    `PicoVoice <https://github.com/Picovoice>`_ speech-to-text integrations.

    Requires:

        * **pvporcupine** (``pip install pvporcupine``) for hotword detection.

    """

    def __init__(self,
                 library_path: Optional[str] = None,
                 model_file_path: Optional[str] = None,
                 keyword_file_paths: Optional[List[str]] = None,
                 sensitivity: float = 0.5,
                 sensitivities: Optional[List[float]] = None,
                 *args, **kwargs):
        """
        :param library_path: Path to the Porcupine library. Defaults to the ``LIBRARY_PATH`` exported by
            ``pvporcupine``.
        :param model_file_path: Path to the Porcupine model file. Defaults to the ``MODEL_FILE_PATH`` exported by
            ``pvporcupine``.
        :param keyword_file_paths: Paths to the keyword files. If not specified, they are looked up in the
            ``KEYWORD_FILE_PATHS`` map exported by ``pvporcupine`` using the configured hotwords as keys.
        :param sensitivity: Sensitivity applied to every keyword when ``sensitivities`` is not specified
            (default: 0.5).
        :param sensitivities: Per-keyword sensitivities. If specified, it must contain exactly one value for each
            keyword file.
        :param args: Positional arguments forwarded to the parent plugin constructor.
        :param kwargs: Keyword arguments forwarded to the parent plugin constructor.
        """
        # pvporcupine is imported lazily so that the module can be loaded even if the dependency is missing.
        from pvporcupine import Porcupine
        from pvporcupine.resources.util.python.util import LIBRARY_PATH, MODEL_FILE_PATH, KEYWORD_FILE_PATHS

        super().__init__(*args, **kwargs)

        # Hotwords are accessed by index in detect_speech, so they are stored as a list.
        self.hotwords = list(self.hotwords)
        # The engine is only created when the detection is started.
        self._hotword_engine: Optional[Porcupine] = None
        self._library_path = os.path.abspath(os.path.expanduser(library_path or LIBRARY_PATH))
        self._model_file_path = os.path.abspath(os.path.expanduser(model_file_path or MODEL_FILE_PATH))

        if not keyword_file_paths:
            # No explicit keyword files: resolve them from the ones bundled with pvporcupine.
            hotwords = KEYWORD_FILE_PATHS
            assert all(hotword in hotwords for hotword in self.hotwords), \
                'Not all the hotwords could be found. Available hotwords: {}'.format(list(hotwords.keys()))

            self._keyword_file_paths = [
                os.path.abspath(os.path.expanduser(hotwords[hotword]))
                for hotword in self.hotwords
            ]
        else:
            self._keyword_file_paths = [
                os.path.abspath(os.path.expanduser(p))
                for p in keyword_file_paths
            ]

        if sensitivities:
            assert len(self._keyword_file_paths) == len(sensitivities), \
                'Please specify as many sensitivities as the number of configured hotwords'
            self._sensitivities = sensitivities
        else:
            # Apply the same default sensitivity to each of the keywords.
            self._sensitivities = [sensitivity] * len(self._keyword_file_paths)

    def convert_frames(self, frames: bytes) -> tuple:
        """
        Unpack a raw audio buffer into a tuple of signed 16-bit samples.

        :param frames: Raw buffer. It must contain at least ``frame_length`` 16-bit samples, where
            ``frame_length`` is read from the running hotword engine.
        :return: A tuple of ``frame_length`` integers.
        """
        assert self._hotword_engine, 'The hotword engine is not running'
        return struct.unpack_from("h" * self._hotword_engine.frame_length, frames)

    def on_detection_ended(self) -> None:
        """
        Release the hotword engine, if one has been created.
        """
        if self._hotword_engine:
            self._hotword_engine.delete()

        self._hotword_engine = None

    def detect_speech(self, frames: tuple) -> str:
        """
        Run the hotword engine over a frame of audio samples.

        :param frames: Audio samples, as returned by :meth:`convert_frames`.
        :return: The detected hotword, or an empty string if no hotword was detected.
        """
        assert self._hotword_engine, 'The hotword engine is not running'
        result = self._hotword_engine.process(frames)

        # Depending on the engine version and on the number of keywords, process() may return either a
        # boolean (detected/not detected) or the index of the detected keyword. The boolean case must be
        # handled first: False is neither negative nor True, and would otherwise be used as index 0.
        if isinstance(result, bool):
            return self.hotwords[0] if result else ''

        if result < 0:
            return ''

        return self.hotwords[result]

    @action
    def detect(self, audio_file: str) -> SpeechDetectedResponse:
        """
        Perform speech-to-text analysis on an audio file.

        NOTE: the body of this action is empty, so it currently performs no analysis and returns nothing.

        :param audio_file: Path to the audio file.
        """
        pass

    def recording_thread(self, input_device: Optional[str] = None, *args, **kwargs) -> None:
        """
        Run the parent recording thread using the frame length of the hotword engine as block size.

        :param input_device: Input device passed to the parent implementation.
        :param args: Accepted for interface compatibility, but not forwarded to the parent implementation.
        :param kwargs: Accepted for interface compatibility, but not forwarded to the parent implementation.
        """
        assert self._hotword_engine, 'The hotword engine has not yet been initialized'
        super().recording_thread(block_size=self._hotword_engine.frame_length, input_device=input_device)

    @action
    def start_detection(self, *args, **kwargs) -> None:
        """
        Create the hotword engine and start the speech detection.

        :param args: Positional arguments forwarded to the parent implementation.
        :param kwargs: Keyword arguments forwarded to the parent implementation.
        """
        from pvporcupine import Porcupine

        self._hotword_engine = Porcupine(
            library_path=self._library_path,
            model_file_path=self._model_file_path,
            keyword_file_paths=self._keyword_file_paths,
            sensitivities=self._sensitivities)

        # The recording must use the sample rate expected by the engine.
        self.rate = self._hotword_engine.sample_rate
        super().start_detection(*args, **kwargs)
# vim:sw=4:ts=4:et: