platypush/platypush/plugins/stt/picovoice.py

104 lines
3.8 KiB
Python

import os
import struct
from typing import Optional, List
from platypush.message.response.stt import SpeechDetectedResponse
from platypush.plugins import action
from platypush.plugins.stt import SttPlugin
class SttPicovoicePlugin(SttPlugin):
    """
    This plugin performs speech-to-text and speech detection using the
    `PicoVoice <https://github.com/Picovoice>`_ speech-to-text integrations.

    Requires:

        * **pvporcupine** (``pip install pvporcupine``) for hotword detection.

    """

    def __init__(self,
                 library_path: Optional[str] = None,
                 model_file_path: Optional[str] = None,
                 keyword_file_paths: Optional[List[str]] = None,
                 sensitivity: float = 0.5,
                 sensitivities: Optional[List[float]] = None,
                 *args, **kwargs):
        """
        :param library_path: Path to the Porcupine library. If not set, the
            ``LIBRARY_PATH`` shipped with ``pvporcupine`` is used.
        :param model_file_path: Path to the Porcupine model file. If not set,
            the ``MODEL_FILE_PATH`` shipped with ``pvporcupine`` is used.
        :param keyword_file_paths: Paths to the keyword files, one per hotword.
            If not set, the paths are looked up by hotword name in the
            ``KEYWORD_FILE_PATHS`` mapping shipped with ``pvporcupine``, and
            every entry of ``self.hotwords`` must be a key of that mapping.
        :param sensitivity: Sensitivity applied to every hotword when
            ``sensitivities`` is not provided (default: 0.5).
        :param sensitivities: Per-hotword sensitivities. If provided, it must
            contain exactly one value per keyword file.
        :raises AssertionError: if a hotword has no bundled keyword file, or if
            the number of ``sensitivities`` doesn't match the number of
            keyword files.
        """
        # Imported lazily so that the module can be loaded even when the
        # optional pvporcupine dependency is not installed.
        from pvporcupine import Porcupine
        from pvporcupine.resources.util.python.util import LIBRARY_PATH, MODEL_FILE_PATH, KEYWORD_FILE_PATHS

        super().__init__(*args, **kwargs)

        # Detected hotwords are reported by position, so an indexable
        # sequence is required (see detect_speech).
        self.hotwords = list(self.hotwords)
        # The engine is only created when the detection is started.
        self._hotword_engine: Optional[Porcupine] = None
        self._library_path = os.path.abspath(os.path.expanduser(library_path or LIBRARY_PATH))
        self._model_file_path = os.path.abspath(os.path.expanduser(model_file_path or MODEL_FILE_PATH))

        if not keyword_file_paths:
            # No explicit keyword files: resolve them from the hotword names.
            hotwords = KEYWORD_FILE_PATHS
            assert all(hotword in hotwords for hotword in self.hotwords), \
                'Not all the hotwords could be found. Available hotwords: {}'.format(list(hotwords.keys()))

            self._keyword_file_paths = [
                os.path.abspath(os.path.expanduser(hotwords[hotword]))
                for hotword in self.hotwords
            ]
        else:
            self._keyword_file_paths = [
                os.path.abspath(os.path.expanduser(p))
                for p in keyword_file_paths
            ]

        if sensitivities:
            assert len(self._keyword_file_paths) == len(sensitivities), \
                'Please specify as many sensitivities as the number of configured hotwords'

            # Copy, so later changes to the caller's list don't leak in here.
            self._sensitivities = list(sensitivities)
        else:
            self._sensitivities = [sensitivity] * len(self._keyword_file_paths)

    def convert_frames(self, frames: bytes) -> tuple:
        """
        Unpack a raw audio buffer into a tuple of native-order signed 16-bit
        integers, one per sample of an engine frame.

        :param frames: Raw audio buffer. It must contain at least
            ``frame_length`` 16-bit samples.
        :return: Tuple with ``frame_length`` integers.
        :raises AssertionError: if the hotword engine is not running.
        """
        assert self._hotword_engine, 'The hotword engine is not running'
        # A repeat count ("512h") is equivalent to repeating the format
        # character ("hhh...") without building a long throwaway string.
        return struct.unpack_from('{}h'.format(self._hotword_engine.frame_length), frames)

    def on_detection_ended(self) -> None:
        """
        Release the hotword engine, if one is active.
        """
        if self._hotword_engine:
            self._hotword_engine.delete()

        self._hotword_engine = None

    def detect_speech(self, frames: tuple) -> str:
        """
        Run the hotword engine over a frame of audio samples.

        :param frames: Audio samples, as returned by :meth:`convert_frames`.
        :return: The detected hotword, or an empty string if no hotword was
            detected.
        """
        index = self._hotword_engine.process(frames)

        # The engine may report a boolean instead of an index (presumably when
        # a single keyword is configured - confirm against the installed
        # pvporcupine version). ``bool`` is a subclass of ``int``, so it must
        # be handled before the numeric comparison: otherwise ``False`` would
        # pass the ``< 0`` check and be used as index 0, reporting the first
        # hotword on every frame.
        if isinstance(index, bool):
            return self.hotwords[0] if index else ''

        # A negative index means that no hotword was detected.
        if index < 0:
            return ''

        return self.hotwords[index]

    @action
    def detect(self, audio_file: str) -> SpeechDetectedResponse:
        """
        Perform speech-to-text analysis on an audio file.

        NOTE: not implemented by this plugin - the call currently does nothing
        and returns ``None``.

        :param audio_file: Path to the audio file.
        """
        pass

    def recording_thread(self, input_device: Optional[str] = None, *args, **kwargs) -> None:
        """
        Run the parent recording thread using the engine's frame length as the
        block size. Extra positional and keyword arguments are ignored.

        :param input_device: Input device passed through to the parent
            implementation.
        :raises AssertionError: if the hotword engine has not been initialized.
        """
        assert self._hotword_engine, 'The hotword engine has not yet been initialized'
        super().recording_thread(block_size=self._hotword_engine.frame_length, input_device=input_device)

    @action
    def start_detection(self, *args, **kwargs) -> None:
        """
        Create the hotword engine from the configured paths and sensitivities,
        align the plugin's sample rate with the engine's one and start the
        detection through the parent implementation.

        Positional and keyword arguments are forwarded to the parent's
        ``start_detection``.
        """
        from pvporcupine import Porcupine

        self._hotword_engine = Porcupine(
            library_path=self._library_path,
            model_file_path=self._model_file_path,
            keyword_file_paths=self._keyword_file_paths,
            sensitivities=self._sensitivities)

        # The audio must be recorded at the sample rate the engine expects.
        self.rate = self._hotword_engine.sample_rate
        super().start_detection(*args, **kwargs)
# vim:sw=4:ts=4:et: