Added PicoVoice plugin with support for hotwords [see #130]
parent e04c6fb921
commit a5c08ed3e4

17 changed files with 265 additions and 61 deletions
@@ -53,7 +53,9 @@ Backends
     platypush/backend/sensor.mcp3008.rst
     platypush/backend/sensor.motion.pwm3901.rst
     platypush/backend/sensor.serial.rst
+    platypush/backend/stt.rst
     platypush/backend/stt.deepspeech.rst
+    platypush/backend/stt.picovoice.rst
     platypush/backend/tcp.rst
     platypush/backend/todoist.rst
     platypush/backend/travisci.rst
@@ -242,6 +242,7 @@ autodoc_mock_imports = ['googlesamples.assistant.grpc.audio_helpers',
                         'openzwave',
                         'deepspeech',
                         'wave',
+                        'pvporcupine',
                         ]

 sys.path.insert(0, os.path.abspath('../..'))
docs/source/platypush/backend/stt.picovoice.rst (new file, 5 lines)
@@ -0,0 +1,5 @@
+``platypush.backend.stt.picovoice``
+===================================
+
+.. automodule:: platypush.backend.stt.picovoice
+    :members:
docs/source/platypush/backend/stt.rst (new file, 5 lines)
@@ -0,0 +1,5 @@
+``platypush.backend.stt``
+=========================
+
+.. automodule:: platypush.backend.stt
+    :members:
docs/source/platypush/plugins/stt.picovoice.rst (new file, 5 lines)
@@ -0,0 +1,5 @@
+``platypush.plugins.stt.picovoice``
+===================================
+
+.. automodule:: platypush.plugins.stt.picovoice
+    :members:
docs/source/platypush/plugins/stt.rst (new file, 5 lines)
@@ -0,0 +1,5 @@
+``platypush.plugins.stt``
+=========================
+
+.. automodule:: platypush.plugins.stt
+    :members:
@@ -90,7 +90,9 @@ Plugins
     platypush/plugins/serial.rst
     platypush/plugins/shell.rst
     platypush/plugins/sound.rst
+    platypush/plugins/stt.rst
     platypush/plugins/stt.deepspeech.rst
+    platypush/plugins/stt.picovoice.rst
     platypush/plugins/switch.rst
     platypush/plugins/switch.switchbot.rst
     platypush/plugins/switch.tplink.rst
@@ -6,7 +6,6 @@ Responses
     :maxdepth: 2
     :caption: Responses:

-    platypush/responses/.rst
     platypush/responses/bluetooth.rst
     platypush/responses/camera.rst
     platypush/responses/camera.android.rst
New file (40 lines): the ``platypush.backend.stt`` base module
@@ -0,0 +1,40 @@
+import time
+
+from platypush.backend import Backend
+from platypush.context import get_plugin
+from platypush.plugins.stt import SttPlugin
+
+
+class SttBackend(Backend):
+    """
+    Base class for speech-to-text backends.
+    """
+
+    def __init__(self, plugin_name: str, retry_sleep: float = 5.0, *args, **kwargs):
+        """
+        :param plugin_name: Plugin name of the class that will be used for speech detection. Must be an instance of
+            :class:`platypush.plugins.stt.SttPlugin`.
+        :param retry_sleep: Number of seconds the backend will wait on failure before re-initializing the plugin
+            (default: 5 seconds).
+        """
+        super().__init__(*args, **kwargs)
+        self.plugin_name = plugin_name
+        self.retry_sleep = retry_sleep
+
+    def run(self):
+        super().run()
+        self.logger.info('Starting {} speech-to-text backend'.format(self.__class__.__name__))
+
+        while not self.should_stop():
+            try:
+                plugin: SttPlugin = get_plugin(self.plugin_name)
+                with plugin:
+                    # noinspection PyProtectedMember
+                    plugin._detection_thread.join()
+            except Exception as e:
+                self.logger.exception(e)
+                self.logger.warning('Encountered an unexpected error, retrying in {} seconds'.format(self.retry_sleep))
+                time.sleep(self.retry_sleep)
+
+
+# vim:sw=4:ts=4:et:
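With this base class in place, a concrete speech-to-text backend only needs to pass the name of its plugin to the constructor; the run() loop above takes care of resolving the plugin and keeping its detection thread alive. A minimal sketch of such a subclass (the ``stt.dummy`` plugin name is hypothetical and used only for illustration, not part of this commit):

from platypush.backend.stt import SttBackend


class SttDummyBackend(SttBackend):
    """
    Hypothetical backend that delegates speech detection to a plugin
    registered as ``stt.dummy`` (illustration only).
    """

    def __init__(self, *args, **kwargs):
        # The base run() loop will resolve get_plugin('stt.dummy') and join
        # its detection thread, restarting it on failure after retry_sleep.
        super().__init__('stt.dummy', *args, **kwargs)

This is exactly the pattern followed by the refactored deepspeech backend and by the new picovoice backend below.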
@@ -1,11 +1,7 @@
-import time
-
-from platypush.backend import Backend
-from platypush.context import get_plugin
-from platypush.plugins.stt.deepspeech import SttDeepspeechPlugin
+from platypush.backend.stt import SttBackend


-class SttDeepspeechBackend(Backend):
+class SttDeepspeechBackend(SttBackend):
     """
     Backend for the Mozilla Deepspeech speech-to-text engine plugin. Set this plugin to ``enabled`` if you
     want to run the speech-to-text engine continuously instead of programmatically using
@@ -18,30 +14,8 @@ class SttDeepspeechBackend(Backend):

     """

-    def __init__(self, retry_sleep: float = 5.0, *args, **kwargs):
-        """
-        :param retry_sleep: Number of seconds the backend will wait on failure before re-initializing the plugin
-            (default: 5 seconds).
-        """
-        super().__init__(*args, **kwargs)
-        self.retry_sleep = retry_sleep
-
-    def run(self):
-        super().run()
-        self.logger.info('Starting Mozilla Deepspeech speech-to-text backend')
-
-        while not self.should_stop():
-            try:
-                plugin: SttDeepspeechPlugin = get_plugin('stt.deepspeech')
-                with plugin:
-                    # noinspection PyProtectedMember
-                    plugin._detection_thread.join()
-            except Exception as e:
-                self.logger.exception(e)
-                self.logger.warning('Deepspeech backend encountered an unexpected error, retrying in {} seconds'.
-                                    format(self.retry_sleep))
-
-                time.sleep(self.retry_sleep)
+    def __init__(self, *args, **kwargs):
+        super().__init__('stt.deepspeech', *args, **kwargs)


 # vim:sw=4:ts=4:et:
platypush/backend/stt/picovoice.py (new file, 21 lines)
@@ -0,0 +1,21 @@
+from platypush.backend.stt import SttBackend
+
+
+class SttPicovoiceBackend(SttBackend):
+    """
+    Backend for the PicoVoice speech-to-text engine plugin. Set this plugin to ``enabled`` if you
+    want to run the speech-to-text engine continuously instead of programmatically using
+    ``start_detection`` and ``stop_detection``.
+
+    Requires:
+
+        - The :class:`platypush.plugins.stt.picovoice.SttPicovoicePlugin` plugin configured and its dependencies
+          installed.
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__('stt.picovoice', *args, **kwargs)
+
+
+# vim:sw=4:ts=4:et:
@@ -138,6 +138,9 @@ class Config(object):
         with open(cfgfile, 'r') as fp:
             file_config = yaml.safe_load(fp)

+        if not file_config:
+            return config
+
         for section in file_config:
             if section == 'include':
                 include_files = file_config[section] \
@@ -35,7 +35,7 @@ class SttPlugin(ABC, Plugin):
                  input_device: Optional[Union[int, str]] = None,
                  hotword: Optional[str] = None,
                  hotwords: Optional[List[str]] = None,
-                 conversation_timeout: Optional[float] = None,
+                 conversation_timeout: Optional[float] = 10.0,
                  block_duration: float = 1.0):
         """
         :param input_device: PortAudio device index or name that will be used for recording speech (default: default
@@ -49,7 +49,7 @@ class SttPlugin(ABC, Plugin):
             the next speech detected event will trigger a :class:`platypush.message.event.stt.ConversationDetectedEvent`
             instead of a :class:`platypush.message.event.stt.SpeechDetectedEvent` event. You can hook custom hooks
             here to run any logic depending on the detected speech - it can emulate a kind of
-            "OK, Google. Turn on the lights" interaction without using an external assistant.
+            "OK, Google. Turn on the lights" interaction without using an external assistant (default: 10 seconds).
         :param block_duration: Duration of the acquired audio blocks (default: 1 second).
         """

@@ -67,6 +67,7 @@ class SttPlugin(ABC, Plugin):
         self._recording_thread: Optional[threading.Thread] = None
         self._detection_thread: Optional[threading.Thread] = None
         self._audio_queue: Optional[queue.Queue] = None
+        self._current_text = ''

     def _get_input_device(self, device: Optional[Union[int, str]] = None) -> int:
         """
@@ -99,13 +100,13 @@ class SttPlugin(ABC, Plugin):
         """
         speech = speech.strip()

-        if self._conversation_event.is_set():
-            event = ConversationDetectedEvent(speech=speech)
-        elif speech in self.hotwords:
+        if speech in self.hotwords:
             event = HotwordDetectedEvent(hotword=speech)
             if self.conversation_timeout:
                 self._conversation_event.set()
                 threading.Timer(self.conversation_timeout, lambda: self._conversation_event.clear()).start()
+        elif self._conversation_event.is_set():
+            event = ConversationDetectedEvent(speech=speech)
         else:
             event = SpeechDetectedEvent(speech=speech)

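The reordered branches change the precedence of the events: a recognized hotword now always wins over an open conversation window, and the conversation branch is only checked afterwards. A standalone sketch of the same decision order, with plain strings in place of the platypush event classes (illustration only, not part of this commit):

def classify_speech(speech, hotwords, conversation_open, conversation_timeout):
    # Mirrors the branch order above: hotword first, then conversation window,
    # then plain speech. Returns the event kind and whether a conversation
    # window would be (re)opened.
    speech = speech.strip()
    if speech in hotwords:
        return 'hotword', bool(conversation_timeout)
    if conversation_open:
        return 'conversation', False
    return 'speech', False


assert classify_speech('ok platypush', ['ok platypush'], True, 10.0) == ('hotword', True)
assert classify_speech('turn on the lights', ['ok platypush'], True, 10.0) == ('conversation', False)
assert classify_speech('turn on the lights', ['ok platypush'], False, 10.0) == ('speech', False)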
@@ -122,35 +123,68 @@ class SttPlugin(ABC, Plugin):
         """
         return frames

-    def on_detection_started(self):
+    def on_detection_started(self) -> None:
         """
         Method called when the ``detection_thread`` starts. Initialize your context variables and models here if
         required.
         """
         pass

-    def on_detection_ended(self):
+    def on_detection_ended(self) -> None:
         """
         Method called when the ``detection_thread`` stops. Clean up your context variables and models here.
         """
         pass

+    def before_recording(self) -> None:
+        """
+        Method called when the ``recording_thread`` starts. Put here any logic that you may want to run before the
+        recording thread starts.
+        """
+        pass
+
+    def on_recording_started(self) -> None:
+        """
+        Method called after the ``recording_thread`` opens the audio device. Put here any logic that you may want to
+        run after the recording starts.
+        """
+        pass
+
+    def on_recording_ended(self) -> None:
+        """
+        Method called when the ``recording_thread`` stops. Put here any logic that you want to run after the audio
+        device is closed.
+        """
+        pass
+
     @abstractmethod
-    def detect_audio(self, frames) -> str:
+    def detect_speech(self, frames) -> str:
         """
         Method called within the ``detection_thread`` when new audio frames have been captured. Must be implemented
         by the derived classes.

         :param frames: Audio frames, as returned by ``convert_frames``.
-        :return: Detected text, as a string.
+        :return: Detected text, as a string. Returns an empty string if no text has been detected.
         """
         raise NotImplementedError

+    def process_text(self, text: str) -> None:
+        if (not text and self._current_text) or (text and text == self._current_text):
+            self.on_speech_detected(self._current_text)
+            self._current_text = ''
+        else:
+            if text:
+                if not self._current_text:
+                    get_bus().post(SpeechStartedEvent())
+                self.logger.info('Intermediate speech results: [{}]'.format(text))
+
+            self._current_text = text
+
     def detection_thread(self) -> None:
         """
         This thread reads frames from ``_audio_queue``, performs the speech-to-text detection and calls
         """
-        current_text = ''
+        self._current_text = ''
         self.logger.debug('Detection thread started')
         self.on_detection_started()
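With ``detect_audio`` renamed to ``detect_speech`` and the text aggregation moved into ``process_text``, a derived plugin only has to turn a block of frames into text, returning an empty string when nothing is detected. A hypothetical, deliberately trivial subclass for illustration (other abstract members of ``SttPlugin``, such as the ``detect`` action, are omitted here; not part of this commit):

from platypush.plugins.stt import SttPlugin


class SttSilencePlugin(SttPlugin):
    """
    Hypothetical plugin that treats every audio block as silence, so
    process_text() never emits a speech event (illustration only).
    """

    def detect_speech(self, frames) -> str:
        # A real engine would feed the frames to its model here and return
        # the (possibly partial) transcription.
        return ''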
@@ -162,41 +196,40 @@ class SttPlugin(ABC, Plugin):
                 self.logger.warning('Error while feeding audio to the model: {}'.format(str(e)))
                 continue

-            text = self.detect_audio(frames)
-            if text == current_text:
-                if current_text:
-                    self.on_speech_detected(current_text)
-
-                current_text = ''
-            else:
-                if not current_text:
-                    get_bus().post(SpeechStartedEvent())
-
-                self.logger.info('Intermediate speech results: [{}]'.format(text))
-                current_text = text
+            text = self.detect_speech(frames).strip()
+            self.process_text(text)

         self.on_detection_ended()
         self.logger.debug('Detection thread terminated')

-    def recording_thread(self, block_duration: float, input_device: Optional[str] = None) -> None:
+    def recording_thread(self, block_duration: Optional[float] = None, block_size: Optional[int] = None,
+                         input_device: Optional[str] = None) -> None:
         """
         Recording thread. It reads raw frames from the audio device and dispatches them to ``detection_thread``.

-        :param block_duration: Audio blocks duration.
+        :param block_duration: Audio blocks duration. Specify either ``block_duration`` or ``block_size``.
+        :param block_size: Size of the audio blocks. Specify either ``block_duration`` or ``block_size``.
         :param input_device: Input device
         """
+        assert (block_duration or block_size) and not (block_duration and block_size), \
+            'Please specify either block_duration or block_size'
+
+        if not block_size:
+            block_size = int(self.rate * self.channels * block_duration)
+
+        self.before_recording()
         self.logger.debug('Recording thread started')
         device = self._get_input_device(input_device)
-        blocksize = int(self.rate * self.channels * block_duration)
         self._input_stream = sd.InputStream(samplerate=self.rate, device=device,
                                             channels=self.channels, dtype='int16', latency=0,
-                                            blocksize=blocksize)
+                                            blocksize=block_size)
         self._input_stream.start()
+        self.on_recording_started()
         get_bus().post(SpeechDetectionStartedEvent())

         while self._input_stream:
             try:
-                frames = self._input_stream.read(self.rate)[0]
+                frames = self._input_stream.read(block_size)[0]
             except Exception as e:
                 self.logger.warning('Error while reading from the audio input: {}'.format(str(e)))
                 continue
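As a quick sanity check of the fallback branch above: with one channel, 1-second blocks (the constructor default) and an assumed 16000 Hz sample rate (the actual rate is set elsewhere, e.g. from the Porcupine engine's sample rate in the picovoice plugin below), the computed block size is:

# Assumed example values: rate and channels are not fixed by this diff;
# block_duration=1.0 is the constructor default.
rate, channels, block_duration = 16000, 1, 1.0
block_size = int(rate * channels * block_duration)
assert block_size == 16000  # frames read per call to self._input_stream.read()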
@@ -204,6 +237,7 @@ class SttPlugin(ABC, Plugin):
             self._audio_queue.put(frames)

         get_bus().post(SpeechDetectionStoppedEvent())
+        self.on_recording_ended()
         self.logger.debug('Recording thread terminated')

     @abstractmethod
@@ -116,7 +116,7 @@ class SttDeepspeechPlugin(SttPlugin):
         self._model.finishStream()
         self._context = None

-    def detect_audio(self, frames) -> str:
+    def detect_speech(self, frames) -> str:
         model = self._get_model()
         context = self._get_context()
         model.feedAudioContent(context, frames)
platypush/plugins/stt/picovoice.py (new file, 103 lines)
@@ -0,0 +1,103 @@
+import os
+import struct
+from typing import Optional, List
+
+from platypush.message.response.stt import SpeechDetectedResponse
+from platypush.plugins import action
+from platypush.plugins.stt import SttPlugin
+
+
+class SttPicovoicePlugin(SttPlugin):
+    """
+    This plugin performs speech-to-text and speech detection using the
+    `PicoVoice <https://github.com/Picovoice>`_ speech-to-text integrations.
+
+    Requires:
+
+        * **pvporcupine** (``pip install pvporcupine``) for hotword detection.
+
+    """
+
+    def __init__(self,
+                 library_path: Optional[str] = None,
+                 model_file_path: Optional[str] = None,
+                 keyword_file_paths: Optional[List[str]] = None,
+                 sensitivity: float = 0.5,
+                 sensitivities: Optional[List[float]] = None,
+                 *args, **kwargs):
+        from pvporcupine import Porcupine
+        from pvporcupine.resources.util.python.util import LIBRARY_PATH, MODEL_FILE_PATH, KEYWORD_FILE_PATHS
+        super().__init__(*args, **kwargs)
+
+        self.hotwords = list(self.hotwords)
+        self._hotword_engine: Optional[Porcupine] = None
+        self._library_path = os.path.abspath(os.path.expanduser(library_path or LIBRARY_PATH))
+        self._model_file_path = os.path.abspath(os.path.expanduser(model_file_path or MODEL_FILE_PATH))
+
+        if not keyword_file_paths:
+            hotwords = KEYWORD_FILE_PATHS
+            assert all(hotword in hotwords for hotword in self.hotwords), \
+                'Not all the hotwords could be found. Available hotwords: {}'.format(list(hotwords.keys()))
+
+            self._keyword_file_paths = [os.path.abspath(os.path.expanduser(hotwords[hotword]))
+                                        for hotword in self.hotwords]
+        else:
+            self._keyword_file_paths = [
+                os.path.abspath(os.path.expanduser(p))
+                for p in keyword_file_paths
+            ]
+
+        self._sensitivities = []
+        if sensitivities:
+            assert len(self._keyword_file_paths) == len(sensitivities), \
+                'Please specify as many sensitivities as the number of configured hotwords'
+
+            self._sensitivities = sensitivities
+        else:
+            self._sensitivities = [sensitivity] * len(self._keyword_file_paths)
+
+    def convert_frames(self, frames: bytes) -> tuple:
+        assert self._hotword_engine, 'The hotword engine is not running'
+        return struct.unpack_from("h" * self._hotword_engine.frame_length, frames)
+
+    def on_detection_ended(self) -> None:
+        if self._hotword_engine:
+            self._hotword_engine.delete()
+        self._hotword_engine = None
+
+    def detect_speech(self, frames: tuple) -> str:
+        index = self._hotword_engine.process(frames)
+        if index < 0:
+            return ''
+
+        if index is True:
+            index = 0
+        return self.hotwords[index]
+
+    @action
+    def detect(self, audio_file: str) -> SpeechDetectedResponse:
+        """
+        Perform speech-to-text analysis on an audio file.
+
+        :param audio_file: Path to the audio file.
+        """
+        pass
+
+    def recording_thread(self, input_device: Optional[str] = None, *args, **kwargs) -> None:
+        assert self._hotword_engine, 'The hotword engine has not yet been initialized'
+        super().recording_thread(block_size=self._hotword_engine.frame_length, input_device=input_device)
+
+    @action
+    def start_detection(self, *args, **kwargs) -> None:
+        from pvporcupine import Porcupine
+        self._hotword_engine = Porcupine(
+            library_path=self._library_path,
+            model_file_path=self._model_file_path,
+            keyword_file_paths=self._keyword_file_paths,
+            sensitivities=self._sensitivities)
+
+        self.rate = self._hotword_engine.sample_rate
+        super().start_detection(*args, **kwargs)
+
+
+# vim:sw=4:ts=4:et:
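A rough usage sketch for the new plugin, driving it programmatically through the platypush context (a hypothetical snippet, not part of this commit; it assumes an ``stt.picovoice`` section is configured, e.g. with ``hotwords`` matching one of the bundled Porcupine keyword files):

from platypush.context import get_plugin

# Start hotword detection and stop it later. While detection runs,
# HotwordDetectedEvent (and, within the conversation window,
# ConversationDetectedEvent) messages are posted on the bus.
stt = get_plugin('stt.picovoice')
stt.start_detection()
# ... react to the events through event hooks ...
stt.stop_detection()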
@@ -230,7 +230,10 @@ croniter
 # Support for Z-Wave
 # python-openzwave

-# Support for DeepSpeech
+# Support for Mozilla DeepSpeech speech-to-text engine
 # deepspeech
 # numpy
 # sounddevice
+
+# Support for PicoVoice speech-to-text engine
+# pvporcupine
setup.py (4 changes)
@@ -283,7 +283,9 @@ setup(
         'zigbee': ['paho-mqtt'],
         # Support for Z-Wave
         'zwave': ['python-openzwave'],
-        # Support for DeepSpeech
+        # Support for Mozilla DeepSpeech speech-to-text engine
         'deepspeech': ['deepspeech', 'numpy','sounddevice'],
+        # Support for PicoVoice speech-to-text engine
+        'picovoice': ['pvporcupine'],
     },
 )