forked from platypush/platypush
Compare commits
2 commits
304-new-pi
...
master
Author | SHA1 | Date | |
---|---|---|---|
b4d0716bc5 | |||
584f226b62 |
29 changed files with 767 additions and 728 deletions
|
@ -10,4 +10,6 @@ Backends
|
||||||
platypush/backend/midi.rst
|
platypush/backend/midi.rst
|
||||||
platypush/backend/nodered.rst
|
platypush/backend/nodered.rst
|
||||||
platypush/backend/redis.rst
|
platypush/backend/redis.rst
|
||||||
|
platypush/backend/stt.picovoice.hotword.rst
|
||||||
|
platypush/backend/stt.picovoice.speech.rst
|
||||||
platypush/backend/tcp.rst
|
platypush/backend/tcp.rst
|
||||||
|
|
5
docs/source/platypush/backend/stt.picovoice.hotword.rst
Normal file
5
docs/source/platypush/backend/stt.picovoice.hotword.rst
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
``stt.picovoice.hotword``
|
||||||
|
===========================================
|
||||||
|
|
||||||
|
.. automodule:: platypush.backend.stt.picovoice.hotword
|
||||||
|
:members:
|
5
docs/source/platypush/backend/stt.picovoice.speech.rst
Normal file
5
docs/source/platypush/backend/stt.picovoice.speech.rst
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
``stt.picovoice.speech``
|
||||||
|
==========================================
|
||||||
|
|
||||||
|
.. automodule:: platypush.backend.stt.picovoice.speech
|
||||||
|
:members:
|
|
@ -1,5 +0,0 @@
|
||||||
``picovoice``
|
|
||||||
=============
|
|
||||||
|
|
||||||
.. automodule:: platypush.plugins.picovoice
|
|
||||||
:members:
|
|
5
docs/source/platypush/plugins/stt.picovoice.hotword.rst
Normal file
5
docs/source/platypush/plugins/stt.picovoice.hotword.rst
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
``stt.picovoice.hotword``
|
||||||
|
===========================================
|
||||||
|
|
||||||
|
.. automodule:: platypush.plugins.stt.picovoice.hotword
|
||||||
|
:members:
|
5
docs/source/platypush/plugins/stt.picovoice.speech.rst
Normal file
5
docs/source/platypush/plugins/stt.picovoice.speech.rst
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
``stt.picovoice.speech``
|
||||||
|
==========================================
|
||||||
|
|
||||||
|
.. automodule:: platypush.plugins.stt.picovoice.speech
|
||||||
|
:members:
|
|
@ -95,7 +95,6 @@ Plugins
|
||||||
platypush/plugins/nmap.rst
|
platypush/plugins/nmap.rst
|
||||||
platypush/plugins/ntfy.rst
|
platypush/plugins/ntfy.rst
|
||||||
platypush/plugins/otp.rst
|
platypush/plugins/otp.rst
|
||||||
platypush/plugins/picovoice.rst
|
|
||||||
platypush/plugins/pihole.rst
|
platypush/plugins/pihole.rst
|
||||||
platypush/plugins/ping.rst
|
platypush/plugins/ping.rst
|
||||||
platypush/plugins/printer.cups.rst
|
platypush/plugins/printer.cups.rst
|
||||||
|
@ -120,6 +119,8 @@ Plugins
|
||||||
platypush/plugins/smartthings.rst
|
platypush/plugins/smartthings.rst
|
||||||
platypush/plugins/sound.rst
|
platypush/plugins/sound.rst
|
||||||
platypush/plugins/ssh.rst
|
platypush/plugins/ssh.rst
|
||||||
|
platypush/plugins/stt.picovoice.hotword.rst
|
||||||
|
platypush/plugins/stt.picovoice.speech.rst
|
||||||
platypush/plugins/sun.rst
|
platypush/plugins/sun.rst
|
||||||
platypush/plugins/switch.tplink.rst
|
platypush/plugins/switch.tplink.rst
|
||||||
platypush/plugins/switch.wemo.rst
|
platypush/plugins/switch.wemo.rst
|
||||||
|
|
40
platypush/backend/stt/__init__.py
Normal file
40
platypush/backend/stt/__init__.py
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
import time
|
||||||
|
|
||||||
|
from platypush.backend import Backend
|
||||||
|
from platypush.context import get_plugin
|
||||||
|
from platypush.plugins.stt import SttPlugin
|
||||||
|
|
||||||
|
|
||||||
|
class SttBackend(Backend):
|
||||||
|
"""
|
||||||
|
Base class for speech-to-text backends.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, plugin_name: str, retry_sleep: float = 5.0, *args, **kwargs):
|
||||||
|
"""
|
||||||
|
:param plugin_name: Plugin name of the class that will be used for speech detection. Must be an instance of
|
||||||
|
:class:`platypush.plugins.stt.SttPlugin`.
|
||||||
|
:param retry_sleep: Number of seconds the backend will wait on failure before re-initializing the plugin
|
||||||
|
(default: 5 seconds).
|
||||||
|
"""
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.plugin_name = plugin_name
|
||||||
|
self.retry_sleep = retry_sleep
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
super().run()
|
||||||
|
self.logger.info('Starting {} speech-to-text backend'.format(self.__class__.__name__))
|
||||||
|
|
||||||
|
while not self.should_stop():
|
||||||
|
try:
|
||||||
|
plugin: SttPlugin = get_plugin(self.plugin_name)
|
||||||
|
with plugin:
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
plugin._detection_thread.join()
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.exception(e)
|
||||||
|
self.logger.warning('Encountered an unexpected error, retrying in {} seconds'.format(self.retry_sleep))
|
||||||
|
time.sleep(self.retry_sleep)
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
0
platypush/backend/stt/picovoice/__init__.py
Normal file
0
platypush/backend/stt/picovoice/__init__.py
Normal file
21
platypush/backend/stt/picovoice/hotword/__init__.py
Normal file
21
platypush/backend/stt/picovoice/hotword/__init__.py
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
from platypush.backend.stt import SttBackend
|
||||||
|
|
||||||
|
|
||||||
|
class SttPicovoiceHotwordBackend(SttBackend):
|
||||||
|
"""
|
||||||
|
Backend for the PicoVoice hotword detection plugin. Set this plugin to ``enabled`` if you
|
||||||
|
want to run the hotword engine continuously instead of programmatically using
|
||||||
|
``start_detection`` and ``stop_detection``.
|
||||||
|
|
||||||
|
Requires:
|
||||||
|
|
||||||
|
- The :class:`platypush.plugins.stt.deepspeech.SttPicovoiceHotwordPlugin` plugin configured and its dependencies
|
||||||
|
installed.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__('stt.picovoice.hotword', *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
6
platypush/backend/stt/picovoice/hotword/manifest.yaml
Normal file
6
platypush/backend/stt/picovoice/hotword/manifest.yaml
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
manifest:
|
||||||
|
events: {}
|
||||||
|
install:
|
||||||
|
pip: []
|
||||||
|
package: platypush.backend.stt.picovoice.hotword
|
||||||
|
type: backend
|
21
platypush/backend/stt/picovoice/speech/__init__.py
Normal file
21
platypush/backend/stt/picovoice/speech/__init__.py
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
from platypush.backend.stt import SttBackend
|
||||||
|
|
||||||
|
|
||||||
|
class SttPicovoiceSpeechBackend(SttBackend):
|
||||||
|
"""
|
||||||
|
Backend for the PicoVoice speech detection plugin. Set this plugin to ``enabled`` if you
|
||||||
|
want to run the speech engine continuously instead of programmatically using
|
||||||
|
``start_detection`` and ``stop_detection``.
|
||||||
|
|
||||||
|
Requires:
|
||||||
|
|
||||||
|
- The :class:`platypush.plugins.stt.deepspeech.SttPicovoiceSpeechPlugin` plugin configured and its dependencies
|
||||||
|
installed.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__('stt.picovoice.speech', *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
6
platypush/backend/stt/picovoice/speech/manifest.yaml
Normal file
6
platypush/backend/stt/picovoice/speech/manifest.yaml
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
manifest:
|
||||||
|
events: {}
|
||||||
|
install:
|
||||||
|
pip: []
|
||||||
|
package: platypush.backend.stt.picovoice.speech
|
||||||
|
type: backend
|
|
@ -9,22 +9,21 @@ from platypush.context import get_bus, get_plugin
|
||||||
from platypush.entities.assistants import Assistant
|
from platypush.entities.assistants import Assistant
|
||||||
from platypush.entities.managers.assistants import AssistantEntityManager
|
from platypush.entities.managers.assistants import AssistantEntityManager
|
||||||
from platypush.message.event.assistant import (
|
from platypush.message.event.assistant import (
|
||||||
AlarmEndEvent,
|
|
||||||
AlarmStartedEvent,
|
|
||||||
AlertEndEvent,
|
|
||||||
AlertStartedEvent,
|
|
||||||
AssistantEvent,
|
AssistantEvent,
|
||||||
ConversationEndEvent,
|
|
||||||
ConversationStartEvent,
|
ConversationStartEvent,
|
||||||
|
ConversationEndEvent,
|
||||||
ConversationTimeoutEvent,
|
ConversationTimeoutEvent,
|
||||||
HotwordDetectedEvent,
|
ResponseEvent,
|
||||||
|
NoResponseEvent,
|
||||||
|
SpeechRecognizedEvent,
|
||||||
|
AlarmStartedEvent,
|
||||||
|
AlarmEndEvent,
|
||||||
|
TimerStartedEvent,
|
||||||
|
TimerEndEvent,
|
||||||
|
AlertStartedEvent,
|
||||||
|
AlertEndEvent,
|
||||||
MicMutedEvent,
|
MicMutedEvent,
|
||||||
MicUnmutedEvent,
|
MicUnmutedEvent,
|
||||||
NoResponseEvent,
|
|
||||||
ResponseEvent,
|
|
||||||
SpeechRecognizedEvent,
|
|
||||||
TimerEndEvent,
|
|
||||||
TimerStartedEvent,
|
|
||||||
)
|
)
|
||||||
from platypush.plugins import Plugin, action
|
from platypush.plugins import Plugin, action
|
||||||
from platypush.utils import get_plugin_name_by_class
|
from platypush.utils import get_plugin_name_by_class
|
||||||
|
@ -236,9 +235,6 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
|
||||||
self.stop_conversation()
|
self.stop_conversation()
|
||||||
tts.say(text=text, **self.tts_plugin_args)
|
tts.say(text=text, **self.tts_plugin_args)
|
||||||
|
|
||||||
def _on_hotword_detected(self, hotword: Optional[str]):
|
|
||||||
self._send_event(HotwordDetectedEvent, hotword=hotword)
|
|
||||||
|
|
||||||
def _on_speech_recognized(self, phrase: Optional[str]):
|
def _on_speech_recognized(self, phrase: Optional[str]):
|
||||||
phrase = (phrase or '').lower().strip()
|
phrase = (phrase or '').lower().strip()
|
||||||
self._last_query = phrase
|
self._last_query = phrase
|
||||||
|
|
|
@ -1,234 +0,0 @@
|
||||||
from typing import Optional, Sequence
|
|
||||||
|
|
||||||
from platypush.plugins import RunnablePlugin, action
|
|
||||||
from platypush.plugins.assistant import AssistantPlugin
|
|
||||||
|
|
||||||
from ._assistant import Assistant
|
|
||||||
from ._state import AssistantState
|
|
||||||
|
|
||||||
|
|
||||||
# pylint: disable=too-many-ancestors
|
|
||||||
class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
|
|
||||||
"""
|
|
||||||
A voice assistant that runs on your device, based on the `Picovoice
|
|
||||||
<https://picovoice.ai/>`_ engine.
|
|
||||||
|
|
||||||
.. note:: You will need a PicoVoice account and a personal access key to
|
|
||||||
use this integration.
|
|
||||||
|
|
||||||
You can get your personal access key by signing up at the `Picovoice
|
|
||||||
console <https://console.picovoice.ai/>`_. You may be asked to submit a
|
|
||||||
reason for using the service (feel free to mention a personal Platypush
|
|
||||||
integration), and you will receive your personal access key.
|
|
||||||
|
|
||||||
You may also be asked to select which products you want to use. The default
|
|
||||||
configuration of this plugin requires the following:
|
|
||||||
|
|
||||||
* **Porcupine**: wake-word engine, if you want the device to listen for
|
|
||||||
a specific wake word in order to start the assistant.
|
|
||||||
|
|
||||||
* **Cheetah**: speech-to-text engine, if you want your voice
|
|
||||||
interactions to be transcribed into free text - either programmatically
|
|
||||||
or when triggered by the wake word. Or:
|
|
||||||
|
|
||||||
* **Rhino**: intent recognition engine, if you want to extract *intents*
|
|
||||||
out of your voice commands - for instance, the phrase "set the living
|
|
||||||
room temperature to 20 degrees" could be mapped to the intent with the
|
|
||||||
following parameters: ``intent``: ``set_temperature``, ``room``:
|
|
||||||
``living_room``, ``temperature``: ``20``.
|
|
||||||
|
|
||||||
* **Leopard**: speech-to-text engine aimed at offline transcription of
|
|
||||||
audio files rather than real-time transcription.
|
|
||||||
|
|
||||||
* **Orca**: text-to-speech engine, if you want to create your custom
|
|
||||||
logic to respond to user's voice commands and render the responses as
|
|
||||||
audio.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
access_key: str,
|
|
||||||
hotword_enabled: bool = True,
|
|
||||||
stt_enabled: bool = True,
|
|
||||||
intent_enabled: bool = False,
|
|
||||||
keywords: Optional[Sequence[str]] = None,
|
|
||||||
keyword_paths: Optional[Sequence[str]] = None,
|
|
||||||
keyword_model_path: Optional[str] = None,
|
|
||||||
speech_model_path: Optional[str] = None,
|
|
||||||
endpoint_duration: Optional[float] = 0.5,
|
|
||||||
enable_automatic_punctuation: bool = False,
|
|
||||||
start_conversation_on_hotword: bool = True,
|
|
||||||
audio_queue_size: int = 100,
|
|
||||||
conversation_timeout: Optional[float] = 7.5,
|
|
||||||
**kwargs,
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
:param access_key: Your Picovoice access key. You can get it by signing
|
|
||||||
up at the `Picovoice console <https://console.picovoice.ai/>`.
|
|
||||||
:param hotword_enabled: Enable the wake-word engine (default: True).
|
|
||||||
**Note**: The wake-word engine requires you to add Porcupine to the
|
|
||||||
products available in your Picovoice account.
|
|
||||||
:param stt_enabled: Enable the speech-to-text engine (default: True).
|
|
||||||
**Note**: The speech-to-text engine requires you to add Cheetah to
|
|
||||||
the products available in your Picovoice account.
|
|
||||||
:param intent_enabled: Enable the intent recognition engine (default:
|
|
||||||
False).
|
|
||||||
**Note**: The intent recognition engine requires you to add Rhino
|
|
||||||
to the products available in your Picovoice account.
|
|
||||||
:param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
|
|
||||||
google``...). This is required if the wake-word engine is enabled.
|
|
||||||
See the `Picovoice repository
|
|
||||||
<https://github.com/Picovoice/porcupine/tree/master/resources/keyword_files>`_).
|
|
||||||
for a list of the stock keywords available. If you have a custom
|
|
||||||
model, you can pass its path to the ``keyword_paths`` parameter and
|
|
||||||
its filename (without the path and the platform extension) here.
|
|
||||||
:param keyword_paths: List of paths to the keyword files to listen for.
|
|
||||||
Custom keyword files can be created using the `Picovoice console
|
|
||||||
<https://console.picovoice.ai/ppn>`_ and downloaded from the
|
|
||||||
console itself.
|
|
||||||
:param keyword_model_path: If you are using a keyword file in a
|
|
||||||
non-English language, you can provide the path to the model file
|
|
||||||
for its language. Model files are available for all the supported
|
|
||||||
languages through the `Picovoice repository
|
|
||||||
<https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
|
|
||||||
:param speech_model_path: Path to the speech model file. If you are
|
|
||||||
using a language other than English, you can provide the path to the
|
|
||||||
model file for that language. Model files are available for all the
|
|
||||||
supported languages through the `Picovoice repository
|
|
||||||
<https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
|
|
||||||
:param endpoint_duration: If set, the assistant will stop listening when
|
|
||||||
no speech is detected for the specified duration (in seconds) after
|
|
||||||
the end of an utterance.
|
|
||||||
:param enable_automatic_punctuation: Enable automatic punctuation
|
|
||||||
insertion.
|
|
||||||
:param start_conversation_on_hotword: If set to True (default), a speech
|
|
||||||
detection session will be started when the hotword is detected. If
|
|
||||||
set to False, you may want to start the conversation programmatically
|
|
||||||
by calling the :meth:`.start_conversation` method instead, or run any
|
|
||||||
custom logic hotword detection logic. This can be particularly useful
|
|
||||||
when you want to run the assistant in a push-to-talk mode, or when you
|
|
||||||
want different hotwords to trigger conversations with different models
|
|
||||||
or languages.
|
|
||||||
:param audio_queue_size: Maximum number of audio frames to hold in the
|
|
||||||
processing queue. You may want to increase this value if you are
|
|
||||||
running this integration on a slow device and/or the logs report
|
|
||||||
audio frame drops too often. Keep in mind that increasing this value
|
|
||||||
will increase the memory usage of the integration. Also, a higher
|
|
||||||
value may result in higher accuracy at the cost of higher latency.
|
|
||||||
:param conversation_timeout: Maximum time to wait for some speech to be
|
|
||||||
detected after the hotword is detected. If no speech is detected
|
|
||||||
within this time, the conversation will time out and the plugin will
|
|
||||||
go back into hotword detection mode, if the mode is enabled. Default:
|
|
||||||
7.5 seconds.
|
|
||||||
"""
|
|
||||||
super().__init__(**kwargs)
|
|
||||||
self._assistant = None
|
|
||||||
self._assistant_args = {
|
|
||||||
'stop_event': self._should_stop,
|
|
||||||
'access_key': access_key,
|
|
||||||
'hotword_enabled': hotword_enabled,
|
|
||||||
'stt_enabled': stt_enabled,
|
|
||||||
'intent_enabled': intent_enabled,
|
|
||||||
'keywords': keywords,
|
|
||||||
'keyword_paths': keyword_paths,
|
|
||||||
'keyword_model_path': keyword_model_path,
|
|
||||||
'speech_model_path': speech_model_path,
|
|
||||||
'endpoint_duration': endpoint_duration,
|
|
||||||
'enable_automatic_punctuation': enable_automatic_punctuation,
|
|
||||||
'start_conversation_on_hotword': start_conversation_on_hotword,
|
|
||||||
'audio_queue_size': audio_queue_size,
|
|
||||||
'conversation_timeout': conversation_timeout,
|
|
||||||
'on_conversation_start': self._on_conversation_start,
|
|
||||||
'on_conversation_end': self._on_conversation_end,
|
|
||||||
'on_conversation_timeout': self._on_conversation_timeout,
|
|
||||||
'on_speech_recognized': self._on_speech_recognized,
|
|
||||||
'on_hotword_detected': self._on_hotword_detected,
|
|
||||||
}
|
|
||||||
|
|
||||||
@action
|
|
||||||
def start_conversation(self, *_, **__):
|
|
||||||
"""
|
|
||||||
Programmatically start a conversation with the assistant
|
|
||||||
"""
|
|
||||||
if not self._assistant:
|
|
||||||
self.logger.warning('Assistant not initialized')
|
|
||||||
return
|
|
||||||
|
|
||||||
self._assistant.state = AssistantState.DETECTING_SPEECH
|
|
||||||
|
|
||||||
@action
|
|
||||||
def stop_conversation(self, *_, **__):
|
|
||||||
"""
|
|
||||||
Programmatically stop a running conversation with the assistant
|
|
||||||
"""
|
|
||||||
if not self._assistant:
|
|
||||||
self.logger.warning('Assistant not initialized')
|
|
||||||
return
|
|
||||||
|
|
||||||
if self._assistant.hotword_enabled:
|
|
||||||
self._assistant.state = AssistantState.DETECTING_HOTWORD
|
|
||||||
else:
|
|
||||||
self._assistant.state = AssistantState.IDLE
|
|
||||||
|
|
||||||
@action
|
|
||||||
def mute(self, *_, **__):
|
|
||||||
"""
|
|
||||||
Mute the microphone. Alias for :meth:`.set_mic_mute` with
|
|
||||||
``muted=True``.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@action
|
|
||||||
def unmute(self, *_, **__):
|
|
||||||
"""
|
|
||||||
Unmute the microphone. Alias for :meth:`.set_mic_mute` with
|
|
||||||
``muted=False``.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@action
|
|
||||||
def set_mic_mute(self, muted: bool):
|
|
||||||
"""
|
|
||||||
Programmatically mute/unmute the microphone.
|
|
||||||
|
|
||||||
:param muted: Set to True or False.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@action
|
|
||||||
def toggle_mute(self, *_, **__):
|
|
||||||
"""
|
|
||||||
Toggle the mic mute state.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@action
|
|
||||||
def send_text_query(self, *_, query: str, **__):
|
|
||||||
"""
|
|
||||||
Send a text query to the assistant.
|
|
||||||
|
|
||||||
This is equivalent to saying something to the assistant.
|
|
||||||
|
|
||||||
:param query: Query to be sent.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def main(self):
|
|
||||||
while not self.should_stop():
|
|
||||||
self.logger.info('Starting Picovoice assistant')
|
|
||||||
with Assistant(**self._assistant_args) as self._assistant:
|
|
||||||
try:
|
|
||||||
for event in self._assistant:
|
|
||||||
self.logger.debug('Picovoice assistant event: %s', event)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
break
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error('Picovoice assistant error: %s', e, exc_info=True)
|
|
||||||
self.wait_stop(5)
|
|
||||||
|
|
||||||
def stop(self):
|
|
||||||
try:
|
|
||||||
self.stop_conversation()
|
|
||||||
except RuntimeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
super().stop()
|
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
|
|
@ -1,308 +0,0 @@
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
from threading import Event, RLock
|
|
||||||
from time import time
|
|
||||||
from typing import Any, Dict, Optional, Sequence
|
|
||||||
|
|
||||||
import pvcheetah
|
|
||||||
import pvleopard
|
|
||||||
import pvporcupine
|
|
||||||
import pvrhino
|
|
||||||
|
|
||||||
from platypush.message.event.assistant import (
|
|
||||||
ConversationTimeoutEvent,
|
|
||||||
HotwordDetectedEvent,
|
|
||||||
SpeechRecognizedEvent,
|
|
||||||
)
|
|
||||||
|
|
||||||
from ._context import ConversationContext
|
|
||||||
from ._recorder import AudioRecorder
|
|
||||||
from ._state import AssistantState
|
|
||||||
|
|
||||||
|
|
||||||
class Assistant:
|
|
||||||
"""
|
|
||||||
A facade class that wraps the Picovoice engines under an assistant API.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def _default_callback(*_, **__):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
access_key: str,
|
|
||||||
stop_event: Event,
|
|
||||||
hotword_enabled: bool = True,
|
|
||||||
stt_enabled: bool = True,
|
|
||||||
intent_enabled: bool = False,
|
|
||||||
keywords: Optional[Sequence[str]] = None,
|
|
||||||
keyword_paths: Optional[Sequence[str]] = None,
|
|
||||||
keyword_model_path: Optional[str] = None,
|
|
||||||
frame_expiration: float = 3.0, # Don't process audio frames older than this
|
|
||||||
speech_model_path: Optional[str] = None,
|
|
||||||
endpoint_duration: Optional[float] = None,
|
|
||||||
enable_automatic_punctuation: bool = False,
|
|
||||||
start_conversation_on_hotword: bool = False,
|
|
||||||
audio_queue_size: int = 100,
|
|
||||||
conversation_timeout: Optional[float] = None,
|
|
||||||
on_conversation_start=_default_callback,
|
|
||||||
on_conversation_end=_default_callback,
|
|
||||||
on_conversation_timeout=_default_callback,
|
|
||||||
on_speech_recognized=_default_callback,
|
|
||||||
on_hotword_detected=_default_callback,
|
|
||||||
):
|
|
||||||
self._access_key = access_key
|
|
||||||
self._stop_event = stop_event
|
|
||||||
self.logger = logging.getLogger(__name__)
|
|
||||||
self.hotword_enabled = hotword_enabled
|
|
||||||
self.stt_enabled = stt_enabled
|
|
||||||
self.intent_enabled = intent_enabled
|
|
||||||
self.keywords = list(keywords or [])
|
|
||||||
self.keyword_paths = None
|
|
||||||
self.keyword_model_path = None
|
|
||||||
self.frame_expiration = frame_expiration
|
|
||||||
self.speech_model_path = speech_model_path
|
|
||||||
self.endpoint_duration = endpoint_duration
|
|
||||||
self.enable_automatic_punctuation = enable_automatic_punctuation
|
|
||||||
self.start_conversation_on_hotword = start_conversation_on_hotword
|
|
||||||
self.audio_queue_size = audio_queue_size
|
|
||||||
|
|
||||||
self._on_conversation_start = on_conversation_start
|
|
||||||
self._on_conversation_end = on_conversation_end
|
|
||||||
self._on_conversation_timeout = on_conversation_timeout
|
|
||||||
self._on_speech_recognized = on_speech_recognized
|
|
||||||
self._on_hotword_detected = on_hotword_detected
|
|
||||||
|
|
||||||
self._recorder = None
|
|
||||||
self._state = AssistantState.IDLE
|
|
||||||
self._state_lock = RLock()
|
|
||||||
self._ctx = ConversationContext(timeout=conversation_timeout)
|
|
||||||
|
|
||||||
if hotword_enabled:
|
|
||||||
if not keywords:
|
|
||||||
raise ValueError(
|
|
||||||
'You need to provide a list of keywords if the wake-word engine is enabled'
|
|
||||||
)
|
|
||||||
|
|
||||||
if keyword_paths:
|
|
||||||
keyword_paths = [os.path.expanduser(path) for path in keyword_paths]
|
|
||||||
missing_paths = [
|
|
||||||
path for path in keyword_paths if not os.path.isfile(path)
|
|
||||||
]
|
|
||||||
if missing_paths:
|
|
||||||
raise FileNotFoundError(f'Keyword files not found: {missing_paths}')
|
|
||||||
|
|
||||||
self.keyword_paths = keyword_paths
|
|
||||||
|
|
||||||
if keyword_model_path:
|
|
||||||
keyword_model_path = os.path.expanduser(keyword_model_path)
|
|
||||||
if not os.path.isfile(keyword_model_path):
|
|
||||||
raise FileNotFoundError(
|
|
||||||
f'Keyword model file not found: {keyword_model_path}'
|
|
||||||
)
|
|
||||||
|
|
||||||
self.keyword_model_path = keyword_model_path
|
|
||||||
|
|
||||||
self._cheetah: Optional[pvcheetah.Cheetah] = None
|
|
||||||
self._leopard: Optional[pvleopard.Leopard] = None
|
|
||||||
self._porcupine: Optional[pvporcupine.Porcupine] = None
|
|
||||||
self._rhino: Optional[pvrhino.Rhino] = None
|
|
||||||
|
|
||||||
def should_stop(self):
|
|
||||||
return self._stop_event.is_set()
|
|
||||||
|
|
||||||
def wait_stop(self):
|
|
||||||
self._stop_event.wait()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def state(self) -> AssistantState:
|
|
||||||
with self._state_lock:
|
|
||||||
return self._state
|
|
||||||
|
|
||||||
@state.setter
|
|
||||||
def state(self, state: AssistantState):
|
|
||||||
with self._state_lock:
|
|
||||||
prev_state = self._state
|
|
||||||
self._state = state
|
|
||||||
new_state = self.state
|
|
||||||
|
|
||||||
if prev_state == new_state:
|
|
||||||
return
|
|
||||||
|
|
||||||
if prev_state == AssistantState.DETECTING_SPEECH:
|
|
||||||
self._ctx.stop()
|
|
||||||
self._on_conversation_end()
|
|
||||||
elif new_state == AssistantState.DETECTING_SPEECH:
|
|
||||||
self._ctx.start()
|
|
||||||
self._on_conversation_start()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def porcupine(self) -> Optional[pvporcupine.Porcupine]:
|
|
||||||
if not self.hotword_enabled:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not self._porcupine:
|
|
||||||
args: Dict[str, Any] = {'access_key': self._access_key}
|
|
||||||
if self.keywords:
|
|
||||||
args['keywords'] = self.keywords
|
|
||||||
if self.keyword_paths:
|
|
||||||
args['keyword_paths'] = self.keyword_paths
|
|
||||||
if self.keyword_model_path:
|
|
||||||
args['model_path'] = self.keyword_model_path
|
|
||||||
|
|
||||||
self._porcupine = pvporcupine.create(**args)
|
|
||||||
|
|
||||||
return self._porcupine
|
|
||||||
|
|
||||||
@property
|
|
||||||
def cheetah(self) -> Optional[pvcheetah.Cheetah]:
|
|
||||||
if not self.stt_enabled:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not self._cheetah:
|
|
||||||
args: Dict[str, Any] = {'access_key': self._access_key}
|
|
||||||
if self.speech_model_path:
|
|
||||||
args['model_path'] = self.speech_model_path
|
|
||||||
if self.endpoint_duration:
|
|
||||||
args['endpoint_duration_sec'] = self.endpoint_duration
|
|
||||||
if self.enable_automatic_punctuation:
|
|
||||||
args['enable_automatic_punctuation'] = self.enable_automatic_punctuation
|
|
||||||
|
|
||||||
self._cheetah = pvcheetah.create(**args)
|
|
||||||
|
|
||||||
return self._cheetah
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
if self.should_stop():
|
|
||||||
return self
|
|
||||||
|
|
||||||
if self._recorder:
|
|
||||||
self.logger.info('A recording stream already exists')
|
|
||||||
elif self.porcupine or self.cheetah:
|
|
||||||
sample_rate = (self.porcupine or self.cheetah).sample_rate # type: ignore
|
|
||||||
frame_length = (self.porcupine or self.cheetah).frame_length # type: ignore
|
|
||||||
|
|
||||||
self._recorder = AudioRecorder(
|
|
||||||
stop_event=self._stop_event,
|
|
||||||
sample_rate=sample_rate,
|
|
||||||
frame_size=frame_length,
|
|
||||||
queue_size=self.audio_queue_size,
|
|
||||||
channels=1,
|
|
||||||
)
|
|
||||||
|
|
||||||
self._recorder.__enter__()
|
|
||||||
|
|
||||||
if self.porcupine:
|
|
||||||
self.state = AssistantState.DETECTING_HOTWORD
|
|
||||||
else:
|
|
||||||
self.state = AssistantState.DETECTING_SPEECH
|
|
||||||
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, *_):
|
|
||||||
if self._recorder:
|
|
||||||
self._recorder.__exit__(*_)
|
|
||||||
self._recorder = None
|
|
||||||
|
|
||||||
self.state = AssistantState.IDLE
|
|
||||||
|
|
||||||
if self._cheetah:
|
|
||||||
self._cheetah.delete()
|
|
||||||
self._cheetah = None
|
|
||||||
|
|
||||||
if self._leopard:
|
|
||||||
self._leopard.delete()
|
|
||||||
self._leopard = None
|
|
||||||
|
|
||||||
if self._porcupine:
|
|
||||||
self._porcupine.delete()
|
|
||||||
self._porcupine = None
|
|
||||||
|
|
||||||
if self._rhino:
|
|
||||||
self._rhino.delete()
|
|
||||||
self._rhino = None
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __next__(self):
|
|
||||||
has_data = False
|
|
||||||
if self.should_stop() or not self._recorder:
|
|
||||||
raise StopIteration
|
|
||||||
|
|
||||||
while not (self.should_stop() or has_data):
|
|
||||||
data = self._recorder.read()
|
|
||||||
if data is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
frame, t = data
|
|
||||||
if time() - t > self.frame_expiration:
|
|
||||||
self.logger.info(
|
|
||||||
'Skipping audio frame older than %ss', self.frame_expiration
|
|
||||||
)
|
|
||||||
continue # The audio frame is too old
|
|
||||||
|
|
||||||
if self.porcupine and self.state == AssistantState.DETECTING_HOTWORD:
|
|
||||||
return self._process_hotword(frame)
|
|
||||||
|
|
||||||
if self.cheetah and self.state == AssistantState.DETECTING_SPEECH:
|
|
||||||
return self._process_speech(frame)
|
|
||||||
|
|
||||||
raise StopIteration
|
|
||||||
|
|
||||||
def _process_hotword(self, frame):
|
|
||||||
if not self.porcupine:
|
|
||||||
return None
|
|
||||||
|
|
||||||
keyword_index = self.porcupine.process(frame)
|
|
||||||
if keyword_index is None:
|
|
||||||
return None # No keyword detected
|
|
||||||
|
|
||||||
if keyword_index >= 0 and self.keywords:
|
|
||||||
if self.start_conversation_on_hotword:
|
|
||||||
self.state = AssistantState.DETECTING_SPEECH
|
|
||||||
|
|
||||||
self._on_hotword_detected(hotword=self.keywords[keyword_index])
|
|
||||||
return HotwordDetectedEvent(hotword=self.keywords[keyword_index])
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _process_speech(self, frame):
|
|
||||||
if not self.cheetah:
|
|
||||||
return None
|
|
||||||
|
|
||||||
event = None
|
|
||||||
partial_transcript, self._ctx.is_final = self.cheetah.process(frame)
|
|
||||||
|
|
||||||
if partial_transcript:
|
|
||||||
self._ctx.partial_transcript += partial_transcript
|
|
||||||
self.logger.info(
|
|
||||||
'Partial transcript: %s, is_final: %s',
|
|
||||||
self._ctx.partial_transcript,
|
|
||||||
self._ctx.is_final,
|
|
||||||
)
|
|
||||||
|
|
||||||
if self._ctx.is_final or self._ctx.timed_out:
|
|
||||||
phrase = ''
|
|
||||||
if self.cheetah:
|
|
||||||
phrase = self.cheetah.flush()
|
|
||||||
|
|
||||||
self._ctx.partial_transcript += phrase
|
|
||||||
phrase = self._ctx.partial_transcript
|
|
||||||
phrase = phrase[:1].lower() + phrase[1:]
|
|
||||||
|
|
||||||
if self._ctx.is_final or phrase:
|
|
||||||
event = SpeechRecognizedEvent(phrase=phrase)
|
|
||||||
self._on_speech_recognized(phrase=phrase)
|
|
||||||
else:
|
|
||||||
event = ConversationTimeoutEvent()
|
|
||||||
self._on_conversation_timeout()
|
|
||||||
|
|
||||||
self._ctx.reset()
|
|
||||||
if self.hotword_enabled:
|
|
||||||
self.state = AssistantState.DETECTING_HOTWORD
|
|
||||||
|
|
||||||
return event
|
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
|
|
@ -1,43 +0,0 @@
|
||||||
from dataclasses import dataclass
|
|
||||||
from time import time
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class ConversationContext:
    """
    Context of the conversation process.

    Accumulates the partial transcript of the current utterance and tracks
    the timestamps used to decide whether a conversation has timed out.
    """

    # Text transcribed so far for the current utterance.
    partial_transcript: str = ''
    # Whether the STT engine has marked the utterance as complete.
    is_final: bool = False
    # Conversation timeout in seconds; None disables the timeout.
    timeout: Optional[float] = None
    # Epoch timestamp of when the conversation started (None when idle).
    t_start: Optional[float] = None
    # Epoch timestamp of when the conversation ended (None when idle).
    t_end: Optional[float] = None

    def start(self):
        """Reset the context and mark the conversation start time."""
        self.reset()
        self.t_start = time()

    def stop(self):
        """Reset the context and mark the conversation end time."""
        self.reset()
        self.t_end = time()

    def reset(self):
        """Clear the transcript, the final flag and both timestamps."""
        self.partial_transcript = ''
        self.is_final = False
        self.t_start = None
        self.t_end = None

    @property
    def timed_out(self):
        """
        ``True`` if a timeout is configured, the conversation has started,
        no speech has been transcribed yet, and more than ``timeout``
        seconds have elapsed since the start.
        """
        # Fix: the raw `and` chain could return non-bool values (None, '')
        # to callers expecting a predicate; coerce to a proper bool.
        return bool(
            not self.partial_transcript
            and not self.is_final
            and self.timeout
            and self.t_start
            and time() - self.t_start > self.timeout
        )
|
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
|
|
@ -1,76 +0,0 @@
|
||||||
from collections import namedtuple
|
|
||||||
from logging import getLogger
|
|
||||||
from queue import Full, Queue
|
|
||||||
from threading import Event
|
|
||||||
from time import time
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import sounddevice as sd
|
|
||||||
|
|
||||||
from platypush.utils import wait_for_either
|
|
||||||
|
|
||||||
|
|
||||||
AudioFrame = namedtuple('AudioFrame', ['data', 'timestamp'])
|
|
||||||
|
|
||||||
|
|
||||||
class AudioRecorder:
    """
    Audio recorder component that uses the sounddevice library to record audio
    from the microphone.

    Captured frames are pushed to an internal bounded queue from the
    sounddevice callback thread and consumed via :meth:`read`.
    """

    def __init__(
        self,
        stop_event: Event,
        sample_rate: int,
        frame_size: int,
        channels: int,
        dtype: str = 'int16',
        queue_size: int = 100,
    ):
        """
        :param stop_event: Upstream event that, when set, stops the recorder.
        :param sample_rate: Recording sample rate, in Hz.
        :param frame_size: Number of samples per captured frame.
        :param channels: Number of audio channels to record.
        :param dtype: Sample data type passed to sounddevice (default: int16).
        :param queue_size: Maximum number of buffered frames before new ones
            are dropped.
        """
        self.logger = getLogger(__name__)
        self._audio_queue: Queue[AudioFrame] = Queue(maxsize=queue_size)
        self.frame_size = frame_size
        # Local stop flag, independent from the upstream one so the recorder
        # can be stopped without stopping the owning component.
        self._stop_event = Event()
        self._upstream_stop_event = stop_event
        self.stream = sd.InputStream(
            samplerate=sample_rate,
            channels=channels,
            dtype=dtype,
            blocksize=frame_size,
            callback=self._audio_callback,
        )

    def __enter__(self):
        """Start the audio stream and return self."""
        self._stop_event.clear()
        self.stream.start()
        return self

    def __exit__(self, *_):
        """Stop the audio stream on context exit."""
        self.stop()

    def _audio_callback(self, indata, *_):
        # Runs on the sounddevice callback thread.
        if self.should_stop():
            return

        try:
            # Flatten to 1-D and timestamp the frame at enqueue time.
            self._audio_queue.put_nowait(AudioFrame(indata.reshape(-1), time()))
        except Full:
            self.logger.warning('Audio queue is full, dropping audio frame')

    def read(self, timeout: Optional[float] = None):
        """
        Pop the next available audio frame from the queue.

        :param timeout: How long to wait for a frame, in seconds
            (default: block indefinitely).
        :return: The next :class:`AudioFrame`, or ``None`` on timeout.
        """
        # Local import: the module-level import only pulls Full and Queue.
        from queue import Empty

        try:
            return self._audio_queue.get(timeout=timeout)
        except Empty:
            # Fix: Queue.get raises queue.Empty on timeout, not TimeoutError —
            # the previous handler never caught the timeout case.
            self.logger.debug('Audio queue is empty')
            return None

    def stop(self):
        """Set the local stop flag and stop the underlying stream."""
        self._stop_event.set()
        self.stream.stop()

    def should_stop(self):
        """``True`` if either the local or the upstream stop event is set."""
        return self._stop_event.is_set() or self._upstream_stop_event.is_set()

    def wait(self, timeout: Optional[float] = None):
        """Block until the recorder is stopped or ``timeout`` elapses."""
        wait_for_either(self._stop_event, self._upstream_stop_event, timeout=timeout)
|
|
|
@ -1,14 +0,0 @@
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
|
|
||||||
class AssistantState(Enum):
    """
    Enumerates the states the voice assistant can be in.
    """

    # The assistant is not processing any audio.
    IDLE = 'idle'
    # The assistant is listening for the wake-word.
    DETECTING_HOTWORD = 'detecting_hotword'
    # The assistant is actively transcribing user speech.
    DETECTING_SPEECH = 'detecting_speech'
|
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
|
|
@ -1,23 +0,0 @@
|
||||||
manifest:
|
|
||||||
package: platypush.plugins.assistant.picovoice
|
|
||||||
type: plugin
|
|
||||||
events:
|
|
||||||
- platypush.message.event.assistant.ConversationEndEvent
|
|
||||||
- platypush.message.event.assistant.ConversationStartEvent
|
|
||||||
- platypush.message.event.assistant.ConversationTimeoutEvent
|
|
||||||
- platypush.message.event.assistant.HotwordDetectedEvent
|
|
||||||
- platypush.message.event.assistant.MicMutedEvent
|
|
||||||
- platypush.message.event.assistant.MicUnmutedEvent
|
|
||||||
- platypush.message.event.assistant.NoResponseEvent
|
|
||||||
- platypush.message.event.assistant.ResponseEvent
|
|
||||||
- platypush.message.event.assistant.SpeechRecognizedEvent
|
|
||||||
install:
|
|
||||||
pacman:
|
|
||||||
- python-sounddevice
|
|
||||||
pip:
|
|
||||||
- pvcheetah
|
|
||||||
- pvleopard
|
|
||||||
- pvorca
|
|
||||||
- pvporcupine
|
|
||||||
- pvrhino
|
|
||||||
- sounddevice
|
|
|
@ -37,7 +37,17 @@ class MqttClient(mqtt.Client, threading.Thread):
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
self.client_id = client_id or str(Config.get('device_id'))
|
self.client_id = client_id or str(Config.get('device_id'))
|
||||||
mqtt.Client.__init__(self, *args, client_id=self.client_id, **kwargs)
|
kwargs['client_id'] = self.client_id
|
||||||
|
|
||||||
|
# Breaking change in paho.mqtt >= 2.0.0: the callback API version
|
||||||
|
# parameter should be passed, see
|
||||||
|
# https://github.com/eclipse/paho.mqtt.python/blob/28aa2e6b26a86e4b29126323892fb5f43637d6d6/ChangeLog.txt#L6
|
||||||
|
cbApiVersion = getattr(mqtt, 'CallbackAPIVersion', None)
|
||||||
|
if cbApiVersion:
|
||||||
|
kwargs['callback_api_version'] = cbApiVersion.VERSION1
|
||||||
|
|
||||||
|
mqtt.Client.__init__(self, *args, **kwargs)
|
||||||
|
|
||||||
threading.Thread.__init__(self, name=f'MQTTClient:{self.client_id}')
|
threading.Thread.__init__(self, name=f'MQTTClient:{self.client_id}')
|
||||||
|
|
||||||
self.logger = logging.getLogger(self.__class__.__name__)
|
self.logger = logging.getLogger(self.__class__.__name__)
|
||||||
|
|
|
@ -247,11 +247,9 @@ class AudioManager:
|
||||||
wait_start = time()
|
wait_start = time()
|
||||||
for audio_thread in streams_to_stop:
|
for audio_thread in streams_to_stop:
|
||||||
audio_thread.join(
|
audio_thread.join(
|
||||||
timeout=(
|
timeout=max(0, timeout - (time() - wait_start))
|
||||||
max(0, timeout - (time() - wait_start))
|
if timeout is not None
|
||||||
if timeout is not None
|
else None
|
||||||
else None
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Remove references
|
# Remove references
|
||||||
|
|
336
platypush/plugins/stt/__init__.py
Normal file
336
platypush/plugins/stt/__init__.py
Normal file
|
@ -0,0 +1,336 @@
|
||||||
|
import queue
|
||||||
|
import threading
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Optional, Union, List
|
||||||
|
|
||||||
|
import sounddevice as sd
|
||||||
|
|
||||||
|
from platypush.context import get_bus
|
||||||
|
from platypush.message.event.stt import (
|
||||||
|
SpeechDetectionStartedEvent,
|
||||||
|
SpeechDetectionStoppedEvent,
|
||||||
|
SpeechStartedEvent,
|
||||||
|
SpeechDetectedEvent,
|
||||||
|
HotwordDetectedEvent,
|
||||||
|
ConversationDetectedEvent,
|
||||||
|
)
|
||||||
|
from platypush.message.response.stt import SpeechDetectedResponse
|
||||||
|
from platypush.plugins import Plugin, action
|
||||||
|
|
||||||
|
|
||||||
|
class SttPlugin(ABC, Plugin):
    """
    Abstract class for speech-to-text plugins.

    Subclasses implement :meth:`detect_speech` (streaming detection) and
    :meth:`detect` (file-based detection). The base class manages two
    threads: a recording thread that reads raw frames from the audio input
    and pushes them to an internal queue, and a detection thread that pops
    frames, runs the model and posts the appropriate events on the bus.
    """

    # Seconds to wait for the recording/detection threads to join on stop.
    _thread_stop_timeout = 10.0
    # Default audio sample rate (Hz); subclasses may override it at runtime.
    rate = 16000
    # Number of recorded audio channels.
    channels = 1

    def __init__(
        self,
        input_device: Optional[Union[int, str]] = None,
        hotword: Optional[str] = None,
        hotwords: Optional[List[str]] = None,
        conversation_timeout: Optional[float] = 10.0,
        block_duration: float = 1.0,
    ):
        """
        :param input_device: PortAudio device index or name that will be used for recording speech (default: default
            system audio input device).
        :param hotword: When this word is detected, the plugin will trigger a
            :class:`platypush.message.event.stt.HotwordDetectedEvent` instead of a
            :class:`platypush.message.event.stt.SpeechDetectedEvent` event. You can use these events for hooking other
            assistants.
        :param hotwords: Use a list of hotwords instead of a single one.
        :param conversation_timeout: If ``hotword`` or ``hotwords`` are set and ``conversation_timeout`` is set,
            the next speech detected event will trigger a :class:`platypush.message.event.stt.ConversationDetectedEvent`
            instead of a :class:`platypush.message.event.stt.SpeechDetectedEvent` event. You can hook custom hooks
            here to run any logic depending on the detected speech - it can emulate a kind of
            "OK, Google. Turn on the lights" interaction without using an external assistant (default: 10 seconds).
        :param block_duration: Duration of the acquired audio blocks (default: 1 second).
        """

        super().__init__()
        self.input_device = input_device
        self.conversation_timeout = conversation_timeout
        self.block_duration = block_duration

        self.hotwords = set(hotwords or [])
        if hotword:
            # NOTE: a single ``hotword`` replaces the ``hotwords`` list
            # entirely rather than being merged with it.
            self.hotwords = {hotword}

        # Set while a hotword-initiated conversation window is open.
        self._conversation_event = threading.Event()
        self._input_stream: Optional[sd.InputStream] = None
        self._recording_thread: Optional[threading.Thread] = None
        self._detection_thread: Optional[threading.Thread] = None
        # Created on start_detection, set to None on stop_detection; also
        # used by the worker threads as a "keep running" flag.
        self._audio_queue: Optional[queue.Queue] = None
        # Rolling partial transcript for the utterance currently in progress.
        self._current_text = ''

    def _get_input_device(self, device: Optional[Union[int, str]] = None) -> int:
        """
        Get the index of the input device by index or name.

        :param device: Device index or name. If None is set then the function will return the index of the
            default audio input device.
        :return: Index of the audio input device.
        """
        if not device:
            device = self.input_device
        if not device:
            # Fall back to the default input device of the first host API.
            return sd.query_hostapis()[0].get('default_input_device')

        if isinstance(device, int):
            assert device <= len(sd.query_devices())
            return device

        # Resolve a device name to its index.
        for i, dev in enumerate(sd.query_devices()):
            if dev['name'] == device:
                return i

        raise AssertionError('Device {} not found'.format(device))

    def on_speech_detected(self, speech: str) -> None:
        """
        Hook called when speech is detected. Triggers the right event depending on the current context.

        :param speech: Detected speech.
        """
        speech = speech.strip()

        if speech in self.hotwords:
            # Hotword match: open a conversation window if a timeout is set.
            event = HotwordDetectedEvent(hotword=speech)
            if self.conversation_timeout:
                self._conversation_event.set()
                # Close the conversation window after the timeout elapses.
                threading.Timer(
                    self.conversation_timeout, lambda: self._conversation_event.clear()
                ).start()
        elif self._conversation_event.is_set():
            # Speech within an open conversation window.
            event = ConversationDetectedEvent(speech=speech)
        else:
            event = SpeechDetectedEvent(speech=speech)

        get_bus().post(event)

    @staticmethod
    def convert_frames(frames: bytes) -> bytes:
        """
        Conversion method for raw audio frames. It just returns the input frames as bytes. Override it if required
        by your logic.

        :param frames: Input audio frames, as bytes.
        :return: The audio frames as passed on the input. Override if required.
        """
        return frames

    def on_detection_started(self) -> None:
        """
        Method called when the ``detection_thread`` starts. Initialize your context variables and models here if
        required.
        """
        pass

    def on_detection_ended(self) -> None:
        """
        Method called when the ``detection_thread`` stops. Clean up your context variables and models here.
        """
        pass

    def before_recording(self) -> None:
        """
        Method called when the ``recording_thread`` starts. Put here any logic that you may want to run before the
        recording thread starts.
        """
        pass

    def on_recording_started(self) -> None:
        """
        Method called after the ``recording_thread`` opens the audio device. Put here any logic that you may want to
        run after the recording starts.
        """
        pass

    def on_recording_ended(self) -> None:
        """
        Method called when the ``recording_thread`` stops. Put here any logic that you want to run after the audio
        device is closed.
        """
        pass

    @abstractmethod
    def detect_speech(self, frames) -> str:
        """
        Method called within the ``detection_thread`` when new audio frames have been captured. Must be implemented
        by the derived classes.

        :param frames: Audio frames, as returned by ``convert_frames``.
        :return: Detected text, as a string. Returns an empty string if no text has been detected.
        """
        raise NotImplementedError

    def process_text(self, text: str) -> None:
        """
        Accumulate intermediate detection results and fire
        :meth:`on_speech_detected` once the text stabilizes (same text seen
        twice in a row) or detection goes silent after some text was seen.

        :param text: Latest text returned by :meth:`detect_speech`.
        """
        if (not text and self._current_text) or (text and text == self._current_text):
            self.on_speech_detected(self._current_text)
            self._current_text = ''
        else:
            if text:
                if not self._current_text:
                    # First chunk of a new utterance.
                    get_bus().post(SpeechStartedEvent())
                self.logger.info('Intermediate speech results: [{}]'.format(text))

            self._current_text = text

    def detection_thread(self) -> None:
        """
        This thread reads frames from ``_audio_queue``, performs the speech-to-text detection and calls
        """
        self._current_text = ''
        self.logger.debug('Detection thread started')
        self.on_detection_started()

        # The queue reference doubles as the run flag: stop_detection() sets
        # it to None to terminate the loop.
        # NOTE(review): get() below has no timeout, so if the queue empties
        # right before stop, this thread may block until the next frame —
        # verify shutdown behavior.
        while self._audio_queue:
            try:
                frames = self._audio_queue.get()
                frames = self.convert_frames(frames)
            except Exception as e:
                self.logger.warning(
                    'Error while feeding audio to the model: {}'.format(str(e))
                )
                continue

            text = self.detect_speech(frames).strip()
            self.process_text(text)

        self.on_detection_ended()
        self.logger.debug('Detection thread terminated')

    def recording_thread(
        self,
        block_duration: Optional[float] = None,
        block_size: Optional[int] = None,
        input_device: Optional[str] = None,
    ) -> None:
        """
        Recording thread. It reads raw frames from the audio device and dispatches them to ``detection_thread``.

        :param block_duration: Audio blocks duration. Specify either ``block_duration`` or ``block_size``.
        :param block_size: Size of the audio blocks. Specify either ``block_duration`` or ``block_size``.
        :param input_device: Input device
        """
        assert (block_duration or block_size) and not (
            block_duration and block_size
        ), 'Please specify either block_duration or block_size'

        if not block_size:
            # Derive the block size (in samples) from the requested duration.
            block_size = int(self.rate * self.channels * block_duration)

        self.before_recording()
        self.logger.debug('Recording thread started')
        device = self._get_input_device(input_device)
        self._input_stream = sd.InputStream(
            samplerate=self.rate,
            device=device,
            channels=self.channels,
            dtype='int16',
            latency=0,
            blocksize=block_size,
        )
        self._input_stream.start()
        self.on_recording_started()
        get_bus().post(SpeechDetectionStartedEvent())

        # The stream reference doubles as the run flag: stop_detection()
        # sets it to None to terminate the loop.
        while self._input_stream:
            try:
                frames = self._input_stream.read(block_size)[0]
            except Exception as e:
                self.logger.warning(
                    'Error while reading from the audio input: {}'.format(str(e))
                )
                continue

            self._audio_queue.put(frames)

        get_bus().post(SpeechDetectionStoppedEvent())
        self.on_recording_ended()
        self.logger.debug('Recording thread terminated')

    @abstractmethod
    @action
    def detect(self, audio_file: str) -> SpeechDetectedResponse:
        """
        Perform speech-to-text analysis on an audio file. Must be implemented by the derived classes.

        :param audio_file: Path to the audio file.
        """
        raise NotImplementedError

    def __enter__(self):
        """
        Context manager enter. Starts detection and returns self.
        """
        self.start_detection()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context manager exit. Stops detection.
        """
        self.stop_detection()

    @action
    def start_detection(
        self,
        input_device: Optional[str] = None,
        seconds: Optional[float] = None,
        block_duration: Optional[float] = None,
    ) -> None:
        """
        Start the speech detection engine.

        :param input_device: Audio input device name/index override
        :param seconds: If set, then the detection engine will stop after this many seconds, otherwise it'll
            start running until ``stop_detection`` is called or application stop.
        :param block_duration: ``block_duration`` override.
        """
        assert (
            not self._input_stream and not self._recording_thread
        ), 'Speech detection is already running'
        block_duration = block_duration or self.block_duration
        input_device = input_device if input_device is not None else self.input_device
        self._audio_queue = queue.Queue()
        self._recording_thread = threading.Thread(
            target=lambda: self.recording_thread(
                block_duration=block_duration, input_device=input_device
            )
        )

        self._recording_thread.start()
        self._detection_thread = threading.Thread(
            target=lambda: self.detection_thread()
        )
        self._detection_thread.start()

        if seconds:
            # Auto-stop after the requested number of seconds.
            threading.Timer(seconds, lambda: self.stop_detection()).start()

    @action
    def stop_detection(self) -> None:
        """
        Stop the speech detection engine.
        """
        assert self._input_stream, 'Speech detection is not running'
        # Clearing the stream reference terminates the recording loop.
        self._input_stream.stop(ignore_errors=True)
        self._input_stream.close(ignore_errors=True)
        self._input_stream = None

        if self._recording_thread:
            self._recording_thread.join(timeout=self._thread_stop_timeout)
            self._recording_thread = None

        # Clearing the queue reference terminates the detection loop.
        self._audio_queue = None
        if self._detection_thread:
            self._detection_thread.join(timeout=self._thread_stop_timeout)
            self._detection_thread = None
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
0
platypush/plugins/stt/picovoice/__init__.py
Normal file
0
platypush/plugins/stt/picovoice/__init__.py
Normal file
120
platypush/plugins/stt/picovoice/hotword/__init__.py
Normal file
120
platypush/plugins/stt/picovoice/hotword/__init__.py
Normal file
|
@ -0,0 +1,120 @@
|
||||||
|
import os
|
||||||
|
import struct
|
||||||
|
from typing import Optional, List
|
||||||
|
|
||||||
|
from platypush.message.response.stt import SpeechDetectedResponse
|
||||||
|
from platypush.plugins import action
|
||||||
|
from platypush.plugins.stt import SttPlugin
|
||||||
|
|
||||||
|
|
||||||
|
class SttPicovoiceHotwordPlugin(SttPlugin):
    """
    This plugin performs hotword detection using `PicoVoice <https://github.com/Picovoice>`_.
    """

    def __init__(
        self,
        library_path: Optional[str] = None,
        model_file_path: Optional[str] = None,
        keyword_file_paths: Optional[List[str]] = None,
        sensitivity: float = 0.5,
        sensitivities: Optional[List[float]] = None,
        *args,
        **kwargs
    ):
        """
        :param library_path: Path to the Porcupine dynamic library (default:
            the library bundled with the ``pvporcupine`` package).
        :param model_file_path: Path to the Porcupine model file (default:
            the model bundled with the ``pvporcupine`` package).
        :param keyword_file_paths: Paths to the keyword files to monitor. If
            not set, the configured hotwords are resolved against the keyword
            files bundled with ``pvporcupine``.
        :param sensitivity: Default detection sensitivity applied to every
            hotword (default: 0.5).
        :param sensitivities: Optional per-hotword sensitivities; must have
            one entry per configured keyword file.
        """
        from pvporcupine import Porcupine
        from pvporcupine.resources.util.python.util import (
            LIBRARY_PATH,
            MODEL_FILE_PATH,
            KEYWORD_FILE_PATHS,
        )

        super().__init__(*args, **kwargs)

        # Keep hotwords as an ordered list: detect_speech() maps the keyword
        # index returned by the engine back to a hotword by position.
        self.hotwords = list(self.hotwords)
        self._hotword_engine: Optional[Porcupine] = None
        self._library_path = os.path.abspath(
            os.path.expanduser(library_path or LIBRARY_PATH)
        )
        self._model_file_path = os.path.abspath(
            os.path.expanduser(model_file_path or MODEL_FILE_PATH)
        )

        if not keyword_file_paths:
            # Resolve hotword names against the bundled keyword files.
            hotwords = KEYWORD_FILE_PATHS
            assert all(
                hotword in hotwords for hotword in self.hotwords
            ), 'Not all the hotwords could be found. Available hotwords: {}'.format(
                list(hotwords.keys())
            )

            self._keyword_file_paths = [
                os.path.abspath(os.path.expanduser(hotwords[hotword]))
                for hotword in self.hotwords
            ]
        else:
            self._keyword_file_paths = [
                os.path.abspath(os.path.expanduser(p)) for p in keyword_file_paths
            ]

        self._sensitivities = []
        if sensitivities:
            assert len(self._keyword_file_paths) == len(
                sensitivities
            ), 'Please specify as many sensitivities as the number of configured hotwords'

            self._sensitivities = sensitivities
        else:
            self._sensitivities = [sensitivity] * len(self._keyword_file_paths)

    def convert_frames(self, frames: bytes) -> tuple:
        """Unpack raw PCM bytes into the 16-bit sample tuple Porcupine expects."""
        assert self._hotword_engine, 'The hotword engine is not running'
        return struct.unpack_from("h" * self._hotword_engine.frame_length, frames)

    def on_detection_ended(self) -> None:
        """Release the hotword engine when detection stops."""
        if self._hotword_engine:
            self._hotword_engine.delete()
            self._hotword_engine = None

    def detect_speech(self, frames: tuple) -> str:
        """
        Feed a frame to the hotword engine.

        :return: The detected hotword, or an empty string if none fired.
        """
        keyword_index = self._hotword_engine.process(frames)

        # Porcupine's process() returns a bool when a single keyword is
        # configured and an int index (-1 = no match) for multiple keywords.
        # Fix: the previous code let a `False` result (no match, single
        # keyword) fall through to `self.hotwords[False]`, spuriously
        # reporting the first hotword on every silent frame.
        if keyword_index is True:
            keyword_index = 0
        if keyword_index is False or keyword_index < 0:
            return ''

        return self.hotwords[keyword_index]

    @action
    def detect(self, audio_file: str) -> SpeechDetectedResponse:
        """
        Perform speech-to-text analysis on an audio file.

        :param audio_file: Path to the audio file.
        """
        pass

    def recording_thread(
        self, input_device: Optional[str] = None, *args, **kwargs
    ) -> None:
        """Record audio in blocks matching the engine's frame length."""
        assert self._hotword_engine, 'The hotword engine has not yet been initialized'
        super().recording_thread(
            block_size=self._hotword_engine.frame_length, input_device=input_device
        )

    @action
    def start_detection(self, *args, **kwargs) -> None:
        """Initialize the Porcupine engine and start the detection threads."""
        from pvporcupine import Porcupine

        self._hotword_engine = Porcupine(
            library_path=self._library_path,
            model_file_path=self._model_file_path,
            keyword_file_paths=self._keyword_file_paths,
            sensitivities=self._sensitivities,
        )

        # The engine dictates the sample rate the recorder must use.
        self.rate = self._hotword_engine.sample_rate
        super().start_detection(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
7
platypush/plugins/stt/picovoice/hotword/manifest.yaml
Normal file
7
platypush/plugins/stt/picovoice/hotword/manifest.yaml
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
manifest:
|
||||||
|
events: {}
|
||||||
|
install:
|
||||||
|
pip:
|
||||||
|
- pvporcupine
|
||||||
|
package: platypush.plugins.stt.picovoice.hotword
|
||||||
|
type: plugin
|
154
platypush/plugins/stt/picovoice/speech/__init__.py
Normal file
154
platypush/plugins/stt/picovoice/speech/__init__.py
Normal file
|
@ -0,0 +1,154 @@
|
||||||
|
import inspect
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import struct
|
||||||
|
import threading
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from platypush.message.event.stt import SpeechStartedEvent
|
||||||
|
|
||||||
|
from platypush.context import get_bus
|
||||||
|
from platypush.message.response.stt import SpeechDetectedResponse
|
||||||
|
from platypush.plugins import action
|
||||||
|
from platypush.plugins.stt import SttPlugin
|
||||||
|
|
||||||
|
|
||||||
|
class SttPicovoiceSpeechPlugin(SttPlugin):
    """
    This plugin performs speech detection using `PicoVoice <https://github.com/Picovoice>`_.
    NOTE: The PicoVoice product used for real-time speech-to-text (Cheetah) can be used freely for
    personal applications on x86_64 Linux. Other architectures and operating systems require a commercial license.
    You can ask for a license `here <https://picovoice.ai/contact.html>`_.
    """

    def __init__(
        self,
        library_path: Optional[str] = None,
        acoustic_model_path: Optional[str] = None,
        language_model_path: Optional[str] = None,
        license_path: Optional[str] = None,
        end_of_speech_timeout: int = 1,
        *args,
        **kwargs
    ):
        """
        :param library_path: Path to the Cheetah binary library for your OS
            (default: ``CHEETAH_INSTALL_DIR/lib/OS/ARCH/libpv_cheetah.EXT``).
        :param acoustic_model_path: Path to the acoustic speech model
            (default: ``CHEETAH_INSTALL_DIR/lib/common/acoustic_model.pv``).
        :param language_model_path: Path to the language model
            (default: ``CHEETAH_INSTALL_DIR/lib/common/language_model.pv``).
        :param license_path: Path to your PicoVoice license
            (default: ``CHEETAH_INSTALL_DIR/resources/license/cheetah_eval_linux_public.lic``).
        :param end_of_speech_timeout: Number of seconds of silence during speech recognition before considering
            a phrase over (default: 1).
        """
        from pvcheetah import Cheetah

        super().__init__(*args, **kwargs)

        # Root of the pvcheetah installation, used to resolve default paths.
        self._basedir = os.path.abspath(
            os.path.join(inspect.getfile(Cheetah), '..', '..', '..')
        )
        if not library_path:
            library_path = self._get_library_path()
        if not language_model_path:
            language_model_path = os.path.join(
                self._basedir, 'lib', 'common', 'language_model.pv'
            )
        if not acoustic_model_path:
            acoustic_model_path = os.path.join(
                self._basedir, 'lib', 'common', 'acoustic_model.pv'
            )
        if not license_path:
            license_path = os.path.join(
                self._basedir, 'resources', 'license', 'cheetah_eval_linux_public.lic'
            )

        self._library_path = library_path
        self._language_model_path = language_model_path
        self._acoustic_model_path = acoustic_model_path
        self._license_path = license_path
        self._end_of_speech_timeout = end_of_speech_timeout
        self._stt_engine: Optional[Cheetah] = None
        # Set while an utterance is in progress; used to emit a single
        # SpeechStartedEvent per utterance.
        self._speech_in_progress = threading.Event()

    def _get_library_path(self) -> str:
        """Locate the bundled libpv_cheetah library for the current OS/arch."""
        path = os.path.join(
            self._basedir, 'lib', platform.system().lower(), platform.machine()
        )
        return os.path.join(
            path, [f for f in os.listdir(path) if f.startswith('libpv_cheetah.')][0]
        )

    def convert_frames(self, frames: bytes) -> tuple:
        """Unpack raw PCM bytes into the 16-bit sample tuple Cheetah expects."""
        assert self._stt_engine, 'The speech engine is not running'
        return struct.unpack_from("h" * self._stt_engine.frame_length, frames)

    def on_detection_ended(self) -> None:
        """Release the speech engine when detection stops."""
        if self._stt_engine:
            self._stt_engine.delete()
            self._stt_engine = None

    def detect_speech(self, frames: tuple) -> str:
        """
        Feed a frame to the speech engine, accumulating the transcript and
        firing :meth:`on_speech_detected` when the utterance ends.
        """
        text, is_endpoint = self._stt_engine.process(frames)
        text = text.strip()

        if text:
            if not self._speech_in_progress.is_set():
                self._speech_in_progress.set()
                get_bus().post(SpeechStartedEvent())

            self._current_text += ' ' + text.strip()

        if is_endpoint:
            # flush() returns any remaining buffered text for the utterance.
            # Fix: dropped a redundant duplicated .strip() call.
            text = self._stt_engine.flush().strip()
            if text:
                self._current_text += ' ' + text

            self._speech_in_progress.clear()
            if self._current_text:
                self.on_speech_detected(self._current_text)

            self._current_text = ''

        return self._current_text

    def process_text(self, text: str) -> None:
        # No-op: detect_speech() drives on_speech_detected() directly instead
        # of the base class's text-stabilization logic.
        pass

    @action
    def detect(self, audio_file: str) -> SpeechDetectedResponse:
        """
        Perform speech-to-text analysis on an audio file.

        :param audio_file: Path to the audio file.
        """
        pass

    def recording_thread(
        self, input_device: Optional[str] = None, *args, **kwargs
    ) -> None:
        """Record audio in blocks matching the engine's frame length."""
        assert self._stt_engine, 'The hotword engine has not yet been initialized'
        super().recording_thread(
            block_size=self._stt_engine.frame_length, input_device=input_device
        )

    @action
    def start_detection(self, *args, **kwargs) -> None:
        """Initialize the Cheetah engine and start the detection threads."""
        from pvcheetah import Cheetah

        self._stt_engine = Cheetah(
            library_path=self._library_path,
            acoustic_model_path=self._acoustic_model_path,
            language_model_path=self._language_model_path,
            license_path=self._license_path,
            endpoint_duration_sec=self._end_of_speech_timeout,
        )

        # The engine dictates the sample rate the recorder must use.
        self.rate = self._stt_engine.sample_rate
        self._speech_in_progress.clear()
        super().start_detection(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
7
platypush/plugins/stt/picovoice/speech/manifest.yaml
Normal file
7
platypush/plugins/stt/picovoice/speech/manifest.yaml
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
manifest:
|
||||||
|
events: {}
|
||||||
|
install:
|
||||||
|
pip:
|
||||||
|
- pvcheetah
|
||||||
|
package: platypush.plugins.stt.picovoice.speech
|
||||||
|
type: plugin
|
|
@ -83,10 +83,7 @@ mock_imports = [
|
||||||
"pmw3901",
|
"pmw3901",
|
||||||
"psutil",
|
"psutil",
|
||||||
"pvcheetah",
|
"pvcheetah",
|
||||||
"pvleopard",
|
"pvporcupine ",
|
||||||
"pvorca",
|
|
||||||
"pvporcupine",
|
|
||||||
"pvrhino",
|
|
||||||
"pyHS100",
|
"pyHS100",
|
||||||
"pyaudio",
|
"pyaudio",
|
||||||
"pychromecast",
|
"pychromecast",
|
||||||
|
|
Loading…
Reference in a new issue