Compare commits


7 Commits

Author SHA1 Message Date
Fabio Manganiello 2b287b569f
[assistant.picovoice] Conversation flow improvements.
- The `Responding` state should be modelled as an extra event/binary
  flag, not as an assistant state. The assistant may be listening for
  hotwords even while the `tts` plugin is responding, and we don't want
  the two states to interfere with each other - nor do we want to build
  a more complex state machine that also needs to take concurrent
  states into account (see the sketch after this commit message).

- Stop any response currently being rendered over the `tts` plugin when
  new hotword audio is detected. If e.g. I say "Ok Google", I should
  always be able to trigger the assistant and stop any concurrent audio
  process.

- `SpeechRecognizedEvent` should be emitted even if `cheetah`'s latest
  audio frame results weren't marked as final, provided that the speech
  detection window has timed out. Cheetah's `is_final` detection seems
  to be quite buggy at times, and it may not properly detect the end of
  utterances, especially with non-native accents. The workaround is to
  flush whatever text is available (if at least some speech was
  detected) into a `SpeechRecognizedEvent` upon timeout.
2024-04-13 20:01:21 +02:00
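
A minimal sketch of the flag-based approach from the first point above. The class below is illustrative only; the `is_responding`/`set_responding` names mirror the members added to the `Assistant` facade in the diff further down:

```python
from threading import Event


class RespondingFlag:
    """Models `responding` as a binary flag, orthogonal to the
    hotword/speech state machine."""

    def __init__(self):
        self._responding = Event()

    @property
    def is_responding(self) -> bool:
        return self._responding.is_set()

    def set_responding(self, responding: bool):
        if responding:
            self._responding.set()
        else:
            self._responding.clear()


flag = RespondingFlag()
flag.set_responding(True)   # The TTS plugin starts rendering a response
assert flag.is_responding   # ...while hotword detection keeps running
flag.set_responding(False)  # A new hotword preempts the response
```
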
Fabio Manganiello 24e93ad160
Added more default imports under the `platypush` module root.
These objects can now also be imported in scripts through
`from platypush import <name>` (see the example below):

- `Variable`
- `cron`
- `hook`
- `procedure`
2024-04-10 23:33:48 +02:00
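
For instance, a user script could now look like this (an illustrative sketch: the phrase, cron expression and variable name are made up):

```python
from platypush import Variable, cron, hook, procedure
from platypush.message.event.assistant import SpeechRecognizedEvent


@hook(SpeechRecognizedEvent, phrase='turn on the lights')
def on_lights_on(event, **context):
    # `Variable` can now be imported straight from the package root.
    Variable('lights_on').set(True)


@cron('0 7 * * *')
def morning_routine(**context):
    ...


@procedure
def my_procedure(**context):
    ...
```
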
Fabio Manganiello 3b73b22db9
[assistant.picovoice] More features.
- Added wiring between `assistant.picovoice` and `tts.picovoice`.

- Added `RESPONDING` status to the assistant.

- Added ability to override the default speech model upon
  `start_conversation`.

- Better handling of conversation timeouts.

- Cache Cheetah objects in a `model -> object` map - at least the
  default model should be pre-loaded, since loading a model at runtime
  takes a while, and that could impact the ability to detect speech in
  the first seconds after a hotword is detected (see the sketch below).
2024-04-10 22:26:45 +02:00
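
The cache mentioned in the last point could look roughly like this (an illustrative sketch; the stub `_load` stands in for the slow `pvcheetah.create` call):

```python
from typing import Any, Dict, Optional


class CheetahCache:
    """A `model -> object` map: each speech model is loaded at most
    once, and the default model can be pre-warmed so the first seconds
    of speech after a hotword aren't lost to model-loading latency."""

    def __init__(self, default_model_path: Optional[str] = None):
        self._instances: Dict[Optional[str], Any] = {}
        self.default_model_path = default_model_path

    def _load(self, model_path: Optional[str]) -> Any:
        # Stand-in for the (slow) pvcheetah.create(...) call.
        return object()

    def get(self, model_path: Optional[str]) -> Any:
        if model_path not in self._instances:
            self._instances[model_path] = self._load(model_path)
        return self._instances[model_path]

    def preload_default(self):
        self.get(self.default_model_path)
```
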
Fabio Manganiello 9761cc2eef
Added `tts.picovoice` plugin. 2024-04-10 20:32:32 +02:00
Fabio Manganiello 6bd20bfcf6
Added ffmpeg requirement for `assistant.picovoice`. 2024-04-10 20:31:38 +02:00
Fabio Manganiello 8702eaa25b
s/partial_transcript/transcript/g 2024-04-09 00:19:51 +02:00
Fabio Manganiello 6feb824c04
Refactored `AssistantEvent`.
`AssistantEvent.assistant` is now modelled as an opaque object that
behaves as follows:

- The underlying plugin name is saved under `event.args['_assistant']`.

- `event.assistant` is a property that returns the assistant instance
  via `get_plugin`.

- `event.assistant` is reported as a string (plugin qualified name) upon
  event dump.

This allows event hooks to use `event.assistant` to interact with the
underlying assistant and modify the conversation flow, while event hook
conditions can still be modelled as simple equality checks between
strings (see the example below).
2024-04-09 00:15:51 +02:00
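
In practice, a hook can interact with the assistant that triggered the event through `event.assistant` (a sketch; the phrase is made up):

```python
from platypush import hook
from platypush.message.event.assistant import SpeechRecognizedEvent


@hook(SpeechRecognizedEvent, phrase='stop')
def on_stop(event, **context):
    # `event.assistant` resolves to the live plugin instance via
    # `get_plugin`, so the conversation flow can be changed directly,
    # while the event still dumps `assistant` as a plain string.
    event.assistant.stop_conversation()
```
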
11 changed files with 404 additions and 75 deletions

View File

@@ -7,10 +7,13 @@ Platypush
from .app import Application
from .config import Config
from .context import get_backend, get_bus, get_plugin
from .context import Variable, get_backend, get_bus, get_plugin
from .cron import cron
from .event.hook import hook
from .message.event import Event
from .message.request import Request
from .message.response import Response
from .procedure import procedure
from .runner import main
from .utils import run
@@ -19,14 +22,18 @@ __author__ = 'Fabio Manganiello <fabio@manganiello.tech>'
__version__ = '0.50.3'
__all__ = [
'Application',
'Variable',
'Config',
'Event',
'Request',
'Response',
'cron',
'get_backend',
'get_bus',
'get_plugin',
'hook',
'main',
'procedure',
'run',
]

View File

@@ -257,26 +257,29 @@ class Event(Message):
return result
def as_dict(self):
"""
Converts the event into a dictionary
"""
args = copy.deepcopy(self.args)
flatten(args)
return {
'type': 'event',
'target': self.target,
'origin': self.origin if hasattr(self, 'origin') else None,
'id': self.id if hasattr(self, 'id') else None,
'_timestamp': self.timestamp,
'args': {'type': self.type, **args},
}
def __str__(self):
"""
Overrides the str() operator and converts
the message into a UTF-8 JSON string
"""
args = copy.deepcopy(self.args)
flatten(args)
return json.dumps(
{
'type': 'event',
'target': self.target,
'origin': self.origin if hasattr(self, 'origin') else None,
'id': self.id if hasattr(self, 'id') else None,
'_timestamp': self.timestamp,
'args': {'type': self.type, **args},
},
cls=self.Encoder,
)
return json.dumps(self.as_dict(), cls=self.Encoder)
@dataclass

View File

@@ -1,27 +1,53 @@
import re
import sys
from typing import Optional
from typing import Optional, Union
from platypush.context import get_plugin
from platypush.message.event import Event
from platypush.plugins.assistant import AssistantPlugin
from platypush.utils import get_plugin_name_by_class
class AssistantEvent(Event):
"""Base class for assistant events"""
def __init__(self, *args, assistant: Optional[str] = None, **kwargs):
def __init__(
self, *args, assistant: Optional[Union[str, AssistantPlugin]] = None, **kwargs
):
"""
:param assistant: Name of the assistant plugin that triggered the event.
"""
super().__init__(*args, assistant=assistant, **kwargs)
assistant = assistant or kwargs.get('assistant')
if assistant:
assistant = (
assistant
if isinstance(assistant, str)
else get_plugin_name_by_class(assistant.__class__)
)
kwargs['_assistant'] = assistant
super().__init__(*args, **kwargs)
@property
def _assistant(self):
return (
get_plugin(self.args.get('assistant'))
if self.args.get('assistant')
else None
)
def assistant(self) -> Optional[AssistantPlugin]:
assistant = self.args.get('_assistant')
if not assistant:
return None
return get_plugin(assistant)
def as_dict(self):
evt_dict = super().as_dict()
evt_args = {**evt_dict['args']}
assistant = evt_args.pop('_assistant', None)
if assistant:
evt_args['assistant'] = assistant
return {
**evt_dict,
'args': evt_args,
}
class ConversationStartEvent(AssistantEvent):
@@ -95,8 +121,8 @@ class SpeechRecognizedEvent(AssistantEvent):
"""
result = super().matches_condition(condition)
if result.is_match and self._assistant and 'phrase' in condition.args:
self._assistant.stop_conversation()
if result.is_match and self.assistant and 'phrase' in condition.args:
self.assistant.stop_conversation()
return result

View File

@@ -122,6 +122,12 @@ class Plugin(EventGenerator, ExtensionWithManifest): # lgtm [py/missing-call-to
assert entities, 'entities plugin not initialized'
return entities
def __str__(self):
"""
:return: The qualified name of the plugin.
"""
return get_plugin_name_by_class(self.__class__)
def run(self, method, *args, **kwargs):
assert (
method in self.registered_actions

View File

@@ -8,24 +8,7 @@ from typing import Any, Collection, Dict, Optional, Type
from platypush.context import get_bus, get_plugin
from platypush.entities.assistants import Assistant
from platypush.entities.managers.assistants import AssistantEntityManager
from platypush.message.event.assistant import (
AlarmEndEvent,
AlarmStartedEvent,
AlertEndEvent,
AlertStartedEvent,
AssistantEvent,
ConversationEndEvent,
ConversationStartEvent,
ConversationTimeoutEvent,
HotwordDetectedEvent,
MicMutedEvent,
MicUnmutedEvent,
NoResponseEvent,
ResponseEvent,
SpeechRecognizedEvent,
TimerEndEvent,
TimerStartedEvent,
)
from platypush.message.event import Event as AppEvent
from platypush.plugins import Plugin, action
from platypush.utils import get_plugin_name_by_class
@@ -182,6 +165,17 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
self.publish_entities([self])
return asdict(self._state)
@action
def render_response(self, text: str, *_, **__):
"""
Render a response text as audio over the configured TTS plugin.
:param text: Text to render.
"""
self._on_response_render_start(text)
self._render_response(text)
self._on_response_render_end()
def _get_tts_plugin(self):
if not self.tts_plugin:
return None
@@ -201,11 +195,13 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
audio.play(self._conversation_start_sound)
def _send_event(self, event_type: Type[AssistantEvent], **kwargs):
def _send_event(self, event_type: Type[AppEvent], **kwargs):
self.publish_entities([self])
get_bus().post(event_type(assistant=self._plugin_name, **kwargs))
def _on_conversation_start(self):
from platypush.message.event.assistant import ConversationStartEvent
self._last_response = None
self._last_query = None
self._conversation_running.set()
@@ -213,66 +209,98 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
self._play_conversation_start_sound()
def _on_conversation_end(self):
from platypush.message.event.assistant import ConversationEndEvent
self._conversation_running.clear()
self._send_event(ConversationEndEvent)
def _on_conversation_timeout(self):
from platypush.message.event.assistant import ConversationTimeoutEvent
self._last_response = None
self._last_query = None
self._conversation_running.clear()
self._send_event(ConversationTimeoutEvent)
def _on_no_response(self):
from platypush.message.event.assistant import NoResponseEvent
self._last_response = None
self._conversation_running.clear()
self._send_event(NoResponseEvent)
def _on_reponse_rendered(self, text: Optional[str]):
def _on_response_render_start(self, text: Optional[str]):
from platypush.message.event.assistant import ResponseEvent
self._last_response = text
self._send_event(ResponseEvent, response_text=text)
tts = self._get_tts_plugin()
def _render_response(self, text: Optional[str]):
tts = self._get_tts_plugin()
if tts and text:
self.stop_conversation()
tts.say(text=text, **self.tts_plugin_args)
def _on_response_render_end(self):
pass
def _on_hotword_detected(self, hotword: Optional[str]):
from platypush.message.event.assistant import HotwordDetectedEvent
self._send_event(HotwordDetectedEvent, hotword=hotword)
def _on_speech_recognized(self, phrase: Optional[str]):
from platypush.message.event.assistant import SpeechRecognizedEvent
phrase = (phrase or '').lower().strip()
self._last_query = phrase
self._send_event(SpeechRecognizedEvent, phrase=phrase)
def _on_alarm_start(self):
from platypush.message.event.assistant import AlarmStartedEvent
self._cur_alert_type = AlertType.ALARM
self._send_event(AlarmStartedEvent)
def _on_alarm_end(self):
from platypush.message.event.assistant import AlarmEndEvent
self._cur_alert_type = None
self._send_event(AlarmEndEvent)
def _on_timer_start(self):
from platypush.message.event.assistant import TimerStartedEvent
self._cur_alert_type = AlertType.TIMER
self._send_event(TimerStartedEvent)
def _on_timer_end(self):
from platypush.message.event.assistant import TimerEndEvent
self._cur_alert_type = None
self._send_event(TimerEndEvent)
def _on_alert_start(self):
from platypush.message.event.assistant import AlertStartedEvent
self._cur_alert_type = AlertType.ALERT
self._send_event(AlertStartedEvent)
def _on_alert_end(self):
from platypush.message.event.assistant import AlertEndEvent
self._cur_alert_type = None
self._send_event(AlertEndEvent)
def _on_mute(self):
from platypush.message.event.assistant import MicMutedEvent
self._is_muted = True
self._send_event(MicMutedEvent)
def _on_unmute(self):
from platypush.message.event.assistant import MicUnmutedEvent
self._is_muted = False
self._send_event(MicUnmutedEvent)
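
The new `render_response` action lets hooks speak arbitrary text over the configured TTS plugin. A hypothetical hook (phrase and response text are made up):

```python
from datetime import datetime

from platypush import hook
from platypush.message.event.assistant import SpeechRecognizedEvent


@hook(SpeechRecognizedEvent, phrase='what time is it')
def tell_time(event, **context):
    # Fires the ResponseEvent, renders the text over the TTS plugin,
    # then signals the end of the response rendering.
    event.assistant.render_response(datetime.now().strftime('It is %H:%M'))
```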

View File

@@ -1,7 +1,10 @@
import os
from typing import Optional, Sequence
from platypush.context import get_plugin
from platypush.plugins import RunnablePlugin, action
from platypush.plugins.assistant import AssistantPlugin
from platypush.plugins.tts.picovoice import TtsPicovoicePlugin
from ._assistant import Assistant
from ._state import AssistantState
@@ -96,7 +99,12 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
using a language other than English, you can provide the path to the
model file for that language. Model files are available for all the
supported languages through the `Picovoice repository
<https://github.com/Picovoice/porcupine/tree/master/lib/common>`_.
<https://github.com/Picovoice/cheetah/tree/master/lib/common>`_.
You can also use the `Picovoice console
<https://console.picovoice.ai/cat>`_
to train your custom models. You can use a base model and fine-tune it
by boosting the detection of your own words and phrases and by editing
the phonetic representation of the words you want to detect.
:param endpoint_duration: If set, the assistant will stop listening when
no speech is detected for the specified duration (in seconds) after
the end of an utterance.
@@ -146,15 +154,43 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
'on_hotword_detected': self._on_hotword_detected,
}
@property
def tts(self) -> TtsPicovoicePlugin:
p = get_plugin('tts.picovoice')
assert p, 'Picovoice TTS plugin not configured/found'
return p
def _get_tts_plugin(self) -> TtsPicovoicePlugin:
return self.tts
def _on_response_render_start(self, text: Optional[str]):
if self._assistant:
self._assistant.set_responding(True)
return super()._on_response_render_start(text)
def _on_response_render_end(self):
if self._assistant:
self._assistant.set_responding(False)
return super()._on_response_render_end()
@action
def start_conversation(self, *_, **__):
def start_conversation(self, *_, model_file: Optional[str] = None, **__):
"""
Programmatically start a conversation with the assistant
Programmatically start a conversation with the assistant.
:param model_file: Override the model file to be used to detect speech
in this conversation. If not set, the configured
``speech_model_path`` will be used.
"""
if not self._assistant:
self.logger.warning('Assistant not initialized')
return
if model_file:
model_file = os.path.expanduser(model_file)
self._assistant.override_speech_model(model_file)
self._assistant.state = AssistantState.DETECTING_SPEECH
@action
@@ -166,6 +202,8 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
self.logger.warning('Assistant not initialized')
return
self._assistant.override_speech_model(None)
if self._assistant.hotword_enabled:
self._assistant.state = AssistantState.DETECTING_HOTWORD
else:
@@ -215,7 +253,8 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
with Assistant(**self._assistant_args) as self._assistant:
try:
for event in self._assistant:
self.logger.debug('Picovoice assistant event: %s', event)
if event is not None:
self.logger.debug('Picovoice assistant event: %s', event)
except KeyboardInterrupt:
break
except Exception as e:
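
With the new `model_file` parameter, a conversation can be started against a non-default speech model, e.g. (a sketch; the model path is hypothetical):

```python
from platypush import run

# Start a conversation that transcribes speech with a custom Cheetah
# model. The configured `speech_model_path` is used when the argument
# is omitted, and the override is cleared when the conversation ends.
run(
    'assistant.picovoice.start_conversation',
    model_file='~/models/cheetah/speech-it.pv',
)
```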

View File

@@ -9,11 +9,13 @@ import pvleopard
import pvporcupine
import pvrhino
from platypush.context import get_plugin
from platypush.message.event.assistant import (
ConversationTimeoutEvent,
HotwordDetectedEvent,
SpeechRecognizedEvent,
)
from platypush.plugins.tts.picovoice import TtsPicovoicePlugin
from ._context import ConversationContext
from ._recorder import AudioRecorder
@@ -25,6 +27,7 @@ class Assistant:
A facade class that wraps the Picovoice engines under an assistant API.
"""
@staticmethod
def _default_callback(*_, **__):
pass
@@ -60,12 +63,14 @@
self.keywords = list(keywords or [])
self.keyword_paths = None
self.keyword_model_path = None
self._responding = Event()
self.frame_expiration = frame_expiration
self.speech_model_path = speech_model_path
self.endpoint_duration = endpoint_duration
self.enable_automatic_punctuation = enable_automatic_punctuation
self.start_conversation_on_hotword = start_conversation_on_hotword
self.audio_queue_size = audio_queue_size
self._speech_model_path = speech_model_path
self._speech_model_path_override = None
self._on_conversation_start = on_conversation_start
self._on_conversation_end = on_conversation_end
@@ -103,11 +108,32 @@
self.keyword_model_path = keyword_model_path
self._cheetah: Optional[pvcheetah.Cheetah] = None
# Model path -> model instance cache
self._cheetah = {}
self._leopard: Optional[pvleopard.Leopard] = None
self._porcupine: Optional[pvporcupine.Porcupine] = None
self._rhino: Optional[pvrhino.Rhino] = None
@property
def is_responding(self):
return self._responding.is_set()
@property
def speech_model_path(self):
return self._speech_model_path_override or self._speech_model_path
@property
def tts(self) -> TtsPicovoicePlugin:
p = get_plugin('tts.picovoice')
assert p, 'Picovoice TTS plugin not configured/found'
return p
def set_responding(self, responding: bool):
if responding:
self._responding.set()
else:
self._responding.clear()
def should_stop(self):
return self._stop_event.is_set()
@@ -130,12 +156,18 @@
return
if prev_state == AssistantState.DETECTING_SPEECH:
self.tts.stop()
self._ctx.stop()
self._speech_model_path_override = None
self._on_conversation_end()
elif new_state == AssistantState.DETECTING_SPEECH:
self._ctx.start()
self._on_conversation_start()
if new_state == AssistantState.DETECTING_HOTWORD:
self.tts.stop()
self._ctx.reset()
@property
def porcupine(self) -> Optional[pvporcupine.Porcupine]:
if not self.hotword_enabled:
@@ -159,7 +191,7 @@
if not self.stt_enabled:
return None
if not self._cheetah:
if not self._cheetah.get(self.speech_model_path):
args: Dict[str, Any] = {'access_key': self._access_key}
if self.speech_model_path:
args['model_path'] = self.speech_model_path
@@ -168,20 +200,22 @@
if self.enable_automatic_punctuation:
args['enable_automatic_punctuation'] = self.enable_automatic_punctuation
self._cheetah = pvcheetah.create(**args)
self._cheetah[self.speech_model_path] = pvcheetah.create(**args)
return self._cheetah
return self._cheetah[self.speech_model_path]
def __enter__(self):
"""
Get the assistant ready to start processing audio frames.
"""
if self.should_stop():
return self
if self._recorder:
self.logger.info('A recording stream already exists')
elif self.porcupine or self.cheetah:
elif self.hotword_enabled or self.stt_enabled:
sample_rate = (self.porcupine or self.cheetah).sample_rate # type: ignore
frame_length = (self.porcupine or self.cheetah).frame_length # type: ignore
self._recorder = AudioRecorder(
stop_event=self._stop_event,
sample_rate=sample_rate,
@@ -190,6 +224,9 @@
channels=1,
)
if self.stt_enabled:
self._cheetah[self.speech_model_path] = self.cheetah
self._recorder.__enter__()
if self.porcupine:
@@ -200,15 +237,18 @@
return self
def __exit__(self, *_):
"""
Stop the assistant and release all resources.
"""
if self._recorder:
self._recorder.__exit__(*_)
self._recorder = None
self.state = AssistantState.IDLE
if self._cheetah:
self._cheetah.delete()
self._cheetah = None
for model in [*self._cheetah.keys()]:
cheetah = self._cheetah.pop(model, None)
if cheetah:
cheetah.delete()
if self._leopard:
self._leopard.delete()
@@ -223,9 +263,15 @@
self._rhino = None
def __iter__(self):
"""
Iterate over processed assistant events.
"""
return self
def __next__(self):
"""
Process the next audio frame and return the corresponding event.
"""
has_data = False
if self.should_stop() or not self._recorder:
raise StopIteration
@@ -242,10 +288,10 @@
)
continue # The audio frame is too old
if self.porcupine and self.state == AssistantState.DETECTING_HOTWORD:
if self.hotword_enabled and self.state == AssistantState.DETECTING_HOTWORD:
return self._process_hotword(frame)
if self.cheetah and self.state == AssistantState.DETECTING_SPEECH:
if self.stt_enabled and self.state == AssistantState.DETECTING_SPEECH:
return self._process_speech(frame)
raise StopIteration
@@ -262,6 +308,7 @@
if self.start_conversation_on_hotword:
self.state = AssistantState.DETECTING_SPEECH
self.tts.stop()
self._on_hotword_detected(hotword=self.keywords[keyword_index])
return HotwordDetectedEvent(hotword=self.keywords[keyword_index])
@@ -275,23 +322,20 @@
partial_transcript, self._ctx.is_final = self.cheetah.process(frame)
if partial_transcript:
self._ctx.partial_transcript += partial_transcript
self._ctx.transcript += partial_transcript
self.logger.info(
'Partial transcript: %s, is_final: %s',
self._ctx.partial_transcript,
self._ctx.transcript,
self._ctx.is_final,
)
if self._ctx.is_final or self._ctx.timed_out:
phrase = ''
if self.cheetah:
phrase = self.cheetah.flush()
self._ctx.partial_transcript += phrase
phrase = self._ctx.partial_transcript
phrase = self.cheetah.flush() or ''
self._ctx.transcript += phrase
phrase = self._ctx.transcript
phrase = phrase[:1].lower() + phrase[1:]
if self._ctx.is_final or phrase:
if phrase:
event = SpeechRecognizedEvent(phrase=phrase)
self._on_speech_recognized(phrase=phrase)
else:
@@ -304,5 +348,8 @@
return event
def override_speech_model(self, model_path: Optional[str]):
self._speech_model_path_override = model_path
# vim:sw=4:ts=4:et:
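
The override plumbing in the diff above reduces to a small pattern, restated here as a self-contained sketch: the effective model path is a property that prefers the per-conversation override, and the override is dropped once the conversation ends.

```python
from typing import Optional


class SpeechModelSelection:
    """Per-conversation speech model override."""

    def __init__(self, default_path: Optional[str] = None):
        self._default_path = default_path
        self._override_path: Optional[str] = None

    @property
    def speech_model_path(self) -> Optional[str]:
        # The override (if any) wins over the configured default.
        return self._override_path or self._default_path

    def override_speech_model(self, model_path: Optional[str]):
        self._override_path = model_path

    def on_conversation_end(self):
        # Revert to the default model for the next conversation.
        self._override_path = None
```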

View File

@@ -9,7 +9,7 @@ class ConversationContext:
Context of the conversation process.
"""
partial_transcript: str = ''
transcript: str = ''
is_final: bool = False
timeout: Optional[float] = None
t_start: Optional[float] = None
@@ -24,7 +24,7 @@
self.t_end = time()
def reset(self):
self.partial_transcript = ''
self.transcript = ''
self.is_final = False
self.t_start = None
self.t_end = None
@@ -32,11 +32,17 @@
@property
def timed_out(self):
return (
not self.partial_transcript
not self.transcript
and not self.is_final
and self.timeout
and self.t_start
and time() - self.t_start > self.timeout
) or (
self.transcript
and not self.is_final
and self.timeout
and self.t_start
and time() - self.t_start > self.timeout * 2
)
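
Restated for clarity (an equivalent reformulation of the two branches above, not new behavior): the conversation times out after `timeout` seconds if nothing was transcribed, but gets an extended window of `2 * timeout` seconds when a partial transcript exists and is still waiting for a (possibly never-arriving) `is_final` flag.

```python
from time import time
from typing import Optional


def timed_out(
    transcript: str,
    is_final: bool,
    timeout: Optional[float],
    t_start: Optional[float],
) -> bool:
    """Equivalent to the two branches of `ConversationContext.timed_out`."""
    if is_final or not timeout or not t_start:
        return False

    # Double the window when some speech was already transcribed and
    # we're only waiting for the `is_final` flag.
    window = timeout * 2 if transcript else timeout
    return time() - t_start > window
```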

View File

@@ -12,7 +12,14 @@ manifest:
- platypush.message.event.assistant.ResponseEvent
- platypush.message.event.assistant.SpeechRecognizedEvent
install:
apk:
- ffmpeg
apt:
- ffmpeg
dnf:
- ffmpeg
pacman:
- ffmpeg
- python-sounddevice
pip:
- pvcheetah

View File

@@ -0,0 +1,138 @@
import os
from threading import RLock
from typing import Optional
import numpy as np
import pvorca
import sounddevice as sd
from platypush.config import Config
from platypush.plugins import action
from platypush.plugins.tts import TtsPlugin
class TtsPicovoicePlugin(TtsPlugin):
"""
This TTS plugin enables you to render text as audio using `Picovoice
<https://picovoice.ai>`_'s (still experimental) `Orca TTS engine
<https://github.com/Picovoice/orca>`_.
Take a look at
:class:`platypush.plugins.assistant.picovoice.AssistantPicovoicePlugin`
for details on how to sign up for a Picovoice account and get the API key.
Also note that using the TTS features requires you to select Orca from the
list of products available for your account on the `Picovoice console
<https://console.picovoice.ai>`_.
"""
def __init__(
self,
access_key: Optional[str] = None,
model_path: Optional[str] = None,
**kwargs,
):
"""
:param access_key: Picovoice access key. If it's not specified here,
then it must be specified on the configuration of
:class:`platypush.plugins.assistant.picovoice.AssistantPicovoicePlugin`.
:param model_path: Path of the TTS model file (default: use the default
English model).
"""
super().__init__(**kwargs)
if not access_key:
access_key = Config.get('assistant.picovoice', {}).get('access_key')
assert (
access_key
), 'No access key specified and no assistant.picovoice plugin found'
if model_path:
model_path = os.path.expanduser(model_path)
self.model_path = model_path
self.access_key = access_key
self._stream: Optional[sd.OutputStream] = None
self._stream_lock = RLock()
def _play_audio(self, orca: pvorca.Orca, pcm: np.ndarray):
with self._stream_lock:
self.stop()
self._stream = sd.OutputStream(
samplerate=orca.sample_rate,
channels=1,
dtype='int16',
)
try:
self._stream.start()
self._stream.write(pcm)
except Exception as e:
self.logger.warning('Error playing audio: %s: %s', type(e), str(e))
finally:
try:
self.stop()
self._stream.close()
except Exception as e:
self.logger.warning(
'Error stopping audio stream: %s: %s', type(e), str(e)
)
finally:
if self._stream:
self._stream = None
def get_orca(self, model_path: Optional[str] = None):
if not model_path:
model_path = self.model_path
if model_path:
model_path = os.path.expanduser(model_path)
return pvorca.create(access_key=self.access_key, model_path=model_path)
@action
def say(
self,
text: str,
*_,
output_file: Optional[str] = None,
speech_rate: Optional[float] = None,
model_path: Optional[str] = None,
**__,
):
"""
Say some text.
:param text: Text to say.
:param output_file: If set, save the audio to the specified file.
Otherwise play it.
:param speech_rate: Speech rate (default: None).
:param model_path: Path of the TTS model file (default: use the default
configured model).
"""
orca = self.get_orca(model_path=model_path)
if output_file:
orca.synthesize_to_file(
text, os.path.expanduser(output_file), speech_rate=speech_rate
)
return
self._play_audio(
orca=orca,
pcm=np.array(
orca.synthesize(text, speech_rate=speech_rate),
dtype='int16',
),
)
@action
def stop(self):
"""
Stop the currently playing audio.
"""
with self._stream_lock:
if not self._stream:
return
self._stream.stop()
# vim:sw=4:ts=4:et:
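
Example invocations of the new plugin (the output path is hypothetical):

```python
from platypush import run

# Render some text over the default audio device...
run('tts.picovoice.say', text='Hello from Orca')

# ...or synthesize it to a file instead of playing it back.
run('tts.picovoice.say', text='Hello again', output_file='~/hello.wav')
```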

View File

@@ -0,0 +1,22 @@
manifest:
events: {}
install:
apk:
- ffmpeg
- py3-numpy
apt:
- ffmpeg
- python3-numpy
dnf:
- ffmpeg
- python-numpy
pacman:
- ffmpeg
- python-numpy
- python-sounddevice
pip:
- numpy
- pvorca
- sounddevice
package: platypush.plugins.tts.picovoice
type: plugin