Compare commits

...

3 Commits

Author SHA1 Message Date
Fabio Manganiello 7c504685e2
Prevent a potential recursion error in `wait_for_either`.
We shouldn't overwrite `event._set` and `event._clear` if those
attributes have already been set.

They hold the original references to `Event.set` and `Event.clear`
respectively, which the `OrEvent` logic replaces with callback-based
wrappers. Overwriting them a second time would store the wrappers
themselves in `_set`/`_clear`, so calling `set()` or `clear()` would
recurse indefinitely.
2024-04-14 23:27:13 +02:00
Fabio Manganiello 699f92e42b
[assistant] Added `ResponseEndEvent` and `IntentMatchedEvent`
2024-04-14 23:26:59 +02:00
Fabio Manganiello 317241eb36
[assistant.picovoice] Various improvements.
- Added `intent_model_path` parameter.

- Always apply `expanduser` to configuration paths.

- Better logic to infer the fallback model path.

- The Picovoice Leopard object should always be removed after
  `assistant.picovoice.transcribe` is called.
2024-04-14 21:24:06 +02:00
4 changed files with 150 additions and 20 deletions

View File

@@ -1,9 +1,9 @@
 import re
 import sys
-from typing import Optional, Union
+from typing import Any, Mapping, Optional, Union

 from platypush.context import get_plugin
-from platypush.message.event import Event
+from platypush.message.event import Event, EventMatchResult
 from platypush.plugins.assistant import AssistantPlugin
 from platypush.utils import get_plugin_name_by_class
@@ -102,6 +102,23 @@ class ResponseEvent(AssistantEvent):
         )


+class ResponseEndEvent(ConversationEndEvent):
+    """
+    Event triggered when a response has been rendered on the assistant.
+    """
+
+    def __init__(self, *args, with_follow_on_turn: bool = False, **kwargs):
+        """
+        :param with_follow_on_turn: Set to true if the conversation expects a
+            user follow-up, false otherwise.
+        """
+        super().__init__(
+            *args,
+            with_follow_on_turn=with_follow_on_turn,
+            **kwargs,
+        )
+
+
 class NoResponseEvent(ConversationEndEvent):
     """
     Event triggered when a conversation ends with no response
@@ -205,6 +222,42 @@ class SpeechRecognizedEvent(AssistantEvent):
         return result


+class IntentMatchedEvent(AssistantEvent):
+    """
+    Event triggered when an intent is matched by a speech command.
+    """
+
+    def __init__(
+        self, *args, intent: str, slots: Optional[Mapping[str, Any]] = None, **kwargs
+    ):
+        """
+        :param intent: The intent that has been matched.
+        :param slots: The slots extracted from the intent, as a key-value mapping.
+        """
+        super().__init__(*args, intent=intent, slots=slots or {}, **kwargs)
+
+    def _matches_argument(
+        self, argname, condition_value, event_args, result: EventMatchResult
+    ):
+        if argname != 'slots':
+            return super()._matches_argument(
+                argname, condition_value, event_args, result
+            )
+
+        event_slots = set(event_args.get(argname, {}).items())
+        slots = set(self.args.get(argname, {}).items())
+
+        # All the slots in the condition must also be present in the event
+        if not slots.difference(event_slots):
+            result.is_match = True
+            result.score += 1
+        else:
+            result.is_match = False
+            result.score = 0
+
+        return result
+
+
 class HotwordDetectedEvent(AssistantEvent):
     """
     Event triggered when a custom hotword is detected
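A note on the slot-matching logic added above: the condition's slots are treated as a subset requirement, i.e. every key-value pair listed in the hook condition must appear in the event, while extra slots on the event are ignored. A minimal standalone sketch of that set-difference test (the slot names and values here are hypothetical):

    condition_slots = {'type': 'lights', 'state': 'on'}
    event_slots = {'type': 'lights', 'state': 'on', 'location': 'living room'}

    # Every (key, value) pair required by the condition must appear in the event
    missing = set(condition_slots.items()) - set(event_slots.items())
    print(not missing)  # True: the event satisfies the condition

    event_slots['state'] = 'off'
    missing = set(condition_slots.items()) - set(event_slots.items())
    print(not missing)  # False: the 'state' slot no longer matches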

View File

@@ -242,7 +242,9 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
         tts.say(text=text, **self.tts_plugin_args)

     def _on_response_render_end(self):
-        pass
+        from platypush.message.event.assistant import ResponseEndEvent
+
+        self._send_event(ResponseEndEvent)

     def _on_hotword_detected(self, hotword: Optional[str]):
         from platypush.message.event.assistant import HotwordDetectedEvent
@@ -256,6 +258,11 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
         self._last_query = phrase
         self._send_event(SpeechRecognizedEvent, phrase=phrase)

+    def _on_intent_matched(self, intent: str, slots: Optional[Dict[str, Any]] = None):
+        from platypush.message.event.assistant import IntentMatchedEvent
+
+        self._send_event(IntentMatchedEvent, intent=intent, slots=slots)
+
     def _on_alarm_start(self):
         from platypush.message.event.assistant import AlarmStartedEvent
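`_send_event` posts these events on the application bus, so user hooks can react to them. A minimal sketch of a hook on the new `ResponseEndEvent`, assuming the `@when` decorator exported by recent Platypush releases (the script path is illustrative):

    # e.g. ~/.config/platypush/scripts/assistant_hooks.py
    from platypush import when
    from platypush.message.event.assistant import ResponseEndEvent

    @when(ResponseEndEvent)
    def on_response_end(event: ResponseEndEvent, **_):
        # with_follow_on_turn is the argument introduced by this changeset
        print('Follow-up expected:', event.args.get('with_follow_on_turn'))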

View File

@@ -59,6 +59,7 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
         keyword_paths: Optional[Sequence[str]] = None,
         keyword_model_path: Optional[str] = None,
         speech_model_path: Optional[str] = None,
+        intent_model_path: Optional[str] = None,
         endpoint_duration: Optional[float] = 0.5,
         enable_automatic_punctuation: bool = False,
         start_conversation_on_hotword: bool = True,
@@ -106,6 +107,54 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
             to train your custom models. You can use a base model and fine-tune
             it by boosting the detection of your own words and phrases and edit
             the phonetic representation of the words you want to detect.
+        :param intent_model_path: Path to the Rhino context model. This is
+            required if you want to use the intent recognition engine through
+            Rhino. The context model is a file that contains a list of intents
+            that can be recognized by the engine. An intent is an action or a
+            class of actions that the assistant can recognize, and it can
+            contain an optional number of slots to model context variables -
+            e.g. temperature, lights group, location, device state etc.
+
+            You can create your own context model using the `Rhino console
+            <https://console.picovoice.ai/rhn>`_. For example, you can define a
+            context file to control smart home devices by defining the
+            following slots:
+
+                - ``device_type``: The device to control (e.g. lights, music)
+                - ``device_state``: The target state of the device (e.g. on,
+                  off)
+                - ``location``: The location of the device (e.g. living
+                  room, kitchen, bedroom)
+                - ``media_type``: The type of media to play (e.g. music, video)
+                - ``media_state``: The state of the media (e.g. play, pause,
+                  stop)
+
+            You can then define the following intents:
+
+                - ``device_ctrl``: Control a device state. Supported phrases:
+
+                    - "turn ``$device_state:state`` the ``$location:location``
+                      ``$device_type:device``"
+                    - "turn ``$device_state:state`` the ``$device_type:device``"
+
+                - ``media_ctrl``: Control media state. Supported phrases:
+
+                    - "``$media_state:state`` the ``$media_type:media``"
+                    - "``$media_state:state`` the ``$media_type:media`` in the
+                      ``$location:location``"
+
+            Then a phrase like "turn on the lights in the living room" would
+            trigger a
+            :class:`platypush.message.event.assistant.IntentMatchedEvent` with:
+
+                .. code-block:: json
+
+                    {
+                        "intent": "device_ctrl",
+                        "slots": {
+                            "type": "lights",
+                            "state": "on",
+                            "location": "living room"
+                        }
+                    }
+
         :param endpoint_duration: If set, the assistant will stop listening when
             no speech is detected for the specified duration (in seconds) after
             the end of an utterance.
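Tying the documented ``device_ctrl`` example to the new event type: a hook sketch that fires only when the intent name and the listed slots match, again assuming the `@when` decorator from recent Platypush releases (slot values mirror the JSON example above and are illustrative):

    from platypush import when
    from platypush.message.event.assistant import IntentMatchedEvent

    # The condition slots must be a subset of the event's slots to match
    @when(IntentMatchedEvent, intent='device_ctrl',
          slots={'type': 'lights', 'state': 'on'})
    def on_lights_on(event: IntentMatchedEvent, **_):
        location = event.args.get('slots', {}).get('location')
        print(f'Turning on the lights in {location or "the default room"}')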
@@ -144,9 +193,19 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
             'stt_enabled': stt_enabled,
             'intent_enabled': intent_enabled,
             'keywords': keywords,
-            'keyword_paths': keyword_paths,
-            'keyword_model_path': keyword_model_path,
-            'speech_model_path': speech_model_path,
+            'keyword_paths': (
+                os.path.expanduser(keyword_path)
+                for keyword_path in (keyword_paths or [])
+            ),
+            'keyword_model_path': (
+                os.path.expanduser(keyword_model_path) if keyword_model_path else None
+            ),
+            'speech_model_path': (
+                os.path.expanduser(speech_model_path) if speech_model_path else None
+            ),
+            'intent_model_path': (
+                os.path.expanduser(intent_model_path) if intent_model_path else None
+            ),
             'endpoint_duration': endpoint_duration,
             'enable_automatic_punctuation': enable_automatic_punctuation,
             'start_conversation_on_hotword': start_conversation_on_hotword,
@@ -193,6 +252,8 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
             self.logger.warning('Assistant not initialized')
             return
+        if not model_file:
+            model_file = self._assistant_args['speech_model_path']

         if model_file:
             model_file = os.path.expanduser(model_file)
@@ -278,6 +339,8 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
         import pvleopard

         audio_file = os.path.expanduser(audio_file)
+        if not model_file:
+            model_file = self._assistant_args['speech_model_path']

         if model_file:
             model_file = os.path.expanduser(model_file)
@@ -286,18 +349,22 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
         )

         transcript, words = leopard.process_file(audio_file)
-        return {
-            'transcription': transcript,
-            'words': [
-                {
-                    'word': word.word,
-                    'start': word.start_sec,
-                    'end': word.end_sec,
-                    'confidence': word.confidence,
-                }
-                for word in words
-            ],
-        }
+
+        try:
+            return {
+                'transcription': transcript,
+                'words': [
+                    {
+                        'word': word.word,
+                        'start': word.start_sec,
+                        'end': word.end_sec,
+                        'confidence': word.confidence,
+                    }
+                    for word in words
+                ],
+            }
+        finally:
+            leopard.delete()

     @action
     def mute(self, *_, **__):
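Since `leopard.delete()` now runs in a `finally` block, the native Leopard handle is released even if building the response raises. A usage sketch of the action, assuming the `run` helper exported by recent Platypush releases (the audio path is a hypothetical example):

    from platypush import run

    result = run('assistant.picovoice.transcribe',
                 audio_file='~/recordings/note.wav')  # illustrative path
    print(result['transcription'])
    for word in result['words']:
        print(f"{word['word']}: {word['start']:.2f}-{word['end']:.2f}s "
              f"(confidence: {word['confidence']:.2f})")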

View File

@@ -24,8 +24,11 @@ def OrEvent(*events, cls: Type = threading.Event):
             or_event.clear()

     def _to_or(e, changed_callback: Callable[[], None]):
-        e._set = e.set
-        e._clear = e.clear
+        if not hasattr(e, "_set"):
+            e._set = e.set
+
+        if not hasattr(e, "_clear"):
+            e._clear = e.clear
         e.changed = changed_callback
         e.set = lambda: _or_set(e)
         e.clear = lambda: _clear_or(e)
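To see why the guard matters: `_to_or` may be applied more than once to the same event, e.g. when an event participates in multiple `OrEvent` groups. Without the guard, the second application would save the already-wrapped `set` into `_set`, so the wrapper would call itself and recurse until the interpreter's limit. A minimal standalone sketch of the pattern (simplified; not the actual platypush helpers):

    import threading

    def wrap_set(e):
        # The hasattr guard mirrors the fix above: only save the original
        # Event.set the first time the event is wrapped
        if not hasattr(e, '_set'):
            e._set = e.set
        e.set = lambda: (e._set(), print('changed callback fired'))

    e = threading.Event()
    wrap_set(e)
    wrap_set(e)  # without the guard, _set would now point at the first wrapper
    e.set()      # terminates; without the guard this would recurse forever
    print(e.is_set())  # True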