Compare commits

...

3 Commits

Author SHA1 Message Date
Fabio Manganiello 7c504685e2
Prevent a potential recursion error in `wait_for_either`.
We shouldn't overwrite `event._set` and `event._clear` if those values
have already been set.

Those attributes hold the original references to `Event.set` and
`Event.clear` respectively, which the `OrEvent` logic replaces with
callback-based wrappers.

The saved references shouldn't be overwritten if those attributes are
already present.
2024-04-14 23:27:13 +02:00
Fabio Manganiello 699f92e42b
[assistant] Added `ResponseEndEvent` and `IntentMatchedEvent` 2024-04-14 23:26:59 +02:00
Fabio Manganiello 317241eb36
[assistant.picovoice] Various improvements.
- Added `intent_model_path` parameter.

- Always apply `expanduser` to configuration paths.

- Better logic to infer the fallback model path.

- The Picovoice Leopard object should always be removed after
  `assistant.picovoice.transcribe` is called.
2024-04-14 21:24:06 +02:00
4 changed files with 150 additions and 20 deletions

platypush/message/event/assistant/__init__.py

@@ -1,9 +1,9 @@
 import re
 import sys
-from typing import Optional, Union
+from typing import Any, Mapping, Optional, Union
 
 from platypush.context import get_plugin
-from platypush.message.event import Event
+from platypush.message.event import Event, EventMatchResult
 from platypush.plugins.assistant import AssistantPlugin
 from platypush.utils import get_plugin_name_by_class
@@ -102,6 +102,23 @@ class ResponseEvent(AssistantEvent):
         )
 
 
+class ResponseEndEvent(ConversationEndEvent):
+    """
+    Event triggered when a response has been rendered on the assistant.
+    """
+
+    def __init__(self, *args, with_follow_on_turn: bool = False, **kwargs):
+        """
+        :param with_follow_on_turn: Set to true if the conversation expects a
+            user follow-up, false otherwise.
+        """
+        super().__init__(
+            *args,
+            with_follow_on_turn=with_follow_on_turn,
+            **kwargs,
+        )
+
+
 class NoResponseEvent(ConversationEndEvent):
     """
     Event triggered when a conversation ends with no response
@@ -205,6 +222,42 @@ class SpeechRecognizedEvent(AssistantEvent):
         return result
 
 
+class IntentMatchedEvent(AssistantEvent):
+    """
+    Event triggered when an intent is matched by a speech command.
+    """
+
+    def __init__(
+        self, *args, intent: str, slots: Optional[Mapping[str, Any]] = None, **kwargs
+    ):
+        """
+        :param intent: The intent that has been matched.
+        :param slots: The slots extracted from the intent, as a key-value mapping.
+        """
+        super().__init__(*args, intent=intent, slots=slots or {}, **kwargs)
+
+    def _matches_argument(
+        self, argname, condition_value, event_args, result: EventMatchResult
+    ):
+        if argname != 'slots':
+            return super()._matches_argument(
+                argname, condition_value, event_args, result
+            )
+
+        event_slots = set(event_args.get(argname, {}).items())
+        slots = set(self.args.get(argname, {}).items())
+
+        # All the slots in the condition must be in the event
+        if not slots.difference(event_slots):
+            result.is_match = True
+            result.score += 1
+        else:
+            result.is_match = False
+            result.score = 0
+
+        return result
+
+
 class HotwordDetectedEvent(AssistantEvent):
     """
     Event triggered when a custom hotword is detected
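
As a standalone illustration of the matching rule that `_matches_argument` implements above (a minimal sketch over plain dictionaries, not the actual Platypush event machinery): a hook condition on `slots` matches when every name-value pair it specifies also appears in the event's slots.

def slots_match(condition_slots: dict, event_slots: dict) -> bool:
    # All the (name, value) slot pairs in the condition must be in the
    # event, mirroring IntentMatchedEvent._matches_argument above.
    return not set(condition_slots.items()) - set(event_slots.items())

event = {'device': 'lights', 'state': 'on', 'location': 'living room'}
assert slots_match({'state': 'on'}, event)       # subset of the event slots
assert not slots_match({'state': 'off'}, event)  # mismatched slot value
assert slots_match({}, event)                    # empty condition matches anything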

platypush/plugins/assistant/__init__.py

@@ -242,7 +242,9 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
         tts.say(text=text, **self.tts_plugin_args)
 
     def _on_response_render_end(self):
-        pass
+        from platypush.message.event.assistant import ResponseEndEvent
+
+        self._send_event(ResponseEndEvent)
 
     def _on_hotword_detected(self, hotword: Optional[str]):
         from platypush.message.event.assistant import HotwordDetectedEvent
@@ -256,6 +258,11 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
         self._last_query = phrase
         self._send_event(SpeechRecognizedEvent, phrase=phrase)
 
+    def _on_intent_matched(self, intent: str, slots: Optional[Dict[str, Any]] = None):
+        from platypush.message.event.assistant import IntentMatchedEvent
+
+        self._send_event(IntentMatchedEvent, intent=intent, slots=slots)
+
     def _on_alarm_start(self):
         from platypush.message.event.assistant import AlarmStartedEvent
 
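
A minimal standalone sketch of the contract being wired up here (simplified stand-ins, not the actual Platypush classes): `ResponseEndEvent` extends `ConversationEndEvent` with a flag that tells subscribers whether the assistant expects a user follow-up once the response has been rendered.

class ConversationEndEvent:
    def __init__(self, **kwargs):
        # Platypush events keep their keyword arguments in .args
        self.args = kwargs

class ResponseEndEvent(ConversationEndEvent):
    def __init__(self, with_follow_on_turn: bool = False, **kwargs):
        super().__init__(with_follow_on_turn=with_follow_on_turn, **kwargs)

evt = ResponseEndEvent(with_follow_on_turn=True)
assert evt.args['with_follow_on_turn']  # conversation stays open for a follow-up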

platypush/plugins/assistant/picovoice/__init__.py

@@ -59,6 +59,7 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
         keyword_paths: Optional[Sequence[str]] = None,
         keyword_model_path: Optional[str] = None,
         speech_model_path: Optional[str] = None,
+        intent_model_path: Optional[str] = None,
         endpoint_duration: Optional[float] = 0.5,
         enable_automatic_punctuation: bool = False,
         start_conversation_on_hotword: bool = True,
@@ -106,6 +107,54 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
             to train your custom models. You can use a base model and fine-tune
             it by boosting the detection of your own words and phrases and edit
             the phonetic representation of the words you want to detect.
+        :param intent_model_path: Path to the Rhino context model. This is
+            required if you want to use the intent recognition engine through
+            Rhino. The context model is a file that contains a list of intents
+            that can be recognized by the engine. An intent is an action or a
+            class of actions that the assistant can recognize, and it can
+            contain an optional set of slots to model context variables - e.g.
+            temperature, lights group, location, device state etc.
+
+            You can create your own context model using the `Rhino console
+            <https://console.picovoice.ai/rhn>`_. For example, you can define a
+            context file to control smart home devices by defining the
+            following slots:
+
+                - ``device_type``: The device to control (e.g. lights, music)
+                - ``device_state``: The target state of the device (e.g. on,
+                  off)
+                - ``location``: The location of the device (e.g. living room,
+                  kitchen, bedroom)
+                - ``media_type``: The type of media to play (e.g. music, video)
+                - ``media_state``: The state of the media (e.g. play, pause,
+                  stop)
+
+            You can then define the following intents:
+
+                - ``device_ctrl``: Control a device state. Supported phrases:
+
+                    - "turn ``$device_state:state`` the ``$location:location``
+                      ``$device_type:device``"
+                    - "turn ``$device_state:state`` the ``$device_type:device``"
+
+                - ``media_ctrl``: Control media state. Supported phrases:
+
+                    - "``$media_state:state`` the ``$media_type:media``"
+                    - "``$media_state:state`` the ``$media_type:media`` in the
+                      ``$location:location``"
+
+            Then a phrase like "turn on the lights in the living room" would
+            trigger a
+            :class:`platypush.message.event.assistant.IntentMatchedEvent` with:
+
+            .. code-block:: json
+
+                {
+                    "intent": "device_ctrl",
+                    "slots": {
+                        "device": "lights",
+                        "state": "on",
+                        "location": "living room"
+                    }
+                }
+
         :param endpoint_duration: If set, the assistant will stop listening when
             no speech is detected for the specified duration (in seconds) after
             the end of an utterance.
@@ -144,9 +193,19 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
             'stt_enabled': stt_enabled,
             'intent_enabled': intent_enabled,
             'keywords': keywords,
-            'keyword_paths': keyword_paths,
-            'keyword_model_path': keyword_model_path,
-            'speech_model_path': speech_model_path,
+            'keyword_paths': (
+                os.path.expanduser(keyword_path)
+                for keyword_path in (keyword_paths or [])
+            ),
+            'keyword_model_path': (
+                os.path.expanduser(keyword_model_path) if keyword_model_path else None
+            ),
+            'speech_model_path': (
+                os.path.expanduser(speech_model_path) if speech_model_path else None
+            ),
+            'intent_model_path': (
+                os.path.expanduser(intent_model_path) if intent_model_path else None
+            ),
             'endpoint_duration': endpoint_duration,
             'enable_automatic_punctuation': enable_automatic_punctuation,
             'start_conversation_on_hotword': start_conversation_on_hotword,
@@ -193,6 +252,8 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
             self.logger.warning('Assistant not initialized')
             return
 
+        if not model_file:
+            model_file = self._assistant_args['speech_model_path']
+
         if model_file:
             model_file = os.path.expanduser(model_file)
@@ -278,6 +339,8 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
         import pvleopard
 
         audio_file = os.path.expanduser(audio_file)
+        if not model_file:
+            model_file = self._assistant_args['speech_model_path']
+
         if model_file:
             model_file = os.path.expanduser(model_file)
@@ -286,18 +349,22 @@
         )
 
         transcript, words = leopard.process_file(audio_file)
-        return {
-            'transcription': transcript,
-            'words': [
-                {
-                    'word': word.word,
-                    'start': word.start_sec,
-                    'end': word.end_sec,
-                    'confidence': word.confidence,
-                }
-                for word in words
-            ],
-        }
+
+        try:
+            return {
+                'transcription': transcript,
+                'words': [
+                    {
+                        'word': word.word,
+                        'start': word.start_sec,
+                        'end': word.end_sec,
+                        'confidence': word.confidence,
+                    }
+                    for word in words
+                ],
+            }
+        finally:
+            leopard.delete()
 
     @action
     def mute(self, *_, **__):
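
As a usage sketch for the intent support documented above: a user script could react to the matched intent roughly as follows. This assumes the `@when` hook decorator and the `run` helper exposed by recent Platypush releases; the `device_ctrl` intent and slot names follow the docstring example, and `light.hue.on` is just an illustrative target action.

from platypush import run, when
from platypush.message.event.assistant import IntentMatchedEvent

@when(IntentMatchedEvent, intent='device_ctrl')
def on_device_ctrl(event: IntentMatchedEvent, **_):
    # Slots are stored in the event arguments, as added in this changeset
    slots = event.args.get('slots', {})
    if slots.get('device') == 'lights' and slots.get('state') == 'on':
        run('light.hue.on')  # illustrative target action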

platypush/utils/__init__.py

@@ -24,8 +24,11 @@ def OrEvent(*events, cls: Type = threading.Event):
     or_event.clear()
 
     def _to_or(e, changed_callback: Callable[[], None]):
-        e._set = e.set
-        e._clear = e.clear
+        if not hasattr(e, "_set"):
+            e._set = e.set
+        if not hasattr(e, "_clear"):
+            e._clear = e.clear
+
         e.changed = changed_callback
         e.set = lambda: _or_set(e)
         e.clear = lambda: _clear_or(e)
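
To see why the `hasattr` guard prevents the recursion mentioned in the commit message, here is a minimal standalone reproduction with a simplified wrapper (not the Platypush implementation): wrapping the same `Event` twice without the guard saves the first wrapper lambda, rather than the original `Event.set`, into `_set`, so `set()` ends up calling itself.

import threading

def wrap_set(e: threading.Event, guarded: bool):
    # Simplified version of OrEvent's _to_or: save the original set(),
    # then replace it with a callback-based wrapper.
    if not guarded or not hasattr(e, '_set'):
        e._set = e.set
    e.set = lambda: e._set()

e1 = threading.Event()
wrap_set(e1, guarded=True)
wrap_set(e1, guarded=True)   # second wrap keeps the original reference
e1.set()                     # works: the wrapper calls the original set()

e2 = threading.Event()
wrap_set(e2, guarded=False)
wrap_set(e2, guarded=False)  # _set now points at the first wrapper lambda
# e2.set() would now recurse (set -> _set -> set -> ...) until RecursionError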