Better integration with the native base API of the assistant plugin.

This commit is contained in:
Fabio Manganiello 2024-04-08 03:02:03 +02:00
parent f7517eb321
commit bb38f2439c
4 changed files with 87 additions and 49 deletions

View file

@ -9,21 +9,22 @@ from platypush.context import get_bus, get_plugin
from platypush.entities.assistants import Assistant from platypush.entities.assistants import Assistant
from platypush.entities.managers.assistants import AssistantEntityManager from platypush.entities.managers.assistants import AssistantEntityManager
from platypush.message.event.assistant import ( from platypush.message.event.assistant import (
AssistantEvent,
ConversationStartEvent,
ConversationEndEvent,
ConversationTimeoutEvent,
ResponseEvent,
NoResponseEvent,
SpeechRecognizedEvent,
AlarmStartedEvent,
AlarmEndEvent, AlarmEndEvent,
TimerStartedEvent, AlarmStartedEvent,
TimerEndEvent,
AlertStartedEvent,
AlertEndEvent, AlertEndEvent,
AlertStartedEvent,
AssistantEvent,
ConversationEndEvent,
ConversationStartEvent,
ConversationTimeoutEvent,
HotwordDetectedEvent,
MicMutedEvent, MicMutedEvent,
MicUnmutedEvent, MicUnmutedEvent,
NoResponseEvent,
ResponseEvent,
SpeechRecognizedEvent,
TimerEndEvent,
TimerStartedEvent,
) )
from platypush.plugins import Plugin, action from platypush.plugins import Plugin, action
from platypush.utils import get_plugin_name_by_class from platypush.utils import get_plugin_name_by_class
@ -235,6 +236,9 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
self.stop_conversation() self.stop_conversation()
tts.say(text=text, **self.tts_plugin_args) tts.say(text=text, **self.tts_plugin_args)
def _on_hotword_detected(self, hotword: Optional[str]):
    """
    Callback invoked when a hotword is detected.

    Dispatches a ``HotwordDetectedEvent`` through ``self._send_event``.

    :param hotword: The hotword that was detected, if available.
    """
    self._send_event(HotwordDetectedEvent, hotword=hotword)
def _on_speech_recognized(self, phrase: Optional[str]): def _on_speech_recognized(self, phrase: Optional[str]):
phrase = (phrase or '').lower().strip() phrase = (phrase or '').lower().strip()
self._last_query = phrase self._last_query = phrase

View file

@ -1,10 +1,10 @@
from typing import Optional, Sequence from typing import Optional, Sequence
from platypush.context import get_bus
from platypush.plugins import RunnablePlugin, action from platypush.plugins import RunnablePlugin, action
from platypush.plugins.assistant import AssistantPlugin from platypush.plugins.assistant import AssistantPlugin
from ._assistant import Assistant from ._assistant import Assistant
from ._state import AssistantState
# pylint: disable=too-many-ancestors # pylint: disable=too-many-ancestors
@ -56,7 +56,7 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
enable_automatic_punctuation: bool = False, enable_automatic_punctuation: bool = False,
start_conversation_on_hotword: bool = True, start_conversation_on_hotword: bool = True,
audio_queue_size: int = 100, audio_queue_size: int = 100,
conversation_timeout: Optional[float] = 5.0, conversation_timeout: Optional[float] = 7.5,
**kwargs, **kwargs,
): ):
""" """
@ -116,9 +116,10 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
detected after the hotword is detected. If no speech is detected detected after the hotword is detected. If no speech is detected
within this time, the conversation will time out and the plugin will within this time, the conversation will time out and the plugin will
go back into hotword detection mode, if the mode is enabled. Default: go back into hotword detection mode, if the mode is enabled. Default:
5 seconds. 7.5 seconds.
""" """
super().__init__(**kwargs) super().__init__(**kwargs)
self._assistant = None
self._assistant_args = { self._assistant_args = {
'stop_event': self._should_stop, 'stop_event': self._should_stop,
'access_key': access_key, 'access_key': access_key,
@ -134,6 +135,11 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
'start_conversation_on_hotword': start_conversation_on_hotword, 'start_conversation_on_hotword': start_conversation_on_hotword,
'audio_queue_size': audio_queue_size, 'audio_queue_size': audio_queue_size,
'conversation_timeout': conversation_timeout, 'conversation_timeout': conversation_timeout,
'on_conversation_start': self._on_conversation_start,
'on_conversation_end': self._on_conversation_end,
'on_conversation_timeout': self._on_conversation_timeout,
'on_speech_recognized': self._on_speech_recognized,
'on_hotword_detected': self._on_hotword_detected,
} }
@action @action
@ -141,12 +147,25 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
""" """
Programmatically start a conversation with the assistant Programmatically start a conversation with the assistant
""" """
if not self._assistant:
self.logger.warning('Assistant not initialized')
return
self._assistant.state = AssistantState.DETECTING_SPEECH
@action
def stop_conversation(self, *_, **__):
    """
    Programmatically stop a running conversation with the assistant.

    If the assistant has not been initialized, a warning is logged and
    nothing else happens. Otherwise the assistant state is set to
    ``DETECTING_HOTWORD`` when hotword detection is enabled, and to
    ``IDLE`` when it is not.
    """
    if not self._assistant:
        self.logger.warning('Assistant not initialized')
        return

    # Pick the resting state depending on whether hotword detection is on.
    self._assistant.state = (
        AssistantState.DETECTING_HOTWORD
        if self._assistant.hotword_enabled
        else AssistantState.IDLE
    )
@action @action
def mute(self, *_, **__): def mute(self, *_, **__):
@ -189,12 +208,10 @@ class PicovoicePlugin(AssistantPlugin, RunnablePlugin):
def main(self): def main(self):
while not self.should_stop(): while not self.should_stop():
self.logger.info('Starting Picovoice assistant') self.logger.info('Starting Picovoice assistant')
with Assistant(**self._assistant_args) as assistant: with Assistant(**self._assistant_args) as self._assistant:
try: try:
for event in assistant: for event in self._assistant:
if event: self.logger.debug('Picovoice assistant event: %s', event)
event.args['assistant'] = 'picovoice'
get_bus().post(event)
except KeyboardInterrupt: except KeyboardInterrupt:
break break
except Exception as e: except Exception as e:

View file

@ -9,16 +9,13 @@ import pvleopard
import pvporcupine import pvporcupine
import pvrhino import pvrhino
from platypush.context import get_bus
from platypush.message.event.assistant import ( from platypush.message.event.assistant import (
ConversationStartEvent,
ConversationEndEvent,
ConversationTimeoutEvent, ConversationTimeoutEvent,
HotwordDetectedEvent, HotwordDetectedEvent,
SpeechRecognizedEvent, SpeechRecognizedEvent,
) )
from ._context import SpeechDetectionContext from ._context import ConversationContext
from ._recorder import AudioRecorder from ._recorder import AudioRecorder
from ._state import AssistantState from ._state import AssistantState
@ -28,6 +25,9 @@ class Assistant:
A facade class that wraps the Picovoice engines under an assistant API. A facade class that wraps the Picovoice engines under an assistant API.
""" """
def _default_callback(*_, **__):
    """
    No-op callback that accepts and ignores any arguments.

    Used as the default value of the ``on_*`` callback parameters of the
    constructor.
    """
    pass
def __init__( def __init__(
self, self,
access_key: str, access_key: str,
@ -45,6 +45,11 @@ class Assistant:
start_conversation_on_hotword: bool = False, start_conversation_on_hotword: bool = False,
audio_queue_size: int = 100, audio_queue_size: int = 100,
conversation_timeout: Optional[float] = None, conversation_timeout: Optional[float] = None,
on_conversation_start=_default_callback,
on_conversation_end=_default_callback,
on_conversation_timeout=_default_callback,
on_speech_recognized=_default_callback,
on_hotword_detected=_default_callback,
): ):
self._access_key = access_key self._access_key = access_key
self._stop_event = stop_event self._stop_event = stop_event
@ -62,10 +67,16 @@ class Assistant:
self.start_conversation_on_hotword = start_conversation_on_hotword self.start_conversation_on_hotword = start_conversation_on_hotword
self.audio_queue_size = audio_queue_size self.audio_queue_size = audio_queue_size
self._on_conversation_start = on_conversation_start
self._on_conversation_end = on_conversation_end
self._on_conversation_timeout = on_conversation_timeout
self._on_speech_recognized = on_speech_recognized
self._on_hotword_detected = on_hotword_detected
self._recorder = None self._recorder = None
self._state = AssistantState.IDLE self._state = AssistantState.IDLE
self._state_lock = RLock() self._state_lock = RLock()
self._speech_ctx = SpeechDetectionContext(timeout=conversation_timeout) self._ctx = ConversationContext(timeout=conversation_timeout)
if hotword_enabled: if hotword_enabled:
if not keywords: if not keywords:
@ -119,11 +130,11 @@ class Assistant:
return return
if prev_state == AssistantState.DETECTING_SPEECH: if prev_state == AssistantState.DETECTING_SPEECH:
self._speech_ctx.stop() self._ctx.stop()
self._post_event(ConversationEndEvent()) self._on_conversation_end()
elif new_state == AssistantState.DETECTING_SPEECH: elif new_state == AssistantState.DETECTING_SPEECH:
self._speech_ctx.start() self._ctx.start()
self._post_event(ConversationStartEvent()) self._on_conversation_start()
@property @property
def porcupine(self) -> Optional[pvporcupine.Porcupine]: def porcupine(self) -> Optional[pvporcupine.Porcupine]:
@ -239,11 +250,6 @@ class Assistant:
raise StopIteration raise StopIteration
def _post_event(self, event):
    """
    Post an event on the bus, tagged with the assistant name.

    :param event: The event to post. Falsy values are ignored.
    """
    if event:
        # Mark the event as originating from the Picovoice assistant.
        event.args['assistant'] = 'picovoice'
        get_bus().post(event)
def _process_hotword(self, frame): def _process_hotword(self, frame):
if not self.porcupine: if not self.porcupine:
return None return None
@ -256,6 +262,7 @@ class Assistant:
if self.start_conversation_on_hotword: if self.start_conversation_on_hotword:
self.state = AssistantState.DETECTING_SPEECH self.state = AssistantState.DETECTING_SPEECH
self._on_hotword_detected(hotword=self.keywords[keyword_index])
return HotwordDetectedEvent(hotword=self.keywords[keyword_index]) return HotwordDetectedEvent(hotword=self.keywords[keyword_index])
return None return None
@ -265,26 +272,36 @@ class Assistant:
return None return None
event = None event = None
( partial_transcript, self._ctx.is_final = self.cheetah.process(frame)
self._speech_ctx.partial_transcript,
self._speech_ctx.is_final,
) = self.cheetah.process(frame)
if self._speech_ctx.partial_transcript: if partial_transcript:
self._ctx.partial_transcript += partial_transcript
self.logger.info( self.logger.info(
'Partial transcript: %s, is_final: %s', 'Partial transcript: %s, is_final: %s',
self._speech_ctx.partial_transcript, self._ctx.partial_transcript,
self._speech_ctx.is_final, self._ctx.is_final,
) )
if self._speech_ctx.is_final or self._speech_ctx.timed_out: if self._ctx.is_final or self._ctx.timed_out:
event = ( phrase = ''
ConversationTimeoutEvent() if self.cheetah:
if self._speech_ctx.timed_out phrase = self.cheetah.flush()
else SpeechRecognizedEvent(phrase=self.cheetah.flush())
)
if self.porcupine: if not self._ctx.is_final:
self._ctx.partial_transcript += phrase
phrase = self._ctx.partial_transcript
phrase = phrase[:1].lower() + phrase[1:]
if self._ctx.is_final or phrase:
event = SpeechRecognizedEvent(phrase=phrase)
self._on_speech_recognized(phrase=phrase)
else:
event = ConversationTimeoutEvent()
self._on_conversation_timeout()
self._ctx.reset()
if self.hotword_enabled:
self.state = AssistantState.DETECTING_HOTWORD self.state = AssistantState.DETECTING_HOTWORD
return event return event

View file

@ -4,9 +4,9 @@ from typing import Optional
@dataclass @dataclass
class SpeechDetectionContext: class ConversationContext:
""" """
Context of the speech detection process. Context of the conversation process.
""" """
partial_transcript: str = '' partial_transcript: str = ''