forked from platypush/platypush
[assistant.picovoice] Sync between the speech and intent engines.
This commit is contained in:
parent
a373091c64
commit
bd4b1d3e0f
11 changed files with 192 additions and 101 deletions
|
@ -1,4 +1,4 @@
|
||||||
from multiprocessing import RLock, Queue
|
from multiprocessing import RLock, Queue, active_children
|
||||||
import os
|
import os
|
||||||
from queue import Empty
|
from queue import Empty
|
||||||
import socket
|
import socket
|
||||||
|
@ -57,6 +57,25 @@ class CommandStream(ControllableProcess):
|
||||||
self.reset()
|
self.reset()
|
||||||
return super().close()
|
return super().close()
|
||||||
|
|
||||||
|
def _term_or_kill(self, kill: bool = False, visited=None) -> None:
|
||||||
|
func = 'kill' if kill else 'terminate'
|
||||||
|
visited = visited or set()
|
||||||
|
visited.add(id(self))
|
||||||
|
|
||||||
|
for child in active_children():
|
||||||
|
child_id = id(child)
|
||||||
|
if child_id not in visited:
|
||||||
|
visited.add(child_id)
|
||||||
|
getattr(child, func)()
|
||||||
|
|
||||||
|
getattr(super(), func)()
|
||||||
|
|
||||||
|
def terminate(self, visited=None) -> None:
|
||||||
|
self._term_or_kill(kill=False, visited=visited)
|
||||||
|
|
||||||
|
def kill(self) -> None:
|
||||||
|
self._term_or_kill(kill=True)
|
||||||
|
|
||||||
def __enter__(self) -> "CommandStream":
|
def __enter__(self) -> "CommandStream":
|
||||||
self.reset()
|
self.reset()
|
||||||
sock = self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
sock = self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
||||||
|
|
|
@ -226,7 +226,7 @@ class SpeechRecognizedEvent(AssistantEvent):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
class IntentMatchedEvent(AssistantEvent):
|
class IntentRecognizedEvent(AssistantEvent):
|
||||||
"""
|
"""
|
||||||
Event triggered when an intent is matched by a speech command.
|
Event triggered when an intent is matched by a speech command.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -259,9 +259,9 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
|
||||||
self._send_event(SpeechRecognizedEvent, phrase=phrase)
|
self._send_event(SpeechRecognizedEvent, phrase=phrase)
|
||||||
|
|
||||||
def _on_intent_matched(self, intent: str, slots: Optional[Dict[str, Any]] = None):
|
def _on_intent_matched(self, intent: str, slots: Optional[Dict[str, Any]] = None):
|
||||||
from platypush.message.event.assistant import IntentMatchedEvent
|
from platypush.message.event.assistant import IntentRecognizedEvent
|
||||||
|
|
||||||
self._send_event(IntentMatchedEvent, intent=intent, slots=slots)
|
self._send_event(IntentRecognizedEvent, intent=intent, slots=slots)
|
||||||
|
|
||||||
def _on_alarm_start(self):
|
def _on_alarm_start(self):
|
||||||
from platypush.message.event.assistant import AlarmStartedEvent
|
from platypush.message.event.assistant import AlarmStartedEvent
|
||||||
|
|
|
@ -54,7 +54,6 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
|
||||||
access_key: str,
|
access_key: str,
|
||||||
hotword_enabled: bool = True,
|
hotword_enabled: bool = True,
|
||||||
stt_enabled: bool = True,
|
stt_enabled: bool = True,
|
||||||
intent_enabled: bool = False,
|
|
||||||
keywords: Optional[Sequence[str]] = None,
|
keywords: Optional[Sequence[str]] = None,
|
||||||
keyword_paths: Optional[Sequence[str]] = None,
|
keyword_paths: Optional[Sequence[str]] = None,
|
||||||
keyword_model_path: Optional[str] = None,
|
keyword_model_path: Optional[str] = None,
|
||||||
|
@ -77,10 +76,6 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
|
||||||
:param stt_enabled: Enable the speech-to-text engine (default: True).
|
:param stt_enabled: Enable the speech-to-text engine (default: True).
|
||||||
**Note**: The speech-to-text engine requires you to add Cheetah to
|
**Note**: The speech-to-text engine requires you to add Cheetah to
|
||||||
the products available in your Picovoice account.
|
the products available in your Picovoice account.
|
||||||
:param intent_enabled: Enable the intent recognition engine (default:
|
|
||||||
False).
|
|
||||||
**Note**: The intent recognition engine requires you to add Rhino
|
|
||||||
to the products available in your Picovoice account.
|
|
||||||
:param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
|
:param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
|
||||||
google``...). This is required if the wake-word engine is enabled.
|
google``...). This is required if the wake-word engine is enabled.
|
||||||
See the `Porcupine keywords repository
|
See the `Porcupine keywords repository
|
||||||
|
@ -142,7 +137,7 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
|
||||||
|
|
||||||
Then a phrase like "turn on the lights in the living room" would
|
Then a phrase like "turn on the lights in the living room" would
|
||||||
trigger a
|
trigger a
|
||||||
:class:`platypush.message.event.assistant.IntentMatchedEvent` with:
|
:class:`platypush.message.event.assistant.IntentRecognizedEvent` with:
|
||||||
|
|
||||||
.. code-block:: json
|
.. code-block:: json
|
||||||
|
|
||||||
|
@ -155,6 +150,8 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
**Note**: The intent recognition engine requires you to add Rhino
|
||||||
|
to the products available in your Picovoice account.
|
||||||
:param endpoint_duration: If set, the assistant will stop listening when
|
:param endpoint_duration: If set, the assistant will stop listening when
|
||||||
no speech is detected for the specified duration (in seconds) after
|
no speech is detected for the specified duration (in seconds) after
|
||||||
the end of an utterance.
|
the end of an utterance.
|
||||||
|
@ -191,7 +188,6 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
|
||||||
'access_key': access_key,
|
'access_key': access_key,
|
||||||
'hotword_enabled': hotword_enabled,
|
'hotword_enabled': hotword_enabled,
|
||||||
'stt_enabled': stt_enabled,
|
'stt_enabled': stt_enabled,
|
||||||
'intent_enabled': intent_enabled,
|
|
||||||
'keywords': keywords,
|
'keywords': keywords,
|
||||||
'keyword_paths': (
|
'keyword_paths': (
|
||||||
os.path.expanduser(keyword_path)
|
os.path.expanduser(keyword_path)
|
||||||
|
@ -208,7 +204,11 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
|
||||||
),
|
),
|
||||||
'endpoint_duration': endpoint_duration,
|
'endpoint_duration': endpoint_duration,
|
||||||
'enable_automatic_punctuation': enable_automatic_punctuation,
|
'enable_automatic_punctuation': enable_automatic_punctuation,
|
||||||
'start_conversation_on_hotword': start_conversation_on_hotword,
|
'start_conversation_on_hotword': (
|
||||||
|
start_conversation_on_hotword
|
||||||
|
if (intent_model_path or stt_enabled)
|
||||||
|
else False
|
||||||
|
),
|
||||||
'audio_queue_size': audio_queue_size,
|
'audio_queue_size': audio_queue_size,
|
||||||
'conversation_timeout': conversation_timeout,
|
'conversation_timeout': conversation_timeout,
|
||||||
'muted': muted,
|
'muted': muted,
|
||||||
|
@ -420,7 +420,7 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
|
||||||
try:
|
try:
|
||||||
for event in self._assistant:
|
for event in self._assistant:
|
||||||
if event is not None:
|
if event is not None:
|
||||||
self.logger.debug('Picovoice assistant event: %s', event)
|
self.logger.debug('Dequeued assistant event: %s', event)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
break
|
break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
@ -12,7 +12,7 @@ from platypush.message.event.assistant import (
|
||||||
AssistantEvent,
|
AssistantEvent,
|
||||||
ConversationTimeoutEvent,
|
ConversationTimeoutEvent,
|
||||||
HotwordDetectedEvent,
|
HotwordDetectedEvent,
|
||||||
IntentMatchedEvent,
|
IntentRecognizedEvent,
|
||||||
SpeechRecognizedEvent,
|
SpeechRecognizedEvent,
|
||||||
)
|
)
|
||||||
from platypush.plugins.tts.picovoice import TtsPicovoicePlugin
|
from platypush.plugins.tts.picovoice import TtsPicovoicePlugin
|
||||||
|
@ -37,7 +37,6 @@ class Assistant(Thread):
|
||||||
stop_event: Event,
|
stop_event: Event,
|
||||||
hotword_enabled: bool = True,
|
hotword_enabled: bool = True,
|
||||||
stt_enabled: bool = True,
|
stt_enabled: bool = True,
|
||||||
intent_enabled: bool = False,
|
|
||||||
keywords: Optional[Sequence[str]] = None,
|
keywords: Optional[Sequence[str]] = None,
|
||||||
keyword_paths: Optional[Sequence[str]] = None,
|
keyword_paths: Optional[Sequence[str]] = None,
|
||||||
keyword_model_path: Optional[str] = None,
|
keyword_model_path: Optional[str] = None,
|
||||||
|
@ -58,15 +57,12 @@ class Assistant(Thread):
|
||||||
on_hotword_detected=_default_callback,
|
on_hotword_detected=_default_callback,
|
||||||
):
|
):
|
||||||
super().__init__(name='picovoice:Assistant')
|
super().__init__(name='picovoice:Assistant')
|
||||||
if intent_enabled:
|
|
||||||
assert intent_model_path, 'Intent model path not provided'
|
|
||||||
|
|
||||||
self._access_key = access_key
|
self._access_key = access_key
|
||||||
self._stop_event = stop_event
|
self._stop_event = stop_event
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
self.hotword_enabled = hotword_enabled
|
self.hotword_enabled = hotword_enabled
|
||||||
self.stt_enabled = stt_enabled
|
self.stt_enabled = stt_enabled
|
||||||
self.intent_enabled = intent_enabled
|
|
||||||
self.keywords = list(keywords or [])
|
self.keywords = list(keywords or [])
|
||||||
self.keyword_paths = None
|
self.keyword_paths = None
|
||||||
self.keyword_model_path = None
|
self.keyword_model_path = None
|
||||||
|
@ -86,11 +82,11 @@ class Assistant(Thread):
|
||||||
self._speech_processor = SpeechProcessor(
|
self._speech_processor = SpeechProcessor(
|
||||||
stop_event=stop_event,
|
stop_event=stop_event,
|
||||||
stt_enabled=stt_enabled,
|
stt_enabled=stt_enabled,
|
||||||
intent_enabled=intent_enabled,
|
intent_enabled=self.intent_enabled,
|
||||||
conversation_timeout=conversation_timeout,
|
conversation_timeout=conversation_timeout,
|
||||||
model_path=speech_model_path,
|
model_path=speech_model_path,
|
||||||
get_cheetah_args=self._get_speech_engine_args,
|
get_cheetah_args=self._get_speech_engine_args,
|
||||||
get_rhino_args=self._get_speech_engine_args,
|
get_rhino_args=self._get_intent_engine_args,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._on_conversation_start = on_conversation_start
|
self._on_conversation_start = on_conversation_start
|
||||||
|
@ -133,15 +129,19 @@ class Assistant(Thread):
|
||||||
self._porcupine: Optional[pvporcupine.Porcupine] = None
|
self._porcupine: Optional[pvporcupine.Porcupine] = None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_responding(self):
|
def intent_enabled(self) -> bool:
|
||||||
|
return self.intent_model_path is not None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_responding(self) -> bool:
|
||||||
return self._responding.is_set()
|
return self._responding.is_set()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def speech_model_path(self):
|
def speech_model_path(self) -> Optional[str]:
|
||||||
return self._speech_model_path_override or self._speech_model_path
|
return self._speech_model_path_override or self._speech_model_path
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def intent_model_path(self):
|
def intent_model_path(self) -> Optional[str]:
|
||||||
return self._intent_model_path_override or self._intent_model_path
|
return self._intent_model_path_override or self._intent_model_path
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -224,6 +224,16 @@ class Assistant(Thread):
|
||||||
|
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
def _get_intent_engine_args(self) -> dict:
|
||||||
|
args: Dict[str, Any] = {'access_key': self._access_key}
|
||||||
|
args['context_path'] = self.intent_model_path
|
||||||
|
if self.endpoint_duration:
|
||||||
|
args['endpoint_duration_sec'] = self.endpoint_duration
|
||||||
|
if self.enable_automatic_punctuation:
|
||||||
|
args['enable_automatic_punctuation'] = self.enable_automatic_punctuation
|
||||||
|
|
||||||
|
return args
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
"""
|
"""
|
||||||
Get the assistant ready to start processing audio frames.
|
Get the assistant ready to start processing audio frames.
|
||||||
|
@ -301,13 +311,16 @@ class Assistant(Thread):
|
||||||
|
|
||||||
if isinstance(evt, SpeechRecognizedEvent):
|
if isinstance(evt, SpeechRecognizedEvent):
|
||||||
self._on_speech_recognized(phrase=evt.args['phrase'])
|
self._on_speech_recognized(phrase=evt.args['phrase'])
|
||||||
if isinstance(evt, IntentMatchedEvent):
|
if isinstance(evt, IntentRecognizedEvent):
|
||||||
self._on_intent_matched(
|
self._on_intent_matched(
|
||||||
intent=evt.args['intent'], slots=evt.args.get('slots', {})
|
intent=evt.args['intent'], slots=evt.args.get('slots', {})
|
||||||
)
|
)
|
||||||
if isinstance(evt, ConversationTimeoutEvent):
|
if isinstance(evt, ConversationTimeoutEvent):
|
||||||
self._on_conversation_timeout()
|
self._on_conversation_timeout()
|
||||||
|
|
||||||
|
if evt:
|
||||||
|
self._speech_processor.reset()
|
||||||
|
|
||||||
if (
|
if (
|
||||||
evt
|
evt
|
||||||
and self.state == AssistantState.DETECTING_SPEECH
|
and self.state == AssistantState.DETECTING_SPEECH
|
||||||
|
|
|
@ -16,22 +16,16 @@ class ConversationContext:
|
||||||
intent: Optional[Intent] = None
|
intent: Optional[Intent] = None
|
||||||
timeout: Optional[float] = None
|
timeout: Optional[float] = None
|
||||||
t_start: Optional[float] = None
|
t_start: Optional[float] = None
|
||||||
t_end: Optional[float] = None
|
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
self.reset()
|
self.reset()
|
||||||
self.t_start = time()
|
self.t_start = time()
|
||||||
|
|
||||||
def stop(self):
|
|
||||||
self.reset()
|
|
||||||
self.t_end = time()
|
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self.transcript = ''
|
self.transcript = ''
|
||||||
self.intent = None
|
self.intent = None
|
||||||
self.is_final = False
|
self.is_final = False
|
||||||
self.t_start = None
|
self.t_start = None
|
||||||
self.t_end = None
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def timed_out(self):
|
def timed_out(self):
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import logging
|
import logging
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from queue import Empty, Queue
|
from queue import Empty, Queue
|
||||||
from threading import Event, Thread, get_ident
|
from threading import Event, RLock, Thread, get_ident
|
||||||
from typing import Optional, Sequence
|
from typing import Any, Optional, Sequence
|
||||||
|
|
||||||
from platypush.message.event.assistant import AssistantEvent
|
from platypush.message.event.assistant import AssistantEvent
|
||||||
|
|
||||||
|
@ -19,12 +19,14 @@ class BaseProcessor(ABC, Thread):
|
||||||
self,
|
self,
|
||||||
*args,
|
*args,
|
||||||
stop_event: Event,
|
stop_event: Event,
|
||||||
|
enabled: bool = True,
|
||||||
conversation_timeout: Optional[float] = None,
|
conversation_timeout: Optional[float] = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
super().__init__(*args, name=f'picovoice:{self.__class__.__name__}', **kwargs)
|
super().__init__(*args, name=f'picovoice:{self.__class__.__name__}', **kwargs)
|
||||||
|
|
||||||
self.logger = logging.getLogger(self.name)
|
self.logger = logging.getLogger(self.name)
|
||||||
|
self._enabled = enabled
|
||||||
self._audio_queue = Queue()
|
self._audio_queue = Queue()
|
||||||
self._stop_event = stop_event
|
self._stop_event = stop_event
|
||||||
self._ctx = ConversationContext(timeout=conversation_timeout)
|
self._ctx = ConversationContext(timeout=conversation_timeout)
|
||||||
|
@ -36,6 +38,7 @@ class BaseProcessor(ABC, Thread):
|
||||||
# processing and it's ready to accept a new audio frame
|
# processing and it's ready to accept a new audio frame
|
||||||
self._processing_done = Event()
|
self._processing_done = Event()
|
||||||
self._processing_done.set()
|
self._processing_done.set()
|
||||||
|
self._state_lock = RLock()
|
||||||
|
|
||||||
def should_stop(self) -> bool:
|
def should_stop(self) -> bool:
|
||||||
return self._stop_event.is_set()
|
return self._stop_event.is_set()
|
||||||
|
@ -44,10 +47,26 @@ class BaseProcessor(ABC, Thread):
|
||||||
return self._stop_event.wait(timeout)
|
return self._stop_event.wait(timeout)
|
||||||
|
|
||||||
def enqueue(self, audio: Sequence[int]):
|
def enqueue(self, audio: Sequence[int]):
|
||||||
|
if not self._enabled:
|
||||||
|
return
|
||||||
|
|
||||||
self._event_wait.set()
|
self._event_wait.set()
|
||||||
self._processing_done.clear()
|
self._processing_done.clear()
|
||||||
self._audio_queue.put_nowait(audio)
|
self._audio_queue.put_nowait(audio)
|
||||||
|
|
||||||
|
def reset(self) -> Optional[Any]:
|
||||||
|
"""
|
||||||
|
Reset any pending context.
|
||||||
|
"""
|
||||||
|
if not self._enabled:
|
||||||
|
return
|
||||||
|
|
||||||
|
with self._state_lock:
|
||||||
|
self._ctx.reset()
|
||||||
|
self._event_queue.queue.clear()
|
||||||
|
self._event_wait.clear()
|
||||||
|
self._processing_done.set()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def processing_done(self) -> Event:
|
def processing_done(self) -> Event:
|
||||||
return self._processing_done
|
return self._processing_done
|
||||||
|
@ -77,20 +96,18 @@ class BaseProcessor(ABC, Thread):
|
||||||
"""
|
"""
|
||||||
:return: The latest event that was processed by the processor.
|
:return: The latest event that was processed by the processor.
|
||||||
"""
|
"""
|
||||||
evt = None
|
with self._state_lock:
|
||||||
try:
|
evt = None
|
||||||
while True:
|
try:
|
||||||
evt = self._event_queue.get_nowait()
|
while True:
|
||||||
except Empty:
|
evt = self._event_queue.get_nowait()
|
||||||
pass
|
except Empty:
|
||||||
|
pass
|
||||||
|
|
||||||
if evt:
|
if evt:
|
||||||
self._event_wait.clear()
|
self._event_wait.clear()
|
||||||
|
|
||||||
return evt
|
return evt
|
||||||
|
|
||||||
def clear_wait(self):
|
|
||||||
self._event_wait.clear()
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def process(self, audio: Sequence[int]) -> Optional[AssistantEvent]:
|
def process(self, audio: Sequence[int]) -> Optional[AssistantEvent]:
|
||||||
|
@ -100,7 +117,10 @@ class BaseProcessor(ABC, Thread):
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
super().run()
|
super().run()
|
||||||
self._ctx.reset()
|
if not self._enabled:
|
||||||
|
self.wait_stop()
|
||||||
|
|
||||||
|
self.reset()
|
||||||
self._processing_done.clear()
|
self._processing_done.clear()
|
||||||
self.logger.info('Processor started: %s', self.name)
|
self.logger.info('Processor started: %s', self.name)
|
||||||
|
|
||||||
|
@ -119,7 +139,11 @@ class BaseProcessor(ABC, Thread):
|
||||||
try:
|
try:
|
||||||
self._processing_done.clear()
|
self._processing_done.clear()
|
||||||
event = self.process(audio)
|
event = self.process(audio)
|
||||||
|
|
||||||
if event:
|
if event:
|
||||||
|
self.logger.debug(
|
||||||
|
'Dispatching event processed from %s: %s', self.name, event
|
||||||
|
)
|
||||||
self._event_queue.put_nowait(event)
|
self._event_queue.put_nowait(event)
|
||||||
self._processing_done.set()
|
self._processing_done.set()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -133,10 +157,14 @@ class BaseProcessor(ABC, Thread):
|
||||||
self._processing_done.set()
|
self._processing_done.set()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self._ctx.reset()
|
self._processing_done.set()
|
||||||
|
self.reset()
|
||||||
self.logger.info('Processor stopped: %s', self.name)
|
self.logger.info('Processor stopped: %s', self.name)
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
|
if not self._enabled:
|
||||||
|
return
|
||||||
|
|
||||||
self._audio_queue.put_nowait(None)
|
self._audio_queue.put_nowait(None)
|
||||||
if self.is_alive() and self.ident != get_ident():
|
if self.is_alive() and self.ident != get_ident():
|
||||||
self.logger.debug('Stopping %s', self.name)
|
self.logger.debug('Stopping %s', self.name)
|
||||||
|
@ -146,7 +174,7 @@ class BaseProcessor(ABC, Thread):
|
||||||
self._ctx.start()
|
self._ctx.start()
|
||||||
|
|
||||||
def on_conversation_end(self):
|
def on_conversation_end(self):
|
||||||
self._ctx.stop()
|
self.reset()
|
||||||
|
|
||||||
def on_conversation_reset(self):
|
def on_conversation_reset(self):
|
||||||
self._ctx.reset()
|
self.reset()
|
||||||
|
|
|
@ -4,7 +4,7 @@ import pvrhino
|
||||||
|
|
||||||
from platypush.message.event.assistant import (
|
from platypush.message.event.assistant import (
|
||||||
ConversationTimeoutEvent,
|
ConversationTimeoutEvent,
|
||||||
IntentMatchedEvent,
|
IntentRecognizedEvent,
|
||||||
)
|
)
|
||||||
|
|
||||||
from ._base import BaseProcessor
|
from ._base import BaseProcessor
|
||||||
|
@ -44,9 +44,9 @@ class IntentProcessor(BaseProcessor):
|
||||||
|
|
||||||
def process(
|
def process(
|
||||||
self, audio: Sequence[int]
|
self, audio: Sequence[int]
|
||||||
) -> Optional[Union[IntentMatchedEvent, ConversationTimeoutEvent]]:
|
) -> Optional[Union[IntentRecognizedEvent, ConversationTimeoutEvent]]:
|
||||||
"""
|
"""
|
||||||
Process the audio and return an ``IntentMatchedEvent`` if the intent was
|
Process the audio and return an ``IntentRecognizedEvent`` if the intent was
|
||||||
understood, or a ``ConversationTimeoutEvent`` if the conversation timed
|
understood, or a ``ConversationTimeoutEvent`` if the conversation timed
|
||||||
out, or ``None`` if the intent processing is not yet finalized.
|
out, or ``None`` if the intent processing is not yet finalized.
|
||||||
"""
|
"""
|
||||||
|
@ -62,25 +62,32 @@ class IntentProcessor(BaseProcessor):
|
||||||
)
|
)
|
||||||
|
|
||||||
if inference.is_understood:
|
if inference.is_understood:
|
||||||
event = IntentMatchedEvent(
|
event = IntentRecognizedEvent(
|
||||||
intent=inference.intent,
|
intent=inference.intent,
|
||||||
slots={slot.key: slot.value for slot in inference.slots},
|
slots=dict(inference.slots),
|
||||||
)
|
)
|
||||||
|
|
||||||
if not event and self._ctx.timed_out:
|
if not event and self._ctx.timed_out:
|
||||||
event = ConversationTimeoutEvent()
|
event = ConversationTimeoutEvent()
|
||||||
|
|
||||||
if event:
|
|
||||||
self._ctx.reset()
|
|
||||||
|
|
||||||
if event:
|
|
||||||
self.logger.debug('Intent event: %s', event)
|
|
||||||
|
|
||||||
return event
|
return event
|
||||||
|
|
||||||
|
def reset(self) -> None:
|
||||||
|
if not self._enabled:
|
||||||
|
return
|
||||||
|
|
||||||
|
with self._state_lock:
|
||||||
|
self._get_rhino().reset()
|
||||||
|
super().reset()
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
|
if not self._enabled:
|
||||||
|
return
|
||||||
|
|
||||||
super().stop()
|
super().stop()
|
||||||
objs = self._rhino.copy()
|
|
||||||
for key, obj in objs.items():
|
with self._state_lock:
|
||||||
obj.delete()
|
objs = self._rhino.copy()
|
||||||
self._rhino.pop(key)
|
for key, obj in objs.items():
|
||||||
|
obj.delete()
|
||||||
|
self._rhino.pop(key)
|
||||||
|
|
|
@ -3,7 +3,7 @@ from queue import Queue
|
||||||
from threading import Event
|
from threading import Event
|
||||||
from typing import Callable, Optional, Sequence
|
from typing import Callable, Optional, Sequence
|
||||||
|
|
||||||
from platypush.message.event.assistant import AssistantEvent
|
from platypush.message.event.assistant import AssistantEvent, ConversationTimeoutEvent
|
||||||
from platypush.utils import wait_for_either
|
from platypush.utils import wait_for_either
|
||||||
|
|
||||||
from ._intent import IntentProcessor
|
from ._intent import IntentProcessor
|
||||||
|
@ -39,12 +39,14 @@ class SpeechProcessor:
|
||||||
self._stt_processor = SttProcessor(
|
self._stt_processor = SttProcessor(
|
||||||
conversation_timeout=conversation_timeout,
|
conversation_timeout=conversation_timeout,
|
||||||
stop_event=stop_event,
|
stop_event=stop_event,
|
||||||
|
enabled=stt_enabled,
|
||||||
get_cheetah_args=get_cheetah_args,
|
get_cheetah_args=get_cheetah_args,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._intent_processor = IntentProcessor(
|
self._intent_processor = IntentProcessor(
|
||||||
conversation_timeout=conversation_timeout,
|
conversation_timeout=conversation_timeout,
|
||||||
stop_event=stop_event,
|
stop_event=stop_event,
|
||||||
|
enabled=intent_enabled,
|
||||||
get_rhino_args=get_rhino_args,
|
get_rhino_args=get_rhino_args,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -77,24 +79,24 @@ class SpeechProcessor:
|
||||||
if self.should_stop():
|
if self.should_stop():
|
||||||
return evt
|
return evt
|
||||||
|
|
||||||
# Priority to the intent processor event, if the processor is enabled
|
with self._stt_processor._state_lock, self._intent_processor._state_lock:
|
||||||
if self._intent_enabled:
|
# Priority to the intent processor event, if the processor is enabled
|
||||||
evt = self._intent_processor.last_event()
|
if self._intent_enabled:
|
||||||
|
evt = self._intent_processor.last_event()
|
||||||
|
|
||||||
|
# If the intent processor didn't return any event, then return the STT
|
||||||
|
# processor event
|
||||||
|
if (
|
||||||
|
not evt or isinstance(evt, ConversationTimeoutEvent)
|
||||||
|
) and self._stt_enabled:
|
||||||
|
# self._stt_processor.processing_done.wait(timeout=timeout)
|
||||||
|
evt = self._stt_processor.last_event()
|
||||||
|
|
||||||
if evt:
|
if evt:
|
||||||
self.logger.debug('Intent processor event: %s', evt)
|
self._stt_processor.reset()
|
||||||
|
self._intent_processor.reset()
|
||||||
|
|
||||||
# If the intent processor didn't return any event, then return the STT
|
return evt
|
||||||
# processor event
|
|
||||||
if not evt and self._stt_enabled:
|
|
||||||
evt = self._stt_processor.last_event()
|
|
||||||
if evt:
|
|
||||||
self.logger.debug('STT processor event: %s', evt)
|
|
||||||
|
|
||||||
if evt:
|
|
||||||
self._stt_processor.clear_wait()
|
|
||||||
self._intent_processor.clear_wait()
|
|
||||||
|
|
||||||
return evt
|
|
||||||
|
|
||||||
def process(
|
def process(
|
||||||
self, audio: Sequence[int], block: bool = True, timeout: Optional[float] = None
|
self, audio: Sequence[int], block: bool = True, timeout: Optional[float] = None
|
||||||
|
@ -138,8 +140,11 @@ class SpeechProcessor:
|
||||||
"""
|
"""
|
||||||
Start the STT and Intent processors.
|
Start the STT and Intent processors.
|
||||||
"""
|
"""
|
||||||
self._stt_processor.start()
|
if self._stt_enabled:
|
||||||
self._intent_processor.start()
|
self._stt_processor.start()
|
||||||
|
|
||||||
|
if self._intent_enabled:
|
||||||
|
self._intent_processor.start()
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
"""
|
"""
|
||||||
|
@ -169,6 +174,13 @@ class SpeechProcessor:
|
||||||
if self._intent_enabled:
|
if self._intent_enabled:
|
||||||
self._intent_processor.on_conversation_reset()
|
self._intent_processor.on_conversation_reset()
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
"""
|
||||||
|
Reset the state of the STT and Intent processors.
|
||||||
|
"""
|
||||||
|
self._stt_processor.reset()
|
||||||
|
self._intent_processor.reset()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def sample_rate(self) -> int:
|
def sample_rate(self) -> int:
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -37,16 +37,19 @@ class SttProcessor(BaseProcessor):
|
||||||
return self._get_cheetah().frame_length
|
return self._get_cheetah().frame_length
|
||||||
|
|
||||||
def _get_cheetah(self) -> pvcheetah.Cheetah:
|
def _get_cheetah(self) -> pvcheetah.Cheetah:
|
||||||
if not self._cheetah.get(self._model_path):
|
with self._state_lock:
|
||||||
self.logger.debug(
|
if not self._cheetah.get(self._model_path):
|
||||||
'Creating Cheetah instance for model %s', self._model_path
|
self.logger.debug(
|
||||||
)
|
'Creating Cheetah instance for model %s', self._model_path
|
||||||
self._cheetah[self._model_path] = pvcheetah.create(
|
)
|
||||||
**self._get_cheetah_args()
|
self._cheetah[self._model_path] = pvcheetah.create(
|
||||||
)
|
**self._get_cheetah_args()
|
||||||
self.logger.debug('Cheetah instance created for model %s', self._model_path)
|
)
|
||||||
|
self.logger.debug(
|
||||||
|
'Cheetah instance created for model %s', self._model_path
|
||||||
|
)
|
||||||
|
|
||||||
return self._cheetah[self._model_path]
|
return self._cheetah[self._model_path]
|
||||||
|
|
||||||
def process(
|
def process(
|
||||||
self, audio: Sequence[int]
|
self, audio: Sequence[int]
|
||||||
|
@ -54,6 +57,7 @@ class SttProcessor(BaseProcessor):
|
||||||
event = None
|
event = None
|
||||||
cheetah = self._get_cheetah()
|
cheetah = self._get_cheetah()
|
||||||
partial_transcript, self._ctx.is_final = cheetah.process(audio)
|
partial_transcript, self._ctx.is_final = cheetah.process(audio)
|
||||||
|
last_transcript = self._ctx.transcript
|
||||||
|
|
||||||
# Concatenate the partial transcript to the context
|
# Concatenate the partial transcript to the context
|
||||||
if partial_transcript:
|
if partial_transcript:
|
||||||
|
@ -69,24 +73,38 @@ class SttProcessor(BaseProcessor):
|
||||||
if self._ctx.is_final or self._ctx.timed_out:
|
if self._ctx.is_final or self._ctx.timed_out:
|
||||||
phrase = cheetah.flush() or ''
|
phrase = cheetah.flush() or ''
|
||||||
self._ctx.transcript += phrase
|
self._ctx.transcript += phrase
|
||||||
|
if self._ctx.transcript and self._ctx.transcript != last_transcript:
|
||||||
|
self.logger.debug('Processed STT transcript: %s', self._ctx.transcript)
|
||||||
|
last_transcript = self._ctx.transcript
|
||||||
|
|
||||||
phrase = self._ctx.transcript
|
phrase = self._ctx.transcript
|
||||||
phrase = phrase[:1].lower() + phrase[1:]
|
phrase = (phrase[:1].lower() + phrase[1:]).strip()
|
||||||
event = (
|
event = (
|
||||||
SpeechRecognizedEvent(phrase=phrase)
|
SpeechRecognizedEvent(phrase=phrase)
|
||||||
if phrase
|
if phrase
|
||||||
else ConversationTimeoutEvent()
|
else ConversationTimeoutEvent()
|
||||||
)
|
)
|
||||||
|
|
||||||
self._ctx.reset()
|
self.reset()
|
||||||
|
|
||||||
if event:
|
|
||||||
self.logger.debug('STT event: %s', event)
|
|
||||||
|
|
||||||
return event
|
return event
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
if not self._enabled:
|
||||||
|
return
|
||||||
|
|
||||||
|
with self._state_lock:
|
||||||
|
super().reset()
|
||||||
|
self._get_cheetah().flush()
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
|
if not self._enabled:
|
||||||
|
return
|
||||||
|
|
||||||
super().stop()
|
super().stop()
|
||||||
objs = self._cheetah.copy()
|
|
||||||
for key, obj in objs.items():
|
with self._state_lock:
|
||||||
obj.delete()
|
objs = self._cheetah.copy()
|
||||||
self._cheetah.pop(key)
|
for key, obj in objs.items():
|
||||||
|
obj.delete()
|
||||||
|
self._cheetah.pop(key)
|
||||||
|
|
|
@ -6,7 +6,7 @@ manifest:
|
||||||
- platypush.message.event.assistant.ConversationStartEvent
|
- platypush.message.event.assistant.ConversationStartEvent
|
||||||
- platypush.message.event.assistant.ConversationTimeoutEvent
|
- platypush.message.event.assistant.ConversationTimeoutEvent
|
||||||
- platypush.message.event.assistant.HotwordDetectedEvent
|
- platypush.message.event.assistant.HotwordDetectedEvent
|
||||||
- platypush.message.event.assistant.IntentMatchedEvent
|
- platypush.message.event.assistant.IntentRecognizedEvent
|
||||||
- platypush.message.event.assistant.MicMutedEvent
|
- platypush.message.event.assistant.MicMutedEvent
|
||||||
- platypush.message.event.assistant.MicUnmutedEvent
|
- platypush.message.event.assistant.MicUnmutedEvent
|
||||||
- platypush.message.event.assistant.NoResponseEvent
|
- platypush.message.event.assistant.NoResponseEvent
|
||||||
|
|
Loading…
Reference in a new issue