Compare commits


2 Commits

Author SHA1 Message Date
Fabio Manganiello 99829a93cf
[assistant.picovoice] Sync between the speech and intent engines.
2024-04-22 02:05:19 +02:00
Fabio Manganiello 862d56a338
Prevent duplicate hook runs.
Instead of being a list, the hooks in the hook processor should be
backed by by-name and by-value maps.

Don't insert a hook if its exact backing method has already been
inserted. This is actually very common when hooks are defined as Python
snippets imported in other scripts too.
2024-04-22 02:05:19 +02:00
12 changed files with 209 additions and 104 deletions
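
A minimal, self-contained sketch of the deduplication scheme described in the second commit message (the registry class and its names are illustrative; only the by-name/by-value indexing idea and the id()-based duplicate check come from the actual change, which uses EventHook.build as the factory):

from typing import Callable, Dict, Union


class HookRegistry:
    def __init__(self):
        # Hooks are indexed both by name and by the id() of their backing
        # callable, so the same imported function is only registered once.
        self._by_name: Dict[str, dict] = {}
        self._by_value_id: Dict[int, dict] = {}

    def add(self, name: str, desc: Union[dict, Callable]):
        if id(desc) in self._by_value_id:
            return  # Exact same backing method already inserted: skip it

        hook = {'name': name, 'desc': desc}  # Stand-in for EventHook.build()
        self._by_name[name] = hook
        self._by_value_id[id(desc)] = hook


# The same function imported by two different scripts yields a single hook:
def on_event():
    pass


registry = HookRegistry()
registry.add('script_a.on_event', on_event)
registry.add('script_b.on_event', on_event)  # No-op: duplicate id(on_event)
assert len(registry._by_name) == 1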

View File

@@ -1,4 +1,4 @@
-from multiprocessing import RLock, Queue
+from multiprocessing import RLock, Queue, active_children
 import os
 from queue import Empty
 import socket
@@ -57,6 +57,25 @@ class CommandStream(ControllableProcess):
         self.reset()
         return super().close()

+    def _term_or_kill(self, kill: bool = False, visited=None) -> None:
+        func = 'kill' if kill else 'terminate'
+        visited = visited or set()
+        visited.add(id(self))
+
+        for child in active_children():
+            child_id = id(child)
+            if child_id not in visited:
+                visited.add(child_id)
+                getattr(child, func)()
+
+        getattr(super(), func)()
+
+    def terminate(self, visited=None) -> None:
+        self._term_or_kill(kill=False, visited=visited)
+
+    def kill(self) -> None:
+        self._term_or_kill(kill=True)
+
     def __enter__(self) -> "CommandStream":
         self.reset()
         sock = self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)

View File

@@ -1,4 +1,5 @@
 import sys
+from typing import Callable, List, Union

 from ..hook import EventHook
@@ -20,10 +21,23 @@ class EventProcessor:
         if hooks is None:
             hooks = Config.get_event_hooks()

-        self.hooks = []
+        self._hooks_by_name = {}
+        self._hooks_by_value_id = {}
         for name, hook in hooks.items():
-            h = EventHook.build(name=name, hook=hook)
-            self.hooks.append(h)
+            self.add_hook(name, hook)
+
+    @property
+    def hooks(self) -> List[EventHook]:
+        return list(self._hooks_by_name.values())
+
+    def add_hook(self, name: str, desc: Union[dict, Callable]):
+        hook_id = id(desc)
+        if hook_id in self._hooks_by_value_id:
+            return  # Don't add the same hook twice
+
+        hook = EventHook.build(name=name, hook=desc)
+        self._hooks_by_name[name] = hook
+        self._hooks_by_value_id[hook_id] = hook

     @staticmethod
     def notify_web_clients(event):

View File

@@ -226,7 +226,7 @@ class SpeechRecognizedEvent(AssistantEvent):
         return result

-class IntentMatchedEvent(AssistantEvent):
+class IntentRecognizedEvent(AssistantEvent):
     """
     Event triggered when an intent is matched by a speech command.
     """

View File

@@ -259,9 +259,9 @@ class AssistantPlugin(Plugin, AssistantEntityManager, ABC):
         self._send_event(SpeechRecognizedEvent, phrase=phrase)

     def _on_intent_matched(self, intent: str, slots: Optional[Dict[str, Any]] = None):
-        from platypush.message.event.assistant import IntentMatchedEvent
+        from platypush.message.event.assistant import IntentRecognizedEvent

-        self._send_event(IntentMatchedEvent, intent=intent, slots=slots)
+        self._send_event(IntentRecognizedEvent, intent=intent, slots=slots)

     def _on_alarm_start(self):
         from platypush.message.event.assistant import AlarmStartedEvent

View File

@@ -54,7 +54,6 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
         access_key: str,
         hotword_enabled: bool = True,
         stt_enabled: bool = True,
-        intent_enabled: bool = False,
         keywords: Optional[Sequence[str]] = None,
         keyword_paths: Optional[Sequence[str]] = None,
         keyword_model_path: Optional[str] = None,
@@ -77,10 +76,6 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
         :param stt_enabled: Enable the speech-to-text engine (default: True).
             **Note**: The speech-to-text engine requires you to add Cheetah to
             the products available in your Picovoice account.
-        :param intent_enabled: Enable the intent recognition engine (default:
-            False).
-            **Note**: The intent recognition engine requires you to add Rhino
-            to the products available in your Picovoice account.
         :param keywords: List of keywords to listen for (e.g. ``alexa``, ``ok
             google``...). This is required if the wake-word engine is enabled.
             See the `Porcupine keywords repository
@@ -142,7 +137,7 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
             Then a phrase like "turn on the lights in the living room" would
             trigger a
-            :class:`platypush.message.event.assistant.IntentMatchedEvent` with:
+            :class:`platypush.message.event.assistant.IntentRecognizedEvent` with:

             .. code-block:: json
@@ -155,6 +150,8 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
                  }
               }

+            **Note**: The intent recognition engine requires you to add Rhino
+            to the products available in your Picovoice account.
         :param endpoint_duration: If set, the assistant will stop listening when
             no speech is detected for the specified duration (in seconds) after
             the end of an utterance.
@@ -191,7 +188,6 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
             'access_key': access_key,
             'hotword_enabled': hotword_enabled,
             'stt_enabled': stt_enabled,
-            'intent_enabled': intent_enabled,
             'keywords': keywords,
             'keyword_paths': (
                 os.path.expanduser(keyword_path)
@@ -208,7 +204,11 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
             ),
             'endpoint_duration': endpoint_duration,
             'enable_automatic_punctuation': enable_automatic_punctuation,
-            'start_conversation_on_hotword': start_conversation_on_hotword,
+            'start_conversation_on_hotword': (
+                start_conversation_on_hotword
+                if (intent_model_path or stt_enabled)
+                else False
+            ),
             'audio_queue_size': audio_queue_size,
             'conversation_timeout': conversation_timeout,
             'muted': muted,
@@ -420,7 +420,7 @@ class AssistantPicovoicePlugin(AssistantPlugin, RunnablePlugin):
             try:
                 for event in self._assistant:
                     if event is not None:
-                        self.logger.debug('Picovoice assistant event: %s', event)
+                        self.logger.debug('Dequeued assistant event: %s', event)
             except KeyboardInterrupt:
                 break
             except Exception as e:

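Net effect of removing the `intent_enabled` flag, visible in the hunks above and in the `Assistant` changes below: the intent engine is now implicitly enabled whenever an intent model is configured. A standalone sketch of the new gating (the class skeleton and the sample model path are illustrative; the property body and the `start_conversation_on_hotword` expression mirror the diff):

from typing import Optional


class GatingSketch:
    def __init__(
        self,
        stt_enabled: bool = True,
        intent_model_path: Optional[str] = None,
        start_conversation_on_hotword: bool = True,
    ):
        self.stt_enabled = stt_enabled
        self._intent_model_path = intent_model_path
        # A hotword-triggered conversation is only useful if at least one
        # follow-up engine (STT or intent) is available to consume the audio.
        self.start_conversation_on_hotword = (
            start_conversation_on_hotword
            if (intent_model_path or stt_enabled)
            else False
        )

    @property
    def intent_enabled(self) -> bool:
        # Enabled if and only if a Rhino context (intent model) was provided
        return self._intent_model_path is not None


assert not GatingSketch().intent_enabled
assert GatingSketch(intent_model_path='~/ctx.rhn').intent_enabled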
View File

@@ -12,7 +12,7 @@ from platypush.message.event.assistant import (
     AssistantEvent,
     ConversationTimeoutEvent,
     HotwordDetectedEvent,
-    IntentMatchedEvent,
+    IntentRecognizedEvent,
     SpeechRecognizedEvent,
 )
 from platypush.plugins.tts.picovoice import TtsPicovoicePlugin
@@ -37,7 +37,6 @@ class Assistant(Thread):
         stop_event: Event,
         hotword_enabled: bool = True,
         stt_enabled: bool = True,
-        intent_enabled: bool = False,
         keywords: Optional[Sequence[str]] = None,
         keyword_paths: Optional[Sequence[str]] = None,
         keyword_model_path: Optional[str] = None,
@@ -58,15 +57,12 @@ class Assistant(Thread):
         on_hotword_detected=_default_callback,
     ):
         super().__init__(name='picovoice:Assistant')
-        if intent_enabled:
-            assert intent_model_path, 'Intent model path not provided'

         self._access_key = access_key
         self._stop_event = stop_event
         self.logger = logging.getLogger(__name__)
         self.hotword_enabled = hotword_enabled
         self.stt_enabled = stt_enabled
-        self.intent_enabled = intent_enabled
         self.keywords = list(keywords or [])
         self.keyword_paths = None
         self.keyword_model_path = None
@@ -86,11 +82,11 @@ class Assistant(Thread):
         self._speech_processor = SpeechProcessor(
             stop_event=stop_event,
             stt_enabled=stt_enabled,
-            intent_enabled=intent_enabled,
+            intent_enabled=self.intent_enabled,
             conversation_timeout=conversation_timeout,
             model_path=speech_model_path,
             get_cheetah_args=self._get_speech_engine_args,
-            get_rhino_args=self._get_speech_engine_args,
+            get_rhino_args=self._get_intent_engine_args,
         )

         self._on_conversation_start = on_conversation_start
@@ -133,15 +129,19 @@ class Assistant(Thread):
         self._porcupine: Optional[pvporcupine.Porcupine] = None

     @property
-    def is_responding(self):
+    def intent_enabled(self) -> bool:
+        return self.intent_model_path is not None
+
+    @property
+    def is_responding(self) -> bool:
         return self._responding.is_set()

     @property
-    def speech_model_path(self):
+    def speech_model_path(self) -> Optional[str]:
         return self._speech_model_path_override or self._speech_model_path

     @property
-    def intent_model_path(self):
+    def intent_model_path(self) -> Optional[str]:
         return self._intent_model_path_override or self._intent_model_path

     @property
@@ -224,6 +224,16 @@ class Assistant(Thread):

         return args

+    def _get_intent_engine_args(self) -> dict:
+        args: Dict[str, Any] = {'access_key': self._access_key}
+        args['context_path'] = self.intent_model_path
+        if self.endpoint_duration:
+            args['endpoint_duration_sec'] = self.endpoint_duration
+        if self.enable_automatic_punctuation:
+            args['enable_automatic_punctuation'] = self.enable_automatic_punctuation
+        return args
+
     def __enter__(self):
         """
         Get the assistant ready to start processing audio frames.
@@ -301,13 +311,16 @@ class Assistant(Thread):
         if isinstance(evt, SpeechRecognizedEvent):
             self._on_speech_recognized(phrase=evt.args['phrase'])

-        if isinstance(evt, IntentMatchedEvent):
+        if isinstance(evt, IntentRecognizedEvent):
             self._on_intent_matched(
                 intent=evt.args['intent'], slots=evt.args.get('slots', {})
             )

         if isinstance(evt, ConversationTimeoutEvent):
             self._on_conversation_timeout()

+        if evt:
+            self._speech_processor.reset()
+
         if (
             evt
             and self.state == AssistantState.DETECTING_SPEECH

View File

@@ -16,22 +16,16 @@ class ConversationContext:
     intent: Optional[Intent] = None
     timeout: Optional[float] = None
     t_start: Optional[float] = None
-    t_end: Optional[float] = None

     def start(self):
         self.reset()
         self.t_start = time()

-    def stop(self):
-        self.reset()
-        self.t_end = time()
-
     def reset(self):
         self.transcript = ''
         self.intent = None
         self.is_final = False
         self.t_start = None
-        self.t_end = None

     @property
     def timed_out(self):

View File

@@ -1,8 +1,8 @@
 import logging
 from abc import ABC, abstractmethod
 from queue import Empty, Queue
-from threading import Event, Thread, get_ident
-from typing import Optional, Sequence
+from threading import Event, RLock, Thread, get_ident
+from typing import Any, Optional, Sequence

 from platypush.message.event.assistant import AssistantEvent
@@ -19,12 +19,14 @@ class BaseProcessor(ABC, Thread):
         self,
         *args,
         stop_event: Event,
+        enabled: bool = True,
         conversation_timeout: Optional[float] = None,
         **kwargs,
     ):
         super().__init__(*args, name=f'picovoice:{self.__class__.__name__}', **kwargs)

         self.logger = logging.getLogger(self.name)
+        self._enabled = enabled
         self._audio_queue = Queue()
         self._stop_event = stop_event
         self._ctx = ConversationContext(timeout=conversation_timeout)
@@ -36,6 +38,7 @@ class BaseProcessor(ABC, Thread):
         # processing and it's ready to accept a new audio frame
         self._processing_done = Event()
         self._processing_done.set()
+        self._state_lock = RLock()

     def should_stop(self) -> bool:
         return self._stop_event.is_set()
@@ -44,10 +47,26 @@ class BaseProcessor(ABC, Thread):
         return self._stop_event.wait(timeout)

     def enqueue(self, audio: Sequence[int]):
+        if not self._enabled:
+            return
+
         self._event_wait.set()
         self._processing_done.clear()
         self._audio_queue.put_nowait(audio)

+    def reset(self) -> Optional[Any]:
+        """
+        Reset any pending context.
+        """
+        if not self._enabled:
+            return
+
+        with self._state_lock:
+            self._ctx.reset()
+            self._event_queue.queue.clear()
+            self._event_wait.clear()
+            self._processing_done.set()
+
     @property
     def processing_done(self) -> Event:
         return self._processing_done
@@ -77,20 +96,18 @@ class BaseProcessor(ABC, Thread):
         """
         :return: The latest event that was processed by the processor.
         """
-        evt = None
-        try:
-            while True:
-                evt = self._event_queue.get_nowait()
-        except Empty:
-            pass
-
-        if evt:
-            self._event_wait.clear()
-
-        return evt
-
-    def clear_wait(self):
-        self._event_wait.clear()
+        with self._state_lock:
+            evt = None
+            try:
+                while True:
+                    evt = self._event_queue.get_nowait()
+            except Empty:
+                pass
+
+            if evt:
+                self._event_wait.clear()
+
+            return evt

     @abstractmethod
     def process(self, audio: Sequence[int]) -> Optional[AssistantEvent]:
@@ -100,7 +117,10 @@ class BaseProcessor(ABC, Thread):
     def run(self):
         super().run()
-        self._ctx.reset()
+        if not self._enabled:
+            self.wait_stop()
+
+        self.reset()
         self._processing_done.clear()
         self.logger.info('Processor started: %s', self.name)
@@ -119,7 +139,11 @@ class BaseProcessor(ABC, Thread):
             try:
                 self._processing_done.clear()
                 event = self.process(audio)
+
                 if event:
+                    self.logger.debug(
+                        'Dispatching event processed from %s: %s', self.name, event
+                    )
                     self._event_queue.put_nowait(event)
                     self._processing_done.set()
             except Exception as e:
@@ -133,10 +157,14 @@ class BaseProcessor(ABC, Thread):
                 self._processing_done.set()
                 continue

-        self._ctx.reset()
+        self._processing_done.set()
+        self.reset()
         self.logger.info('Processor stopped: %s', self.name)

     def stop(self):
+        if not self._enabled:
+            return
+
         self._audio_queue.put_nowait(None)
         if self.is_alive() and self.ident != get_ident():
             self.logger.debug('Stopping %s', self.name)
@@ -146,7 +174,7 @@ class BaseProcessor(ABC, Thread):
         self._ctx.start()

     def on_conversation_end(self):
-        self._ctx.stop()
+        self.reset()

     def on_conversation_reset(self):
-        self._ctx.reset()
+        self.reset()

View File

@@ -4,7 +4,7 @@ import pvrhino

 from platypush.message.event.assistant import (
     ConversationTimeoutEvent,
-    IntentMatchedEvent,
+    IntentRecognizedEvent,
 )

 from ._base import BaseProcessor
@@ -44,9 +44,9 @@ class IntentProcessor(BaseProcessor):
     def process(
         self, audio: Sequence[int]
-    ) -> Optional[Union[IntentMatchedEvent, ConversationTimeoutEvent]]:
+    ) -> Optional[Union[IntentRecognizedEvent, ConversationTimeoutEvent]]:
         """
-        Process the audio and return an ``IntentMatchedEvent`` if the intent was
+        Process the audio and return an ``IntentRecognizedEvent`` if the intent was
         understood, or a ``ConversationTimeoutEvent`` if the conversation timed
         out, or ``None`` if the intent processing is not yet finalized.
         """
@@ -62,25 +62,32 @@ class IntentProcessor(BaseProcessor):
             )

             if inference.is_understood:
-                event = IntentMatchedEvent(
+                event = IntentRecognizedEvent(
                     intent=inference.intent,
-                    slots={slot.key: slot.value for slot in inference.slots},
+                    slots=dict(inference.slots),
                 )

         if not event and self._ctx.timed_out:
             event = ConversationTimeoutEvent()

         if event:
-            self._ctx.reset()
+            self.logger.debug('Intent event: %s', event)

         return event

+    def reset(self) -> None:
+        if not self._enabled:
+            return
+
+        with self._state_lock:
+            self._get_rhino().reset()
+            super().reset()
+
     def stop(self):
+        if not self._enabled:
+            return
+
         super().stop()
-        objs = self._rhino.copy()
-        for key, obj in objs.items():
-            obj.delete()
-            self._rhino.pop(key)
+
+        with self._state_lock:
+            objs = self._rhino.copy()
+            for key, obj in objs.items():
+                obj.delete()
+                self._rhino.pop(key)

View File

@@ -3,7 +3,7 @@ from queue import Queue
 from threading import Event
 from typing import Callable, Optional, Sequence

-from platypush.message.event.assistant import AssistantEvent
+from platypush.message.event.assistant import AssistantEvent, ConversationTimeoutEvent
 from platypush.utils import wait_for_either

 from ._intent import IntentProcessor
@@ -39,12 +39,14 @@ class SpeechProcessor:
         self._stt_processor = SttProcessor(
             conversation_timeout=conversation_timeout,
             stop_event=stop_event,
+            enabled=stt_enabled,
             get_cheetah_args=get_cheetah_args,
         )

         self._intent_processor = IntentProcessor(
             conversation_timeout=conversation_timeout,
             stop_event=stop_event,
+            enabled=intent_enabled,
             get_rhino_args=get_rhino_args,
         )

@@ -77,24 +79,24 @@ class SpeechProcessor:
         if self.should_stop():
             return evt

-        # Priority to the intent processor event, if the processor is enabled
-        if self._intent_enabled:
-            evt = self._intent_processor.last_event()
-            if evt:
-                self.logger.debug('Intent processor event: %s', evt)
-
-        # If the intent processor didn't return any event, then return the STT
-        # processor event
-        if not evt and self._stt_enabled:
-            evt = self._stt_processor.last_event()
-            if evt:
-                self.logger.debug('STT processor event: %s', evt)
-
-        if evt:
-            self._stt_processor.clear_wait()
-            self._intent_processor.clear_wait()
-
-        return evt
+        with self._stt_processor._state_lock, self._intent_processor._state_lock:
+            # Priority to the intent processor event, if the processor is enabled
+            if self._intent_enabled:
+                evt = self._intent_processor.last_event()
+
+            # If the intent processor didn't return any event, then return the STT
+            # processor event
+            if (
+                not evt or isinstance(evt, ConversationTimeoutEvent)
+            ) and self._stt_enabled:
+                # self._stt_processor.processing_done.wait(timeout=timeout)
+                evt = self._stt_processor.last_event()
+
+            if evt:
+                self._stt_processor.reset()
+                self._intent_processor.reset()
+
+            return evt

     def process(
         self, audio: Sequence[int], block: bool = True, timeout: Optional[float] = None
@@ -138,8 +140,11 @@ class SpeechProcessor:
         """
         Start the STT and Intent processors.
         """
-        self._stt_processor.start()
-        self._intent_processor.start()
+        if self._stt_enabled:
+            self._stt_processor.start()
+
+        if self._intent_enabled:
+            self._intent_processor.start()

     def stop(self):
         """
@@ -169,6 +174,13 @@ class SpeechProcessor:
         if self._intent_enabled:
             self._intent_processor.on_conversation_reset()

+    def reset(self):
+        """
+        Reset the state of the STT and Intent processors.
+        """
+        self._stt_processor.reset()
+        self._intent_processor.reset()
+
     @property
     def sample_rate(self) -> int:
         """

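The reworked `SpeechProcessor.last_event` above gives the intent engine priority and only falls back to the STT event when the intent processor produced nothing, or produced a bare conversation timeout. The precedence rule, reduced to a pure function for clarity (the names below are stand-ins, not part of the codebase):

class ConversationTimeout:
    pass


def pick_event(intent_evt, stt_evt, intent_enabled: bool, stt_enabled: bool):
    evt = intent_evt if intent_enabled else None
    # A timeout from the intent engine is not final: the STT engine may still
    # have produced a free-form speech event worth reporting instead.
    if (not evt or isinstance(evt, ConversationTimeout)) and stt_enabled:
        evt = stt_evt
    return evt


assert pick_event('intent', 'speech', True, True) == 'intent'  # intent wins
assert pick_event(None, 'speech', True, True) == 'speech'      # STT fallback
assert pick_event(ConversationTimeout(), 'speech', True, True) == 'speech'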
View File

@@ -37,16 +37,19 @@ class SttProcessor(BaseProcessor):
         return self._get_cheetah().frame_length

     def _get_cheetah(self) -> pvcheetah.Cheetah:
-        if not self._cheetah.get(self._model_path):
-            self.logger.debug(
-                'Creating Cheetah instance for model %s', self._model_path
-            )
-            self._cheetah[self._model_path] = pvcheetah.create(
-                **self._get_cheetah_args()
-            )
-            self.logger.debug('Cheetah instance created for model %s', self._model_path)
-
-        return self._cheetah[self._model_path]
+        with self._state_lock:
+            if not self._cheetah.get(self._model_path):
+                self.logger.debug(
+                    'Creating Cheetah instance for model %s', self._model_path
+                )
+                self._cheetah[self._model_path] = pvcheetah.create(
+                    **self._get_cheetah_args()
+                )
+                self.logger.debug(
+                    'Cheetah instance created for model %s', self._model_path
+                )
+
+            return self._cheetah[self._model_path]

     def process(
         self, audio: Sequence[int]
@@ -54,6 +57,7 @@ class SttProcessor(BaseProcessor):
         event = None
         cheetah = self._get_cheetah()
         partial_transcript, self._ctx.is_final = cheetah.process(audio)
+        last_transcript = self._ctx.transcript

         # Concatenate the partial transcript to the context
         if partial_transcript:
@@ -69,24 +73,38 @@ class SttProcessor(BaseProcessor):
         if self._ctx.is_final or self._ctx.timed_out:
             phrase = cheetah.flush() or ''
             self._ctx.transcript += phrase
+
+            if self._ctx.transcript and self._ctx.transcript != last_transcript:
+                self.logger.debug('Processed STT transcript: %s', self._ctx.transcript)
+                last_transcript = self._ctx.transcript
+
             phrase = self._ctx.transcript
-            phrase = phrase[:1].lower() + phrase[1:]
+            phrase = (phrase[:1].lower() + phrase[1:]).strip()

             event = (
                 SpeechRecognizedEvent(phrase=phrase)
                 if phrase
                 else ConversationTimeoutEvent()
             )

-            self._ctx.reset()
+            self.reset()
+
+        if event:
+            self.logger.debug('STT event: %s', event)

         return event

+    def reset(self):
+        if not self._enabled:
+            return
+
+        with self._state_lock:
+            super().reset()
+            self._get_cheetah().flush()
+
     def stop(self):
+        if not self._enabled:
+            return
+
         super().stop()
-        objs = self._cheetah.copy()
-        for key, obj in objs.items():
-            obj.delete()
-            self._cheetah.pop(key)
+
+        with self._state_lock:
+            objs = self._cheetah.copy()
+            for key, obj in objs.items():
+                obj.delete()
+                self._cheetah.pop(key)

View File

@@ -6,7 +6,7 @@ manifest:
       - platypush.message.event.assistant.ConversationStartEvent
       - platypush.message.event.assistant.ConversationTimeoutEvent
      - platypush.message.event.assistant.HotwordDetectedEvent
-      - platypush.message.event.assistant.IntentMatchedEvent
+      - platypush.message.event.assistant.IntentRecognizedEvent
       - platypush.message.event.assistant.MicMutedEvent
       - platypush.message.event.assistant.MicUnmutedEvent
       - platypush.message.event.assistant.NoResponseEvent