[sound] Plugin refactor.

- Added `input_format`/`output_format` options to both input and output
  audio streams.

- Replaced the previous (confusing) occurrences of `ffmpeg_format` and
  `format`.

- Added custom `dtype` option for `sound.play`.

- Added `join` flag (default: false) to `sound.play` to wait for the
  playback to finish.
This commit is contained in:
Fabio Manganiello 2023-10-31 01:41:13 +01:00
parent d5514d7f27
commit f960ec4bf4
8 changed files with 115 additions and 58 deletions

View file

@ -86,8 +86,11 @@ class SoundPlugin(RunnablePlugin):
sample_rate: Optional[int] = None, sample_rate: Optional[int] = None,
channels: int = 2, channels: int = 2,
volume: float = 100, volume: float = 100,
dtype: Optional[str] = None,
format: Optional[str] = None, # pylint: disable=redefined-builtin
stream_name: Optional[str] = None, stream_name: Optional[str] = None,
stream_index: Optional[int] = None, stream_index: Optional[int] = None,
join: bool = False,
): ):
""" """
Plays an audio file/URL (any audio format supported by ffmpeg works) or Plays an audio file/URL (any audio format supported by ffmpeg works) or
@ -159,6 +162,12 @@ class SoundPlugin(RunnablePlugin):
:param channels: Number of audio channels. Default: number of channels :param channels: Number of audio channels. Default: number of channels
in the audio file in file mode, 1 if in synth mode in the audio file in file mode, 1 if in synth mode
:param volume: Playback volume, between 0 and 100. Default: 100. :param volume: Playback volume, between 0 and 100. Default: 100.
:param dtype: Data type for the audio samples, if playing raw PCM audio
frames. Supported types: 'float64', 'float32', 'int32', 'int16',
'int8', 'uint8'.
:param format: Output audio format, if you want to convert the audio to
another format before playing it. The list of available formats can
be retrieved through the ``ffmpeg -formats`` command. Default: None
:param stream_index: If specified, play to an already active stream :param stream_index: If specified, play to an already active stream
index (you can get them through :meth:`.query_streams`). Default: index (you can get them through :meth:`.query_streams`). Default:
creates a new audio stream through PortAudio. creates a new audio stream through PortAudio.
@ -167,6 +176,8 @@ class SoundPlugin(RunnablePlugin):
name will be created. If not set, and ``stream_index`` is not set name will be created. If not set, and ``stream_index`` is not set
either, then a new stream will be created on the next available either, then a new stream will be created on the next available
index and named ``platypush-stream-<index>``. index and named ``platypush-stream-<index>``.
:param join: If True, then the method will block until the playback is
completed. Default: False.
""" """
dev = self._manager.get_device(device=device, type=StreamType.OUTPUT) dev = self._manager.get_device(device=device, type=StreamType.OUTPUT)
@ -193,7 +204,13 @@ class SoundPlugin(RunnablePlugin):
stream_index, stream_index,
) )
self._manager.create_player( player_kwargs = {}
if dtype:
player_kwargs['dtype'] = dtype
if format:
player_kwargs['format'] = format
player = self._manager.create_player(
device=dev.index, device=dev.index,
infile=resource, infile=resource,
sound=sound, sound=sound,
@ -203,7 +220,12 @@ class SoundPlugin(RunnablePlugin):
channels=channels, channels=channels,
volume=volume, volume=volume,
stream_name=stream_name, stream_name=stream_name,
).start() **player_kwargs,
)
player.start()
if join:
player.join()
@action @action
def stream_recording(self, *args, **kwargs): def stream_recording(self, *args, **kwargs):
@ -255,7 +277,7 @@ class SoundPlugin(RunnablePlugin):
:param sample_rate: Recording sample rate (default: device default rate) :param sample_rate: Recording sample rate (default: device default rate)
:param dtype: Data type for the audio samples. Supported types: :param dtype: Data type for the audio samples. Supported types:
'float64', 'float32', 'int32', 'int16', 'int8', 'uint8'. Default: 'float64', 'float32', 'int32', 'int16', 'int8', 'uint8'. Default:
float32 int16.
:param blocksize: Audio block size (default: configured :param blocksize: Audio block size (default: configured
`input_blocksize` or 2048) `input_blocksize` or 2048)
:param play_audio: If True, then the recorded audio will be played in :param play_audio: If True, then the recorded audio will be played in

View file

@ -7,8 +7,9 @@ from threading import Event, RLock, Thread
from typing import Any, Callable, Coroutine, Iterable, Optional from typing import Any, Callable, Coroutine, Iterable, Optional
from platypush.context import get_or_create_event_loop from platypush.context import get_or_create_event_loop
from platypush.utils import is_debug_enabled
_dtype_to_ffmpeg_format = { dtype_to_ffmpeg_format = {
'int8': 's8', 'int8': 's8',
'uint8': 'u8', 'uint8': 'u8',
'int16': 's16le', 'int16': 's16le',
@ -46,7 +47,8 @@ class AudioConverter(Thread, ABC):
volume: float, volume: float,
dtype: str, dtype: str,
chunk_size: int, chunk_size: int,
format: Optional[str] = None, # pylint: disable=redefined-builtin input_format: Optional[str] = None, # pylint: disable=redefined-builtin
output_format: Optional[str] = None, # pylint: disable=redefined-builtin
on_exit: Optional[Callable[[], Any]] = None, on_exit: Optional[Callable[[], Any]] = None,
**kwargs, **kwargs,
): ):
@ -58,24 +60,20 @@ class AudioConverter(Thread, ABC):
:param dtype: The (numpy) data type of the raw input/output audio. :param dtype: The (numpy) data type of the raw input/output audio.
:param chunk_size: Number of bytes that will be read at once from the :param chunk_size: Number of bytes that will be read at once from the
ffmpeg process. ffmpeg process.
:param format: Input/output audio format. :param input_format: Input audio format.
:param output_format: Output audio format.
:param on_exit: Function to call when the ffmpeg process exits. :param on_exit: Function to call when the ffmpeg process exits.
""" """
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
ffmpeg_format = _dtype_to_ffmpeg_format.get(dtype)
assert ffmpeg_format, (
f'Unsupported data type: {dtype}. Supported data types: '
f'{list(_dtype_to_ffmpeg_format.keys())}'
)
self._ffmpeg_bin = ffmpeg_bin self._ffmpeg_bin = ffmpeg_bin
self._ffmpeg_format = ffmpeg_format
self._ffmpeg_task: Optional[Coroutine] = None self._ffmpeg_task: Optional[Coroutine] = None
self._sample_rate = sample_rate self._sample_rate = sample_rate
self._channels = channels self._channels = channels
self._chunk_size = chunk_size self._chunk_size = chunk_size
self._format = format self._input_format = input_format
self._output_format = output_format
self._dtype = dtype
self._closed = False self._closed = False
self._out_queue = Queue() self._out_queue = Queue()
self.ffmpeg = None self.ffmpeg = None
@ -120,7 +118,8 @@ class AudioConverter(Thread, ABC):
""" """
Set of arguments common to all ffmpeg converter instances. Set of arguments common to all ffmpeg converter instances.
""" """
return ('-hide_banner', '-loglevel', 'warning', '-y') log_level = 'debug' if is_debug_enabled() else 'warning'
return ('-hide_banner', '-loglevel', log_level, '-y')
@property @property
@abstractmethod @abstractmethod
@ -150,20 +149,6 @@ class AudioConverter(Thread, ABC):
return args + ('-channel_layout', 'stereo') return args + ('-channel_layout', 'stereo')
return args return args
@property
def _raw_ffmpeg_args(self) -> Iterable[str]:
"""
Ffmpeg arguments for raw audio input/output given the current
configuration.
"""
return (
'-f',
self._ffmpeg_format,
'-ar',
str(self._sample_rate),
*self._channel_layout_args,
)
@property @property
def _audio_volume_args(self) -> Iterable[str]: def _audio_volume_args(self) -> Iterable[str]:
""" """
@ -197,23 +182,6 @@ class AudioConverter(Thread, ABC):
""" """
return PIPE return PIPE
@property
def _compressed_ffmpeg_args(self) -> Iterable[str]:
"""
Ffmpeg arguments for the compressed audio given the current
configuration.
"""
if not self._format:
return ()
ffmpeg_args = self._format_to_ffmpeg_args.get(self._format)
assert ffmpeg_args, (
f'Unsupported output format: {self._format}. Supported formats: '
f'{list(self._format_to_ffmpeg_args.keys())}'
)
return ffmpeg_args
async def _audio_proxy(self, timeout: Optional[float] = None): async def _audio_proxy(self, timeout: Optional[float] = None):
""" """
Proxy the converted audio stream to the output queue for downstream Proxy the converted audio stream to the output queue for downstream
@ -236,11 +204,6 @@ class AudioConverter(Thread, ABC):
self.logger.info('Running ffmpeg: %s', ' '.join(ffmpeg_args)) self.logger.info('Running ffmpeg: %s', ' '.join(ffmpeg_args))
try:
await asyncio.wait_for(self.ffmpeg.wait(), 0.1)
except asyncio.TimeoutError:
pass
while ( while (
self._loop self._loop
and self.ffmpeg and self.ffmpeg
@ -298,6 +261,9 @@ class AudioConverter(Thread, ABC):
self._loop.run_until_complete(self._ffmpeg_task) self._loop.run_until_complete(self._ffmpeg_task)
except RuntimeError as e: except RuntimeError as e:
self.logger.warning(e) self.logger.warning(e)
except Exception as e:
self.logger.warning('Audio converter error: %s', e)
self.logger.exception(e)
finally: finally:
self.stop() self.stop()

View file

@ -1,6 +1,6 @@
from typing import Iterable from typing import Iterable
from ._base import AudioConverter from ._base import AudioConverter, dtype_to_ffmpeg_format
class RawInputAudioConverter(AudioConverter): class RawInputAudioConverter(AudioConverter):
@ -8,13 +8,42 @@ class RawInputAudioConverter(AudioConverter):
Converts raw audio input to a compressed media format. Converts raw audio input to a compressed media format.
""" """
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self._dtype and not self._input_format:
ffmpeg_format = dtype_to_ffmpeg_format.get(self._dtype)
assert ffmpeg_format, (
f'Unsupported data type: {self._dtype}. Supported data types: '
f'{list(dtype_to_ffmpeg_format.keys())}'
)
self._input_format = ffmpeg_format
@property @property
def _input_format_args(self) -> Iterable[str]: def _input_format_args(self) -> Iterable[str]:
return self._raw_ffmpeg_args args = (
'-ar',
str(self._sample_rate),
*self._channel_layout_args,
)
if self._input_format:
args = ('-f', self._input_format) + args
return args
@property @property
def _output_format_args(self) -> Iterable[str]: def _output_format_args(self) -> Iterable[str]:
return self._compressed_ffmpeg_args if not self._output_format:
return ()
ffmpeg_args = self._format_to_ffmpeg_args.get(self._output_format)
assert ffmpeg_args, (
f'Unsupported output format: {self._output_format}. Supported formats: '
f'{list(self._format_to_ffmpeg_args.keys())}'
)
return ffmpeg_args
# vim:sw=4:ts=4:et: # vim:sw=4:ts=4:et:

View file

@ -1,6 +1,6 @@
from typing import Iterable from typing import Iterable
from ._base import AudioConverter from ._base import AudioConverter, dtype_to_ffmpeg_format
class RawOutputAudioConverter(AudioConverter): class RawOutputAudioConverter(AudioConverter):
@ -8,13 +8,40 @@ class RawOutputAudioConverter(AudioConverter):
Converts input audio to raw audio output. Converts input audio to raw audio output.
""" """
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self._dtype and not self._output_format:
ffmpeg_format = dtype_to_ffmpeg_format.get(self._dtype)
assert ffmpeg_format, (
f'Unsupported data type: {self._dtype}. Supported data types: '
f'{list(dtype_to_ffmpeg_format.keys())}'
)
self._output_format = ffmpeg_format
@property @property
def _input_format_args(self) -> Iterable[str]: def _input_format_args(self) -> Iterable[str]:
return self._compressed_ffmpeg_args if not self._input_format:
return ()
ffmpeg_args = self._format_to_ffmpeg_args.get(self._input_format)
if not ffmpeg_args:
return ('-f', self._input_format)
return ffmpeg_args
@property @property
def _output_format_args(self) -> Iterable[str]: def _output_format_args(self) -> Iterable[str]:
return self._raw_ffmpeg_args args = (
'-ar',
str(self._sample_rate),
*self._channel_layout_args,
)
if self._output_format:
args = ('-f', self._output_format) + args
return args
class RawOutputAudioFromFileConverter(RawOutputAudioConverter): class RawOutputAudioFromFileConverter(RawOutputAudioConverter):

View file

@ -61,6 +61,7 @@ class AudioManager:
duration: Optional[float] = None, duration: Optional[float] = None,
sample_rate: Optional[int] = None, sample_rate: Optional[int] = None,
dtype: str = 'int16', dtype: str = 'int16',
format: Optional[str] = None, # pylint: disable=redefined-builtin
blocksize: Optional[int] = None, blocksize: Optional[int] = None,
latency: Union[float, str] = 'high', latency: Union[float, str] = 'high',
stream_name: Optional[str] = None, stream_name: Optional[str] = None,
@ -77,6 +78,7 @@ class AudioManager:
:param duration: Duration of the stream in seconds. :param duration: Duration of the stream in seconds.
:param sample_rate: Sample rate of the stream. :param sample_rate: Sample rate of the stream.
:param dtype: Data type of the stream. :param dtype: Data type of the stream.
:param format: Output format of the stream.
:param blocksize: Block size of the stream. :param blocksize: Block size of the stream.
:param latency: Latency of the stream. :param latency: Latency of the stream.
:param stream_name: Name of the stream. :param stream_name: Name of the stream.
@ -93,6 +95,7 @@ class AudioManager:
blocksize=blocksize or self.output_blocksize, blocksize=blocksize or self.output_blocksize,
latency=latency, latency=latency,
channels=channels, channels=channels,
output_format=format,
queue_size=self.queue_size, queue_size=self.queue_size,
should_stop=self._should_stop, should_stop=self._should_stop,
) )

View file

@ -43,6 +43,8 @@ class AudioThread(Thread, ABC):
infile: Optional[str] = None, infile: Optional[str] = None,
outfile: Optional[str] = None, outfile: Optional[str] = None,
duration: Optional[float] = None, duration: Optional[float] = None,
input_format: Optional[str] = None,
output_format: Optional[str] = None,
latency: Union[float, str] = 'high', latency: Union[float, str] = 'high',
redis_queue: Optional[str] = None, redis_queue: Optional[str] = None,
should_stop: Optional[Event] = None, should_stop: Optional[Event] = None,
@ -66,6 +68,8 @@ class AudioThread(Thread, ABC):
stream. stream.
:param outfile: Path to the output file. :param outfile: Path to the output file.
:param duration: Duration of the audio stream. :param duration: Duration of the audio stream.
:param input_format: Input format override.
:param output_format: Output format override.
:param latency: Latency to use. :param latency: Latency to use.
:param redis_queue: Redis queue to use. :param redis_queue: Redis queue to use.
:param should_stop: Synchronize with upstream stop events. :param should_stop: Synchronize with upstream stop events.
@ -83,6 +87,8 @@ class AudioThread(Thread, ABC):
self.volume = volume self.volume = volume
self.sample_rate = sample_rate self.sample_rate = sample_rate
self.dtype = dtype self.dtype = dtype
self.input_format = input_format
self.output_format = output_format
self.stream = stream self.stream = stream
self.duration = duration self.duration = duration
self.blocksize = blocksize * channels self.blocksize = blocksize * channels
@ -349,6 +355,9 @@ class AudioThread(Thread, ABC):
self.logger.warning( self.logger.warning(
'Audio callback timeout for %s', self.__class__.__name__ 'Audio callback timeout for %s', self.__class__.__name__
) )
except Exception as e:
self.logger.warning('Unhandled sound on %s', self.__class__.__name__)
self.logger.exception(e)
finally: finally:
self.notify_stop() self.notify_stop()

View file

@ -20,6 +20,7 @@ class AudioResourcePlayer(AudioPlayer):
def _converter_args(self) -> dict: def _converter_args(self) -> dict:
return { return {
'infile': self.infile, 'infile': self.infile,
'output_format': self.output_format,
**super()._converter_args, **super()._converter_args,
} }

View file

@ -81,7 +81,7 @@ class AudioRecorder(AudioThread):
@property @property
def _converter_args(self) -> dict: def _converter_args(self) -> dict:
return { return {
'format': self.output_format, 'output_format': self.output_format,
**super()._converter_args, **super()._converter_args,
} }