mirror of https://github.com/BlackLight/micmon.git
118 lines
5.5 KiB
Python
118 lines
5.5 KiB
Python
import argparse
|
|
import logging
|
|
import os
|
|
import sys
|
|
|
|
from micmon.audio import AudioDirectory, AudioFile, AudioSegment
|
|
from micmon.dataset import DatasetWriter
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Fallback values shared by create_dataset()'s signature and the command-line options.
defaults = {
    'sample_duration': 2.0,  # length of each analysed audio segment, in seconds
    'sample_rate': 44100,    # audio sample rate, in Hz
    'channels': 1,           # number of destination audio channels
    'ffmpeg_bin': 'ffmpeg',  # name/path of the ffmpeg executable
}
|
|
|
|
|
|
def create_dataset(audio_dir: str, dataset_dir: str,
                   low_freq: int = AudioSegment.default_low_freq,
                   high_freq: int = AudioSegment.default_high_freq,
                   bins: int = AudioSegment.default_bins,
                   sample_duration: float = defaults['sample_duration'],
                   sample_rate: int = defaults['sample_rate'],
                   channels: int = defaults['channels'],
                   ffmpeg_bin: str = defaults['ffmpeg_bin']):
    """
    Write one ``.npz`` dataset file for every audio sample directory found under ``audio_dir``.

    Each entry returned by ``AudioDirectory.scan`` is opened through ``AudioFile``; every sample the
    reader yields is added to a ``DatasetWriter`` whose target is
    ``<dataset_dir>/<basename of the sample directory>.npz``.

    :param audio_dir: Directory containing the raw audio sample directories to be scanned (``~`` is expanded).
    :param dataset_dir: Destination directory for the generated ``.npz`` files (``~`` is expanded).
    :param low_freq: Lowest frequency to be considered in the generated spectrum (passed to ``DatasetWriter``).
    :param high_freq: Highest frequency to be considered in the generated spectrum (passed to ``DatasetWriter``).
    :param bins: Number of frequency bins used for the spectrum analysis (passed to ``DatasetWriter``).
    :param sample_duration: Length in seconds of each audio segment (passed to ``AudioFile``).
    :param sample_rate: Audio sample rate in Hz (passed to ``AudioFile``).
    :param channels: Number of destination audio channels (passed to ``AudioFile``).
    :param ffmpeg_bin: ffmpeg executable to use (``~`` is expanded before it is passed to ``AudioFile``).
    """
    source_root = os.path.abspath(os.path.expanduser(audio_dir))
    target_root = os.path.abspath(os.path.expanduser(dataset_dir))

    for sample_dir in AudioDirectory.scan(source_root):
        # The dataset file is named after the sample directory it was generated from.
        npz_name = os.path.basename(sample_dir.path) + '.npz'
        dataset_file = os.path.join(target_root, npz_name)
        logger.info(f'Processing audio sample {sample_dir.path}')

        # The reader is entered before the writer is even constructed, and the writer is
        # exited first: same ordering as a single two-item ``with`` statement.
        with AudioFile(sample_dir.audio_file, sample_dir.labels_file,
                       sample_duration=sample_duration,
                       sample_rate=sample_rate,
                       channels=channels,
                       ffmpeg_bin=os.path.expanduser(ffmpeg_bin)) as reader:
            with DatasetWriter(dataset_file,
                               low_freq=low_freq,
                               high_freq=high_freq,
                               bins=bins) as writer:
                for sample in reader:
                    # In-place add hands the sample over to the writer
                    # (exact semantics are defined by DatasetWriter, not visible here).
                    writer += sample
|
|
|
|
|
|
def main():
    """
    Command-line entry point: parse the options and delegate to :func:`create_dataset`.

    Arguments are read from ``sys.argv[1:]``. Options that the parser does not recognize are still
    tolerated (they do not abort the run), but they are reported through a warning so that a
    mistyped flag does not silently fall back to its default value.

    :return: Whatever :func:`create_dataset` returns.
    """
    # The epilog is printed verbatim (RawDescriptionHelpFormatter), so the indentation inside the
    # string literal is what the user sees: it is kept flush-left on purpose.
    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        description='''
Tool to create numpy dataset files with audio spectrum data from a set of labelled raw audio files.''',

        epilog='''
- audio_dir should contain a list of sub-directories, each of which represents a labelled audio sample.
  audio_dir should have the following structure:

  audio_dir/
    -> train_sample_1
      -> audio.mp3
      -> labels.json
    -> train_sample_2
      -> audio.mp3
      -> labels.json
    ...

- labels.json is a key-value JSON file that contains the labels for each audio segment. Example:

  {
    "00:00": "negative",
    "02:13": "positive",
    "04:57": "negative",
    "15:41": "positive",
    "18:24": "negative"
  }

  Each entry indicates that all the audio samples between the specified timestamp and the next entry or
  the end of the audio file should be applied the specified label.

- dataset_dir is the directory where the generated labelled spectrum dataset in .npz format will be saved.
  Each dataset file will be named like its associated audio samples directory.''',

        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument('audio_dir', help='Directory containing the raw audio samples directories to be scanned.')
    parser.add_argument('dataset_dir', help='Destination directory for the compressed .npz files containing the '
                                            'frequency spectrum datasets.')
    parser.add_argument('--low', help='Specify the lowest frequency to be considered in the generated frequency '
                                      'spectrum. Default: 20 Hz (lowest possible frequency audible to a human ear).',
                        default=AudioSegment.default_low_freq, dest='low_freq', type=int)

    parser.add_argument('--high', help='Specify the highest frequency to be considered in the generated frequency '
                                       'spectrum. Default: 20 kHz (highest possible frequency audible to a human ear).',
                        default=AudioSegment.default_high_freq, dest='high_freq', type=int)

    parser.add_argument('-b', '--bins', help=f'Specify the number of frequency bins to be used for the spectrum '
                                             f'analysis (default: {AudioSegment.default_bins})',
                        default=AudioSegment.default_bins, dest='bins', type=int)

    parser.add_argument('-d', '--sample-duration', help=f'The script will calculate the spectrum of audio segments of '
                                                        f'this specified length in seconds (default: '
                                                        f'{defaults["sample_duration"]}).',
                        default=defaults['sample_duration'], dest='sample_duration', type=float)

    parser.add_argument('-r', '--sample-rate', help=f'Audio sample rate (default: {defaults["sample_rate"]} Hz)',
                        default=defaults['sample_rate'], dest='sample_rate', type=int)

    parser.add_argument('-c', '--channels', help=f'Number of destination audio channels (default: '
                                                 f'{defaults["channels"]})',
                        default=defaults['channels'], dest='channels', type=int)

    parser.add_argument('--ffmpeg', help=f'Absolute path to the ffmpeg executable (default: {defaults["ffmpeg_bin"]})',
                        default=defaults['ffmpeg_bin'], dest='ffmpeg_bin', type=str)

    # parse_known_args() keeps the tool tolerant of extra arguments, but the leftovers are
    # surfaced instead of being dropped without a trace.
    opts, unknown = parser.parse_known_args(sys.argv[1:])
    if unknown:
        logger.warning('Ignoring unrecognized arguments: %s', ' '.join(unknown))

    return create_dataset(audio_dir=opts.audio_dir, dataset_dir=opts.dataset_dir, low_freq=opts.low_freq,
                          high_freq=opts.high_freq, bins=opts.bins, sample_duration=opts.sample_duration,
                          sample_rate=opts.sample_rate, channels=opts.channels, ffmpeg_bin=opts.ffmpeg_bin)
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
|