Code commit

This commit is contained in:
Fabio Manganiello 2020-10-27 15:21:32 +01:00
parent 3dbd0f1d83
commit 2f578929fb
15 changed files with 1056 additions and 0 deletions

4
.gitignore vendored
View file

@ -1 +1,5 @@
/.idea/ /.idea/
.ipynb_checkpoints
/data/
/models/
__pycache__

4
micmon/__init__.py Normal file
View file

@ -0,0 +1,4 @@
import logging
import sys

# Package-wide logging setup, executed when the package is imported:
# records from DEBUG level upwards are sent to standard output.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

6
micmon/audio/__init__.py Normal file
View file

@ -0,0 +1,6 @@
from .directory import AudioDirectory
from .segment import AudioSegment
from .player import AudioPlayer
from .source import AudioSource
from .file import AudioFile
from .device import AudioDevice

9
micmon/audio/device.py Normal file
View file

@ -0,0 +1,9 @@
from micmon.audio import AudioSource
class AudioDevice(AudioSource):
    """
    Audio source whose FFmpeg command reads from a capture device.
    """

    def __init__(self, system: str = 'alsa', device: str = 'plughw:0,1', *args, **kwargs):
        """
        :param system: Value passed to FFmpeg's ``-f`` input option (default: ``alsa``).
        :param device: Value passed to FFmpeg's ``-i`` option (default: ``plughw:0,1``).
        :param args: Positional arguments forwarded to the parent constructor.
        :param kwargs: Keyword arguments forwarded to the parent constructor.
        """
        super().__init__(*args, **kwargs)
        # Command line layout: <ffmpeg binary> <input selection> <shared output options>.
        input_args = ('-f', system, '-i', device)
        self.ffmpeg_args = (self.ffmpeg_bin,) + input_args + tuple(self.ffmpeg_base_args)

24
micmon/audio/directory.py Normal file
View file

@ -0,0 +1,24 @@
import os
class AudioDirectory:
    """
    A directory that holds one audio recording together with its labels file.

    The directory is expected to contain both ``audio.mp3`` and ``labels.json``.
    """

    _audio_file_name = 'audio.mp3'
    _labels_file_name = 'labels.json'

    def __init__(self, path: str):
        """
        :param path: Path of the directory. ``~`` is expanded and the path is made absolute.
        :raises AssertionError: If the audio file or the labels file is missing.
        """
        self.path = os.path.abspath(os.path.expanduser(path))
        self.audio_file = os.path.join(self.path, self._audio_file_name)
        self.labels_file = os.path.join(self.path, self._labels_file_name)

        # Both files are mandatory: check the audio file AND the labels file.
        assert os.path.isfile(self.audio_file) and os.path.isfile(self.labels_file), \
            f'{self._audio_file_name} or {self._labels_file_name} missing from {self.path}'

    @classmethod
    def scan(cls, path: str) -> list:
        """
        Build an :class:`AudioDirectory` for each direct sub-directory of ``path``
        that contains both the audio file and the labels file.

        :param path: Parent directory to scan (not recursive).
        :return: List of :class:`AudioDirectory` objects, in ``os.listdir`` order.
        """
        path = os.path.abspath(os.path.expanduser(path))
        return [
            cls(os.path.join(path, d))
            for d in os.listdir(path)
            if os.path.isdir(os.path.join(path, d))
            and os.path.isfile(os.path.join(path, d, cls._audio_file_name))
            and os.path.isfile(os.path.join(path, d, cls._labels_file_name))
        ]

56
micmon/audio/file.py Normal file
View file

@ -0,0 +1,56 @@
import json
from typing import Optional, List, Tuple, Union
from micmon.audio import AudioDirectory, AudioSegment, AudioSource
class AudioFile(AudioSource):
    """
    Audio source that decodes the audio file of an :class:`AudioDirectory`
    through FFmpeg and tags each produced segment with the label that is
    active at the segment's start time.
    """

    def __init__(self, path: AudioDirectory,
                 start: Union[str, int, float] = 0,
                 duration: Optional[Union[str, int, float]] = None,
                 *args, **kwargs):
        """
        :param path: Directory object providing ``audio_file`` and ``labels_file``.
        :param start: Start offset, passed to FFmpeg as ``-ss`` when truthy.
        :param duration: Maximum duration, passed to FFmpeg as ``-t`` when truthy.
        :param args: Positional arguments forwarded to the parent constructor.
        :param kwargs: Keyword arguments forwarded to the parent constructor.
        """
        super().__init__(*args, **kwargs)
        self.ffmpeg_args = (
            self.ffmpeg_bin, '-i', path.audio_file, *(('-ss', str(start)) if start else ()),
            *(('-t', str(duration)) if duration else ()), *self.ffmpeg_base_args
        )

        # convert_time returns milliseconds; start/duration are kept in seconds.
        self.start = self.convert_time(start)/1000
        self.duration = self.convert_time(duration)/1000
        # Sorted list of (timestamp in ms, label) pairs, consumed while iterating.
        self.segments = self.parse_labels_file(path.labels_file) \
            if path.labels_file else []
        # Sorted unique labels; a segment's class is the index of its label here.
        self.labels = sorted(set(label for _, label in self.segments))
        self.cur_time = self.start
        self.cur_label = None

    @classmethod
    def parse_labels_file(cls, labels_file: str) -> List[Tuple[int, Union[int, bool, str]]]:
        """
        Read a JSON object mapping timestamps to labels.

        :param labels_file: Path of the JSON labels file.
        :return: List of ``(timestamp_ms, label)`` tuples sorted by timestamp.
        """
        with open(labels_file, 'r') as f:
            segments = {
                cls.convert_time(timestamp): label
                for timestamp, label in json.load(f).items()
            }

        return [
            (timestamp, segments[timestamp])
            for timestamp in sorted(segments.keys())
        ]

    def __next__(self) -> AudioSegment:
        """
        Read the next chunk of audio and return it as a labelled segment.

        The segment label is the index of the current label in ``self.labels``,
        or ``None`` while no label applies yet (audio located before the first
        labelled timestamp, or an empty labels file).

        :raises StopIteration: If FFmpeg is not running or produced no more data.
        """
        if not self.ffmpeg or self.ffmpeg.poll() is not None:
            raise StopIteration

        data = self.ffmpeg.stdout.read(self.bufsize)
        if not data:
            raise StopIteration

        # Advance to the last label whose timestamp is not after the current time.
        while self.segments and self.cur_time * 1000 >= self.segments[0][0]:
            self.cur_label = self.segments.pop(0)[1]

        # list.index raises ValueError for an unknown value, which is what
        # happens while cur_label is still None: emit an unlabelled segment.
        label = self.labels.index(self.cur_label) if self.cur_label in self.labels else None
        audio = AudioSegment(data, sample_rate=self.sample_rate, channels=self.channels,
                             label=label)

        self.cur_time += audio.duration
        return audio

34
micmon/audio/player.py Normal file
View file

@ -0,0 +1,34 @@
import subprocess
from typing import Optional
from micmon.audio import AudioSegment
class AudioPlayer:
    """
    Plays raw signed 16-bit little-endian PCM audio by piping it to ``ffplay``.

    Use it as a context manager: the player process is started on enter and
    stopped on exit.
    """

    def __init__(self, sample_rate: int = 44100, channels: int = 1, ffplay_bin: str = 'ffplay'):
        """
        :param sample_rate: Sample rate passed to ffplay (``-ar``).
        :param channels: Number of channels passed to ffplay (``-ac``).
        :param ffplay_bin: Name or path of the ffplay executable.
        """
        self.sample_rate = sample_rate
        self.channels = channels
        self.ffplay_bin = ffplay_bin
        self.process: Optional[subprocess.Popen] = None

    def __enter__(self):
        """Start the ffplay process reading raw audio from its standard input."""
        self.process = subprocess.Popen([
            self.ffplay_bin, '-f', 's16le', '-ar', str(self.sample_rate),
            '-ac', str(self.channels), '-nodisp', '-'
        ], stdin=subprocess.PIPE)

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Stop the player: terminate it, wait up to 5 seconds, then kill it if it
        is still alive. The stdin pipe is closed afterwards.
        """
        process, self.process = self.process, None
        if not process:
            return

        process.terminate()
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # The process ignored the termination request.
            pass

        if process.poll() is None:
            process.kill()
            process.wait()

        if process.stdin:
            try:
                process.stdin.close()
            except OSError:
                # Flushing leftovers to a dead process fails; nothing to recover.
                pass

    def play(self, audio: 'AudioSegment'):
        """
        Send the raw bytes of an audio segment to the player.

        :param audio: Object exposing the raw PCM bytes as ``audio.data``.
        :raises AssertionError: If the player has not been started.
        """
        assert self.process, 'Player is not running'
        self.process.stdin.write(audio.data)
        # The pipe is buffered: flush so short segments are not held back.
        self.process.stdin.flush()

41
micmon/audio/segment.py Normal file
View file

@ -0,0 +1,41 @@
from typing import Optional, Union
import numpy as np
import matplotlib.pyplot as plt
class AudioSegment:
    """
    A chunk of raw audio, interpreted as signed 16-bit samples, with an
    optional numeric label.
    """

    default_low_freq = 20
    default_high_freq = 20000
    default_bins = 100

    def __init__(self, data: bytes, sample_rate: int = 44100, channels: int = 1, label: Optional[int] = None):
        """
        :param data: Raw audio bytes, decoded as ``int16`` samples.
        :param sample_rate: Sample rate used to compute the duration.
        :param channels: Number of channels used to compute the duration.
        :param label: Optional label attached to the segment.
        """
        self.data = data
        self.label = label
        self.sample_rate = sample_rate
        self.channels = channels
        self.audio = np.frombuffer(data, dtype=np.int16)
        # Duration in seconds: number of samples over samples per second.
        self.duration = len(self.audio) / (sample_rate * channels)

    def fft(self, low_freq: int = default_low_freq, high_freq: int = default_high_freq) -> np.ndarray:
        """
        Return the magnitudes of the real FFT of the samples, sliced to
        ``[low_freq:high_freq]``.

        NOTE(review): the bounds are applied as indices into the FFT output;
        they coincide with Hz only for a one-second mono segment - confirm
        whether this is intended.
        """
        magnitudes = np.abs(np.fft.rfft(self.audio))
        return magnitudes[low_freq:high_freq]

    def spectrum(self, low_freq: int = default_low_freq, high_freq: int = default_high_freq,
                 bins: int = default_bins) -> np.ndarray:
        """
        Return ``bins`` values, each the average FFT magnitude of one
        equal-sized group of consecutive FFT points, divided by
        ``duration * 65535``.
        """
        magnitudes = self.fft(low_freq=low_freq, high_freq=high_freq)
        bin_size = int(len(magnitudes) / bins)
        scale = self.duration * ((1 << 16) - 1)

        values = []
        for index in range(bins):
            offset = index * bin_size
            values.append(np.average(magnitudes[offset:offset + bin_size]) / scale)

        return np.array(values)

    def plot_audio(self):
        """Plot the raw samples with matplotlib."""
        plt.plot(self.audio)
        plt.show()

    def plot_spectrum(self, low_freq: int = default_low_freq, high_freq: int = default_high_freq,
                      bins: int = default_bins):
        """Plot the binned spectrum as a bar chart with the y axis fixed to [0, 1]."""
        values = self.spectrum(low_freq=low_freq, high_freq=high_freq, bins=bins)
        positions = range(len(values))
        plt.ylim(0, 1)
        plt.bar(positions, values)
        plt.show()

85
micmon/audio/source.py Normal file
View file

@ -0,0 +1,85 @@
import logging
import signal
import subprocess
from abc import ABC
from typing import Optional, Union
from micmon.audio.segment import AudioSegment
class AudioSource(ABC):
    """
    Base class for audio sources backed by an FFmpeg process that writes raw
    signed 16-bit little-endian PCM to its standard output.

    Sub-classes set ``ffmpeg_args`` to the full command line. The object is an
    iterator of :class:`AudioSegment` and a context manager that starts and
    stops the FFmpeg process.
    """

    def __init__(self,
                 sample_duration: float = 2.0,
                 sample_rate: int = 44100,
                 channels: int = 1,
                 ffmpeg_bin: str = 'ffmpeg'):
        """
        :param sample_duration: Duration in seconds of each produced segment.
        :param sample_rate: Output sample rate requested from FFmpeg.
        :param channels: Number of output channels requested from FFmpeg.
        :param ffmpeg_bin: Name or path of the FFmpeg executable.
        """
        self.ffmpeg_bin = ffmpeg_bin
        # '-ar' sets the audio sample rate ('-r' is FFmpeg's frame-rate option
        # and does not resample audio).
        self.ffmpeg_base_args = (
            '-f', 's16le',
            '-acodec', 'pcm_s16le', '-ac', str(channels), '-ar', str(sample_rate), '-')

        self.ffmpeg_args = self.ffmpeg_base_args

        # bufsize = sample_duration * rate * width * channels
        # (width is 2 bytes per sample for 16-bit audio).
        self.bufsize = int(sample_duration * sample_rate * 2 * channels)
        self.ffmpeg: Optional[subprocess.Popen] = None
        self.sample_duration = sample_duration
        self.sample_rate = sample_rate
        self.channels = channels
        self.logger = logging.getLogger(self.__class__.__name__)

    def __iter__(self):
        return self

    def __next__(self) -> 'AudioSegment':
        """
        Read up to ``bufsize`` bytes from FFmpeg and wrap them in a segment.

        :raises StopIteration: If FFmpeg is not running or produced no more data.
        """
        if not self.ffmpeg or self.ffmpeg.poll() is not None:
            raise StopIteration

        data = self.ffmpeg.stdout.read(self.bufsize)
        if data:
            return AudioSegment(data, sample_rate=self.sample_rate, channels=self.channels)

        raise StopIteration

    def __enter__(self):
        """Start the FFmpeg process described by ``ffmpeg_args``."""
        self.ffmpeg = subprocess.Popen(self.ffmpeg_args, stdout=subprocess.PIPE)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Stop FFmpeg: terminate it, wait up to 5 seconds, kill it if it is still
        alive, then close the stdout pipe.
        """
        process, self.ffmpeg = self.ffmpeg, None
        if not process:
            return

        process.terminate()
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            self.logger.warning('FFmpeg process termination timeout')

        if process.poll() is None:
            process.kill()
            process.wait()

        # Release the pipe's file descriptor once the process is gone.
        if process.stdout:
            process.stdout.close()

    def pause(self):
        """Suspend the FFmpeg process with ``SIGSTOP`` (no-op when not running)."""
        if not self.ffmpeg:
            return

        self.ffmpeg.send_signal(signal.SIGSTOP)

    def resume(self):
        """Resume the FFmpeg process with ``SIGCONT`` (no-op when not running)."""
        if not self.ffmpeg:
            return

        self.ffmpeg.send_signal(signal.SIGCONT)

    @staticmethod
    def convert_time(t: Optional[Union[int, float, str]]) -> int:
        """
        Convert a time value to an integer number of milliseconds.

        :param t: A number of seconds, or a string in ``[[HH:]MM:]SS[.fraction]``
            format. Falsy non-string values (``0``, ``None``) map to 0.
        :return: The time in milliseconds. Fractions are truncated to the
            millisecond.
        """
        if not isinstance(t, str):
            return int(t * 1000) if t else 0

        parts = t.split(':')
        hh = int(parts.pop(0)) if len(parts) == 3 else 0
        mm = int(parts.pop(0)) if len(parts) == 2 else 0

        seconds, _, fraction = parts[0].partition('.')
        ss = int(seconds)
        # The fraction is decimal: '.5' is 500 ms, not 5 ms. Normalize it to
        # exactly three digits before converting.
        msec = int(fraction.ljust(3, '0')[:3]) if fraction else 0

        return (hh * 60 * 60 * 1000) + (mm * 60 * 1000) + (ss * 1000) + msec

View file

@ -0,0 +1,53 @@
import os
import numpy as np
from .writer import DatasetWriter
from ..audio import AudioSegment
class Dataset:
    """
    In-memory set of samples and their classes, with an optional split into
    training and validation subsets.

    The data is shuffled once on construction and again on each ``shuffle()``.
    """

    def __init__(self, samples: np.ndarray, classes: np.ndarray, validation_split: float = 0.,
                 low_freq: float = AudioSegment.default_low_freq, high_freq: float = AudioSegment.default_high_freq):
        """
        :param samples: Array of samples (one entry per sample along the first axis).
        :param classes: Array with the class of each sample, in the same order.
        :param validation_split: Fraction of the data reserved for validation.
        :param low_freq: Low cut-off value stored with the dataset.
        :param high_freq: High cut-off value stored with the dataset.
        """
        self.samples = samples
        self.classes = classes
        # np.unique already returns the distinct values in sorted order.
        self.labels = np.unique(classes)
        self.validation_split = validation_split
        self.low_freq = low_freq
        self.high_freq = high_freq
        self.train_samples, self.train_classes, self.validation_samples, self.validation_classes = [np.array([])] * 4
        self.shuffle()

    @classmethod
    def load(cls, npz_path: str, validation_split: float = 0.):
        """
        Load a dataset from an ``.npz`` archive holding the ``samples``,
        ``classes`` and ``cutoff_frequencies`` arrays.

        :param npz_path: Path of the archive (``~`` is expanded).
        :param validation_split: Fraction of the data reserved for validation.
        """
        # The archive keeps a file handle open until closed: read what is
        # needed inside a context manager.
        with np.load(os.path.abspath(os.path.expanduser(npz_path))) as dataset:
            samples = dataset['samples']
            classes = dataset['classes']
            cutoff_frequencies = dataset['cutoff_frequencies']

        return cls(samples=samples,
                   classes=classes,
                   validation_split=validation_split,
                   low_freq=cutoff_frequencies[0],
                   high_freq=cutoff_frequencies[1])

    @classmethod
    def scan(cls, datasets_path, validation_split: float = 0.):
        """
        Load every ``.npz`` file located directly inside ``datasets_path``.

        :return: List of datasets, in ``os.listdir`` order.
        """
        datasets_path = os.path.abspath(os.path.expanduser(datasets_path))
        return [
            cls.load(os.path.join(datasets_path, file), validation_split=validation_split)
            for file in os.listdir(datasets_path)
            if os.path.isfile(os.path.join(datasets_path, file))
            and file.endswith('.npz')
        ]

    def shuffle(self):
        """
        Shuffle samples and classes with the same random order (drawn from
        NumPy's global random state) and recompute the training/validation
        split. The training and validation arrays are slices of the shuffled
        arrays.
        """
        samples = np.asarray(self.samples)
        classes = np.asarray(self.classes)

        # A single index permutation keeps samples and classes aligned.
        order = np.random.permutation(len(samples))
        self.samples = samples[order]
        self.classes = classes[order]

        # The first `pivot` items are for training, the rest for validation.
        pivot = int(len(order) - (self.validation_split * len(order)))
        self.train_samples = self.samples[:pivot]
        self.train_classes = self.classes[:pivot]
        self.validation_samples = self.samples[pivot:]
        self.validation_classes = self.classes[pivot:]

35
micmon/dataset/writer.py Normal file
View file

@ -0,0 +1,35 @@
import os
import pathlib
import numpy as np
from micmon.audio import AudioSegment
class DatasetWriter:
    """
    Collects the spectrum and label of audio segments and saves them to a
    compressed ``.npz`` archive when the context manager exits.
    """

    def __init__(self, path: str,
                 low_freq: int = AudioSegment.default_low_freq,
                 high_freq: int = AudioSegment.default_high_freq,
                 bins: int = AudioSegment.default_bins):
        """
        :param path: Destination path of the archive (``~`` is expanded).
        :param low_freq: ``low_freq`` passed to ``AudioSegment.spectrum``.
        :param high_freq: ``high_freq`` passed to ``AudioSegment.spectrum``.
        :param bins: ``bins`` passed to ``AudioSegment.spectrum``.
        """
        self.path = os.path.abspath(os.path.expanduser(path))
        self.low_freq = low_freq
        self.high_freq = high_freq
        self.bins = bins
        self.samples = []
        self.classes = []

    def __add__(self, sample: AudioSegment):
        """
        Append the spectrum and label of ``sample`` to the pending data.

        The writer is modified in place and returned, which allows
        ``writer += sample``.
        """
        self.samples.append(sample.spectrum(low_freq=self.low_freq, high_freq=self.high_freq, bins=self.bins))
        self.classes.append(sample.label)
        return self

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Save the collected data (also when the ``with`` body raised) and clear
        the pending samples and classes.
        """
        pathlib.Path(self.path).parent.mkdir(parents=True, exist_ok=True)
        np.savez_compressed(self.path,
                            samples=np.array(self.samples),
                            classes=np.array(self.classes),
                            cutoff_frequencies=np.array([self.low_freq, self.high_freq]))

        # Reset both lists together so they stay aligned if the writer is reused.
        self.samples = []
        self.classes = []

77
micmon/model/__init__.py Normal file
View file

@ -0,0 +1,77 @@
import json
import os
import numpy as np
from typing import List, Optional, Union, Tuple
from keras import Sequential, losses, optimizers, metrics
from keras.layers import Layer
from keras.models import load_model, Model as _Model
from micmon.audio import AudioSegment
from micmon.dataset import Dataset
class Model:
    """
    Wrapper around a Keras model that also keeps the label names and the
    cut-off frequencies used to compute the spectrum of the audio segments.
    """

    labels_file_name = 'labels.json'
    freq_file_name = 'freq.json'

    # noinspection PyShadowingNames
    def __init__(self, layers: Optional[List[Layer]] = None, labels: Optional[List[str]] = None,
                 model: Optional[_Model] = None, optimizer: Union[str, optimizers.Optimizer] = 'adam',
                 loss: Union[str, losses.Loss] = losses.SparseCategoricalCrossentropy(from_logits=True),
                 metrics: List[Union[str, metrics.Metric]] = ('accuracy',),
                 cutoff_frequencies: Tuple[int, int] = (AudioSegment.default_low_freq, AudioSegment.default_high_freq)):
        """
        :param layers: Layers of a new sequential model, compiled with
            ``optimizer``, ``loss`` and ``metrics``.
        :param labels: Names of the classes, indexed by class number.
        :param model: Existing model, used as it is when ``layers`` is not given.
        :param optimizer: Optimizer used to compile a new model.
        :param loss: Loss used to compile a new model.
        :param metrics: Metrics used to compile a new model.
        :param cutoff_frequencies: ``(low, high)`` pair passed to
            ``AudioSegment.spectrum`` on prediction.
        :raises AssertionError: If neither ``layers`` nor ``model`` is given.
        """
        assert layers or model
        self.label_names = labels
        self.cutoff_frequencies = list(map(int, cutoff_frequencies))

        if layers:
            self._model = Sequential(layers)
            self._model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
        else:
            self._model = model

    def fit(self, dataset: Dataset, *args, **kwargs):
        """Train the model on the training subset of ``dataset``."""
        return self._model.fit(dataset.train_samples, dataset.train_classes, *args, **kwargs)

    def evaluate(self, dataset: Dataset, *args, **kwargs):
        """Evaluate the model on the validation subset of ``dataset``."""
        return self._model.evaluate(dataset.validation_samples, dataset.validation_classes, *args, **kwargs)

    def predict(self, audio: AudioSegment):
        """
        Predict the class of an audio segment.

        :return: The label name when label names are available, otherwise the
            index of the highest model output.
        """
        spectrum = audio.spectrum(low_freq=self.cutoff_frequencies[0], high_freq=self.cutoff_frequencies[1])
        output = self._model.predict(np.array([spectrum]))
        prediction = int(np.argmax(output))
        return self.label_names[prediction] if self.label_names else prediction

    def save(self, model_dir: str, *args, **kwargs):
        """
        Save the model to ``model_dir`` and write the label names and cut-off
        frequencies, when set, as JSON files inside it.

        NOTE(review): this assumes the Keras ``save`` call creates ``model_dir``
        as a directory - confirm for the Keras version in use.
        """
        model_dir = os.path.abspath(os.path.expanduser(model_dir))
        self._model.save(model_dir, *args, **kwargs)

        if self.label_names:
            labels_file = os.path.join(model_dir, self.labels_file_name)
            with open(labels_file, 'w') as f:
                json.dump(self.label_names, f)

        if self.cutoff_frequencies:
            freq_file = os.path.join(model_dir, self.freq_file_name)
            with open(freq_file, 'w') as f:
                json.dump(self.cutoff_frequencies, f)

    @classmethod
    def load(cls, model_dir: str, *args, **kwargs):
        """
        Load a model saved with :meth:`save`, together with its label names and
        cut-off frequencies when the corresponding JSON files exist.
        """
        model_dir = os.path.abspath(os.path.expanduser(model_dir))
        model = load_model(model_dir, *args, **kwargs)
        labels_file = os.path.join(model_dir, cls.labels_file_name)
        freq_file = os.path.join(model_dir, cls.freq_file_name)
        label_names = []
        frequencies = []

        if os.path.isfile(labels_file):
            with open(labels_file, 'r') as f:
                label_names = json.load(f)

        if os.path.isfile(freq_file):
            with open(freq_file, 'r') as f:
                frequencies = json.load(f)

        init_args = {'model': model, 'labels': label_names}
        # Without stored frequencies keep the constructor defaults: an empty
        # list would make predict() fail when it indexes cutoff_frequencies.
        if frequencies:
            init_args['cutoff_frequencies'] = frequencies

        return cls(**init_args)

232
notebooks/dataset.ipynb Normal file

File diff suppressed because one or more lines are too long

181
notebooks/predict.ipynb Normal file

File diff suppressed because one or more lines are too long

215
notebooks/train.ipynb Normal file

File diff suppressed because one or more lines are too long