mirror of
https://github.com/BlackLight/micmon.git
synced 2024-11-24 04:35:13 +01:00
Code commit
This commit is contained in:
parent
3dbd0f1d83
commit
2f578929fb
15 changed files with 1056 additions and 0 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -1 +1,5 @@
|
||||||
/.idea/
|
/.idea/
|
||||||
|
.ipynb_checkpoints
|
||||||
|
/data/
|
||||||
|
/models/
|
||||||
|
__pycache__
|
||||||
|
|
4
micmon/__init__.py
Normal file
4
micmon/__init__.py
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Runs on package import: configure the root logger to emit every record of
# level DEBUG and above to standard output.
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
|
6
micmon/audio/__init__.py
Normal file
6
micmon/audio/__init__.py
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
from .directory import AudioDirectory
|
||||||
|
from .segment import AudioSegment
|
||||||
|
from .player import AudioPlayer
|
||||||
|
from .source import AudioSource
|
||||||
|
from .file import AudioFile
|
||||||
|
from .device import AudioDevice
|
9
micmon/audio/device.py
Normal file
9
micmon/audio/device.py
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
from micmon.audio import AudioSource
|
||||||
|
|
||||||
|
|
||||||
|
class AudioDevice(AudioSource):
    """Audio source that reads from a capture device through ffmpeg."""

    def __init__(self, system: str = 'alsa', device: str = 'plughw:0,1', *args, **kwargs):
        """Prepare the ffmpeg command line for a capture device.

        Args:
            system: Value passed to ffmpeg's ``-f`` input option.
            device: Value passed to ffmpeg's ``-i`` input option.
            *args: Forwarded unchanged to ``AudioSource.__init__``.
            **kwargs: Forwarded unchanged to ``AudioSource.__init__``.
        """
        super().__init__(*args, **kwargs)

        # Input options come first, followed by the output options that the
        # base class prepared.
        input_options = (self.ffmpeg_bin, '-f', system, '-i', device)
        self.ffmpeg_args = input_options + tuple(self.ffmpeg_base_args)
|
24
micmon/audio/directory.py
Normal file
24
micmon/audio/directory.py
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class AudioDirectory:
    """A directory that holds one audio recording and its labels file.

    The directory is expected to contain ``audio.mp3`` and ``labels.json``.
    """

    _audio_file_name = 'audio.mp3'
    _labels_file_name = 'labels.json'

    def __init__(self, path: str):
        """Resolve the directory and verify that both expected files exist.

        Args:
            path: Directory path; ``~`` is expanded and the path is made
                absolute.

        Raises:
            AssertionError: If the audio file or the labels file is missing.
        """
        self.path = os.path.abspath(os.path.expanduser(path))
        self.audio_file = os.path.join(self.path, self._audio_file_name)
        self.labels_file = os.path.join(self.path, self._labels_file_name)

        # Both files are mandatory, so each one is checked individually.
        assert os.path.isfile(self.audio_file) and os.path.isfile(self.labels_file), \
            f'{self._audio_file_name} or {self._labels_file_name} missing from {self.path}'

    @classmethod
    def scan(cls, path: str) -> list:
        """Return an instance for every valid sub-directory of ``path``.

        A sub-directory is valid when it contains both the audio file and the
        labels file. Other entries are skipped silently.

        Args:
            path: Parent directory to scan (not recursive).

        Returns:
            A list of ``AudioDirectory`` objects, in ``os.listdir`` order.
        """
        path = os.path.abspath(os.path.expanduser(path))
        return [
            cls(os.path.join(path, d))
            for d in os.listdir(path)
            if os.path.isdir(os.path.join(path, d))
            and os.path.isfile(os.path.join(path, d, cls._audio_file_name))
            and os.path.isfile(os.path.join(path, d, cls._labels_file_name))
        ]
|
56
micmon/audio/file.py
Normal file
56
micmon/audio/file.py
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
import json
|
||||||
|
from typing import Optional, List, Tuple, Union
|
||||||
|
|
||||||
|
from micmon.audio import AudioDirectory, AudioSegment, AudioSource
|
||||||
|
|
||||||
|
|
||||||
|
class AudioFile(AudioSource):
    """Audio source that decodes a labelled recording through ffmpeg.

    Iterating yields ``AudioSegment`` objects. Each one is tagged with the
    index (into ``self.labels``) of the label active at its start time.
    """

    def __init__(self, path: AudioDirectory,
                 start: Union[str, int, float] = 0,
                 duration: Optional[Union[str, int, float]] = None,
                 *args, **kwargs):
        """Prepare the ffmpeg command line and load the labels.

        Args:
            path: Directory object providing ``audio_file`` and
                ``labels_file``.
            start: Offset to start from, as seconds or as a time string
                accepted by ``convert_time``. Passed to ffmpeg as ``-ss``.
            duration: Optional amount of audio to read, in the same formats.
                Passed to ffmpeg as ``-t``.
            *args: Forwarded unchanged to ``AudioSource.__init__``.
            **kwargs: Forwarded unchanged to ``AudioSource.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.ffmpeg_args = (
            self.ffmpeg_bin, '-i', path.audio_file, *(('-ss', str(start)) if start else ()),
            *(('-t', str(duration)) if duration else ()), *self.ffmpeg_base_args
        )

        # convert_time returns milliseconds; positions are tracked in seconds.
        self.start = self.convert_time(start)/1000
        self.duration = self.convert_time(duration)/1000
        self.segments = self.parse_labels_file(path.labels_file) \
            if path.labels_file else []

        self.labels = sorted(set(label for _, label in self.segments))
        self.cur_time = self.start
        self.cur_label = None

    @classmethod
    def parse_labels_file(cls, labels_file: str) -> List[Tuple[int, Union[int, bool, str]]]:
        """Read a JSON ``{timestamp: label}`` mapping as a sorted list.

        Args:
            labels_file: Path to the JSON labels file.

        Returns:
            ``(timestamp_ms, label)`` tuples sorted by timestamp.
        """
        with open(labels_file, 'r') as f:
            segments = {
                cls.convert_time(timestamp): label
                for timestamp, label in json.load(f).items()
            }

        # Timestamps are unique dict keys, so sorting never compares labels.
        return sorted(segments.items())

    def __next__(self) -> AudioSegment:
        """Return the next labelled chunk of decoded audio.

        Raises:
            StopIteration: If the source is not running or ffmpeg's output is
                exhausted.
        """
        if not self.ffmpeg:
            raise StopIteration

        # Read until EOF instead of stopping when the process has exited:
        # ffmpeg usually finishes decoding a file before the consumer has
        # drained the pipe, and the buffered audio would otherwise be lost.
        data = self.ffmpeg.stdout.read(self.bufsize)
        if not data:
            raise StopIteration

        # Advance to the label in effect at the current position.
        while self.segments and self.cur_time * 1000 >= self.segments[0][0]:
            self.cur_label = self.segments.pop(0)[1]

        # Audio that precedes the first label has no class; report it as
        # unlabelled rather than failing on the index lookup.
        label = self.labels.index(self.cur_label) if self.cur_label in self.labels else None
        audio = AudioSegment(data, sample_rate=self.sample_rate, channels=self.channels,
                             label=label)

        self.cur_time += audio.duration
        return audio
|
||||||
|
|
34
micmon/audio/player.py
Normal file
34
micmon/audio/player.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
import subprocess
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from micmon.audio import AudioSegment
|
||||||
|
|
||||||
|
|
||||||
|
class AudioPlayer:
    """Context manager that plays raw 16-bit PCM audio through ``ffplay``."""

    def __init__(self, sample_rate: int = 44100, channels: int = 1, ffplay_bin: str = 'ffplay'):
        """Store the playback settings; no process is started yet.

        Args:
            sample_rate: Value passed to ffplay's ``-ar`` option.
            channels: Value passed to ffplay's ``-ac`` option.
            ffplay_bin: Name or path of the ffplay executable.
        """
        self.sample_rate = sample_rate
        self.channels = channels
        self.ffplay_bin = ffplay_bin
        self.process: Optional[subprocess.Popen] = None

    def __enter__(self):
        """Start ffplay reading signed 16-bit little-endian audio from stdin."""
        self.process = subprocess.Popen([
            self.ffplay_bin, '-f', 's16le', '-ar', str(self.sample_rate),
            '-ac', str(self.channels), '-nodisp', '-'
        ], stdin=subprocess.PIPE)

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop ffplay, escalating from terminate to kill if needed."""
        if not self.process:
            return

        process = self.process
        try:
            process.terminate()
            try:
                process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # Still running after the grace period; it is killed below.
                pass

            if process.poll() is None:
                process.kill()

            process.wait()
        finally:
            if process.stdin is not None:
                try:
                    process.stdin.close()
                except OSError:
                    # Closing flushes pending data, which fails with a broken
                    # pipe once the player is gone; nothing is left to do.
                    pass
            self.process = None

    def play(self, audio: 'AudioSegment'):
        """Send one audio segment to the running player.

        Args:
            audio: Segment whose ``data`` attribute holds the raw bytes.

        Raises:
            AssertionError: If the player has not been started.
        """
        assert self.process, 'Player is not running'
        self.process.stdin.write(audio.data)
        # stdin is buffered; flush so the audio reaches the player promptly.
        self.process.stdin.flush()
|
41
micmon/audio/segment.py
Normal file
41
micmon/audio/segment.py
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
from typing import Optional, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
class AudioSegment:
    """A chunk of raw signed 16-bit audio with spectrum helpers."""

    default_low_freq = 20
    default_high_freq = 20000
    default_bins = 100

    def __init__(self, data: bytes, sample_rate: int = 44100, channels: int = 1, label: Optional[int] = None):
        """Wrap a raw byte buffer.

        Args:
            data: Raw bytes, interpreted as ``int16`` samples.
            sample_rate: Samples per second per channel.
            channels: Number of channels in ``data``.
            label: Optional class index attached to the segment.
        """
        self.data = data
        self.label = label
        self.sample_rate = sample_rate
        self.channels = channels
        self.audio = np.frombuffer(data, dtype=np.int16)

        # Duration in seconds: total sample count over samples per second.
        samples_per_second = sample_rate * channels
        self.duration = len(self.audio) / samples_per_second

    def fft(self, low_freq: int = default_low_freq, high_freq: int = default_high_freq) -> np.ndarray:
        """Return a slice of the real-FFT magnitudes of the segment.

        ``low_freq`` and ``high_freq`` are applied directly as slice bounds on
        the magnitude array.
        """
        magnitudes = np.absolute(np.fft.rfft(self.audio))
        return magnitudes[low_freq:high_freq]

    def spectrum(self, low_freq: int = default_low_freq, high_freq: int = default_high_freq,
                 bins: int = default_bins) -> np.ndarray:
        """Return the FFT slice averaged into ``bins`` equal-width buckets.

        Each bucket average is divided by ``duration * 65535``. Magnitudes
        that do not fill a whole bucket at the end are ignored.
        """
        magnitudes = self.fft(low_freq=low_freq, high_freq=high_freq)
        width = int(len(magnitudes) / bins)
        scale = self.duration * ((1 << 16) - 1)

        levels = []
        for index in range(bins):
            offset = index * width
            bucket = magnitudes[offset: offset + width]
            levels.append(np.average(bucket) / scale)

        return np.array(levels)

    def plot_audio(self):
        """Plot the raw samples and show the figure."""
        plt.plot(self.audio)
        plt.show()

    def plot_spectrum(self, low_freq: int = default_low_freq, high_freq: int = default_high_freq,
                      bins: int = default_bins):
        """Plot the binned spectrum as a bar chart with a 0..1 y-axis."""
        levels = self.spectrum(low_freq=low_freq, high_freq=high_freq, bins=bins)
        positions = range(len(levels))
        plt.ylim(0, 1)
        plt.bar(positions, levels)
        plt.show()
|
85
micmon/audio/source.py
Normal file
85
micmon/audio/source.py
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
import logging
|
||||||
|
import signal
|
||||||
|
import subprocess
|
||||||
|
from abc import ABC
|
||||||
|
from typing import Optional, Union
|
||||||
|
|
||||||
|
from micmon.audio.segment import AudioSegment
|
||||||
|
|
||||||
|
|
||||||
|
class AudioSource(ABC):
    """Base class for audio sources backed by an ffmpeg subprocess.

    Used as a context manager, it starts ffmpeg on entry and stops it on
    exit. Iterating yields fixed-size ``AudioSegment`` chunks read from
    ffmpeg's standard output as raw signed 16-bit little-endian PCM.
    """

    def __init__(self,
                 sample_duration: float = 2.0,
                 sample_rate: int = 44100,
                 channels: int = 1,
                 ffmpeg_bin: str = 'ffmpeg'):
        """Store the stream parameters and build the ffmpeg output options.

        Args:
            sample_duration: Length in seconds of each yielded segment.
            sample_rate: Output sampling rate in Hz.
            channels: Number of output channels.
            ffmpeg_bin: Name or path of the ffmpeg executable.
        """
        self.ffmpeg_bin = ffmpeg_bin
        # '-ar' sets the audio sampling rate ('-r' is the video frame rate).
        self.ffmpeg_base_args = (
            '-f', 's16le',
            '-acodec', 'pcm_s16le', '-ac', str(channels), '-ar', str(sample_rate), '-')

        self.ffmpeg_args = self.ffmpeg_base_args

        # bufsize = frames per segment * sample width (2 bytes) * channels.
        # Rounding at frame level keeps each read aligned to whole samples.
        frames = int(sample_duration * sample_rate)
        self.bufsize = frames * 2 * channels
        self.ffmpeg: Optional[subprocess.Popen] = None
        self.sample_duration = sample_duration
        self.sample_rate = sample_rate
        self.channels = channels
        self.logger = logging.getLogger(self.__class__.__name__)

    def __iter__(self):
        return self

    def __next__(self) -> 'AudioSegment':
        """Return the next chunk of audio.

        Raises:
            StopIteration: If the source is not running or ffmpeg's output is
                exhausted.
        """
        if not self.ffmpeg:
            raise StopIteration

        # Read until EOF rather than stopping when the process has exited, so
        # audio still buffered in the pipe is not dropped.
        data = self.ffmpeg.stdout.read(self.bufsize)
        if data:
            return AudioSegment(data, sample_rate=self.sample_rate, channels=self.channels)

        raise StopIteration

    def __enter__(self):
        """Start ffmpeg with its standard output connected to a pipe."""
        self.ffmpeg = subprocess.Popen(self.ffmpeg_args, stdout=subprocess.PIPE)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop ffmpeg, escalating from terminate to kill if needed."""
        if not self.ffmpeg:
            return

        process = self.ffmpeg
        try:
            process.terminate()
            try:
                process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                self.logger.warning('FFmpeg process termination timeout')

            if process.poll() is None:
                process.kill()

            process.wait()
        finally:
            # Release the pipe's file descriptor along with the process.
            if process.stdout is not None:
                process.stdout.close()
            self.ffmpeg = None

    def pause(self):
        """Suspend the ffmpeg process with SIGSTOP (no-op if not running)."""
        if not self.ffmpeg:
            return

        self.ffmpeg.send_signal(signal.SIGSTOP)

    def resume(self):
        """Resume the ffmpeg process with SIGCONT (no-op if not running)."""
        if not self.ffmpeg:
            return

        self.ffmpeg.send_signal(signal.SIGCONT)

    @staticmethod
    def convert_time(t: Union[int, float, str]) -> int:
        """Convert a time value to integer milliseconds.

        Args:
            t: Seconds as a number, a ``[[HH:]MM:]SS[.fraction]`` string, or
                a falsy value (``None``, ``0``), which yields ``0``.

        Returns:
            The time in milliseconds.
        """
        if not isinstance(t, str):
            return int(t * 1000) if t else 0

        parts = t.split(':')
        hh = int(parts.pop(0)) if len(parts) == 3 else 0
        mm = int(parts.pop(0)) if len(parts) == 2 else 0

        # The fraction is a decimal part of a second ('.5' is 500 ms), so it
        # is normalised to exactly three digits before conversion.
        seconds, _, fraction = parts[0].partition('.')
        msec = int(fraction.ljust(3, '0')[:3]) if fraction else 0
        ss = int(seconds)
        return (hh * 60 * 60 * 1000) + (mm * 60 * 1000) + (ss * 1000) + msec
|
53
micmon/dataset/__init__.py
Normal file
53
micmon/dataset/__init__.py
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from .writer import DatasetWriter
|
||||||
|
from ..audio import AudioSegment
|
||||||
|
|
||||||
|
|
||||||
|
class Dataset:
    """Labelled spectrum samples with a shuffled train/validation split."""

    def __init__(self, samples: np.ndarray, classes: np.ndarray, validation_split: float = 0.,
                 low_freq: float = AudioSegment.default_low_freq, high_freq: float = AudioSegment.default_high_freq):
        """Store the data and immediately shuffle and split it.

        Args:
            samples: Array of samples, one per row.
            classes: Array with the class of each sample.
            validation_split: Fraction of the data reserved for validation.
            low_freq: Lower cutoff the samples were produced with.
            high_freq: Upper cutoff the samples were produced with.
        """
        self.samples = samples
        self.classes = classes
        # np.unique already returns its result sorted.
        self.labels = np.unique(classes)
        self.validation_split = validation_split
        self.low_freq = low_freq
        self.high_freq = high_freq

        # Placeholders; shuffle() fills in the real split.
        self.train_samples = np.array([])
        self.train_classes = np.array([])
        self.validation_samples = np.array([])
        self.validation_classes = np.array([])
        self.shuffle()

    @classmethod
    def load(cls, npz_path: str, validation_split: float = 0.):
        """Build a dataset from an ``.npz`` archive.

        The archive must provide the ``samples``, ``classes`` and
        ``cutoff_frequencies`` arrays.

        Args:
            npz_path: Path to the archive; ``~`` is expanded.
            validation_split: Fraction of the data reserved for validation.
        """
        # The archive is a lazily-read open file; the context manager closes
        # it once the arrays have been extracted.
        with np.load(os.path.abspath(os.path.expanduser(npz_path))) as dataset:
            return cls(samples=dataset['samples'],
                       classes=dataset['classes'],
                       validation_split=validation_split,
                       low_freq=dataset['cutoff_frequencies'][0],
                       high_freq=dataset['cutoff_frequencies'][1])

    @classmethod
    def scan(cls, datasets_path, validation_split: float = 0.):
        """Load every ``.npz`` file found directly inside a directory.

        Args:
            datasets_path: Directory to scan (not recursive).
            validation_split: Fraction passed on to each loaded dataset.

        Returns:
            A list of ``Dataset`` objects, in ``os.listdir`` order.
        """
        datasets_path = os.path.abspath(os.path.expanduser(datasets_path))
        return [
            cls.load(os.path.join(datasets_path, file), validation_split=validation_split)
            for file in os.listdir(datasets_path)
            if os.path.isfile(os.path.join(datasets_path, file))
            and file.endswith('.npz')
        ]

    def shuffle(self):
        """Shuffle samples and classes together, then recompute the split."""
        # Pair each sample with its class so one shuffle keeps them aligned.
        data = np.array([
            (self.samples[i], self.classes[i])
            for i in range(len(self.samples))
        ], dtype=object)

        np.random.shuffle(data)
        self.samples = np.array([p[0] for p in data])
        self.classes = np.array([p[1] for p in data])

        # The first `pivot` pairs are for training, the rest for validation.
        pivot = int(len(data) - (self.validation_split * len(data)))
        self.train_samples = np.array([p[0] for p in data[:pivot]])
        self.train_classes = np.array([p[1] for p in data[:pivot]])
        self.validation_samples = np.array([p[0] for p in data[pivot:]])
        self.validation_classes = np.array([p[1] for p in data[pivot:]])
|
35
micmon/dataset/writer.py
Normal file
35
micmon/dataset/writer.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from micmon.audio import AudioSegment
|
||||||
|
|
||||||
|
|
||||||
|
class DatasetWriter:
    """Collects audio segment spectra and writes them to an ``.npz`` file.

    Samples are added with ``+`` / ``+=``. The file is written when the
    ``with`` block exits.
    """

    def __init__(self, path: str,
                 low_freq: int = AudioSegment.default_low_freq,
                 high_freq: int = AudioSegment.default_high_freq,
                 bins: int = AudioSegment.default_bins):
        """Store the output path and the spectrum parameters.

        Args:
            path: Output file path; ``~`` is expanded.
            low_freq: Lower cutoff passed to ``AudioSegment.spectrum``.
            high_freq: Upper cutoff passed to ``AudioSegment.spectrum``.
            bins: Number of spectrum bins per sample.
        """
        self.path = os.path.abspath(os.path.expanduser(path))
        self.low_freq = low_freq
        self.high_freq = high_freq
        self.bins = bins
        self.samples = []
        self.classes = []

    def __add__(self, sample: AudioSegment):
        """Append the spectrum and label of ``sample``; return this writer."""
        self.samples.append(sample.spectrum(low_freq=self.low_freq, high_freq=self.high_freq, bins=self.bins))
        self.classes.append(sample.label)
        return self

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Write the collected data to ``self.path`` and clear the buffers."""
        pathlib.Path(self.path).parent.mkdir(parents=True, exist_ok=True)
        np.savez_compressed(self.path,
                            samples=np.array(self.samples),
                            classes=np.array(self.classes),
                            cutoff_frequencies=np.array([self.low_freq, self.high_freq]))

        # Clear both buffers together so a reused writer never saves classes
        # that have no matching samples.
        self.samples = []
        self.classes = []
|
77
micmon/model/__init__.py
Normal file
77
micmon/model/__init__.py
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from typing import List, Optional, Union, Tuple
|
||||||
|
from keras import Sequential, losses, optimizers, metrics
|
||||||
|
from keras.layers import Layer
|
||||||
|
from keras.models import load_model, Model as _Model
|
||||||
|
|
||||||
|
from micmon.audio import AudioSegment
|
||||||
|
from micmon.dataset import Dataset
|
||||||
|
|
||||||
|
|
||||||
|
class Model:
    """Wrapper around a Keras model that classifies audio segments.

    The label names and cutoff frequencies are saved next to the model as
    ``labels.json`` and ``freq.json``.
    """

    labels_file_name = 'labels.json'
    freq_file_name = 'freq.json'

    # noinspection PyShadowingNames
    def __init__(self, layers: Optional[List[Layer]] = None, labels: Optional[List[str]] = None,
                 model: Optional[_Model] = None, optimizer: Union[str, optimizers.Optimizer] = 'adam',
                 loss: Union[str, losses.Loss] = losses.SparseCategoricalCrossentropy(from_logits=True),
                 metrics: List[Union[str, metrics.Metric]] = ('accuracy',),
                 cutoff_frequencies: Tuple[int, int] = (AudioSegment.default_low_freq, AudioSegment.default_high_freq)):
        """Build and compile a model from layers, or wrap an existing one.

        Args:
            layers: Layers for a new ``Sequential`` model. Takes precedence
                over ``model`` when non-empty.
            labels: Optional label names, indexed by class number.
            model: Existing model to wrap when ``layers`` is not given.
            optimizer: Passed to ``compile`` (only used with ``layers``).
            loss: Passed to ``compile`` (only used with ``layers``).
            metrics: Passed to ``compile`` (only used with ``layers``).
            cutoff_frequencies: ``(low, high)`` pair passed to
                ``AudioSegment.spectrum`` when predicting. ``None`` or an
                empty sequence selects the ``AudioSegment`` defaults.

        Raises:
            AssertionError: If neither ``layers`` nor ``model`` is provided.
        """
        # Compare against None so the check does not depend on the
        # truthiness of the model object.
        assert layers or model is not None
        self.label_names = labels

        # predict() indexes both entries, so a missing pair falls back to the
        # defaults instead of being stored as an empty list.
        if cutoff_frequencies is None or len(cutoff_frequencies) == 0:
            cutoff_frequencies = (AudioSegment.default_low_freq, AudioSegment.default_high_freq)
        self.cutoff_frequencies = list(map(int, cutoff_frequencies))

        if layers:
            self._model = Sequential(layers)
            self._model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
        else:
            self._model = model

    def fit(self, dataset: Dataset, *args, **kwargs):
        """Train on the dataset's training split; extra args go to ``fit``."""
        return self._model.fit(dataset.train_samples, dataset.train_classes, *args, **kwargs)

    def evaluate(self, dataset: Dataset, *args, **kwargs):
        """Evaluate on the validation split; extra args go to ``evaluate``."""
        return self._model.evaluate(dataset.validation_samples, dataset.validation_classes, *args, **kwargs)

    def predict(self, audio: AudioSegment):
        """Classify one audio segment.

        Returns:
            The label name when label names are available, otherwise the
            integer index of the highest-scoring output.
        """
        spectrum = audio.spectrum(low_freq=self.cutoff_frequencies[0], high_freq=self.cutoff_frequencies[1])
        output = self._model.predict(np.array([spectrum]))
        prediction = int(np.argmax(output))
        return self.label_names[prediction] if self.label_names else prediction

    def save(self, model_dir: str, *args, **kwargs):
        """Save the model, plus label names and cutoffs, in ``model_dir``.

        Args:
            model_dir: Target directory; ``~`` is expanded.
            *args: Forwarded to the underlying model's ``save``.
            **kwargs: Forwarded to the underlying model's ``save``.
        """
        model_dir = os.path.abspath(os.path.expanduser(model_dir))
        self._model.save(model_dir, *args, **kwargs)

        if self.label_names:
            labels_file = os.path.join(model_dir, self.labels_file_name)
            with open(labels_file, 'w') as f:
                json.dump(self.label_names, f)

        if self.cutoff_frequencies:
            freq_file = os.path.join(model_dir, self.freq_file_name)
            with open(freq_file, 'w') as f:
                json.dump(self.cutoff_frequencies, f)

    @classmethod
    def load(cls, model_dir: str, *args, **kwargs):
        """Load a model written by ``save``.

        A missing ``labels.json`` leaves the model without label names. A
        missing ``freq.json`` selects the default cutoff frequencies.

        Args:
            model_dir: Directory to load from; ``~`` is expanded.
            *args: Forwarded to ``load_model``.
            **kwargs: Forwarded to ``load_model``.
        """
        model_dir = os.path.abspath(os.path.expanduser(model_dir))
        model = load_model(model_dir, *args, **kwargs)
        labels_file = os.path.join(model_dir, cls.labels_file_name)
        freq_file = os.path.join(model_dir, cls.freq_file_name)
        label_names = []
        frequencies = []

        if os.path.isfile(labels_file):
            with open(labels_file, 'r') as f:
                label_names = json.load(f)

        if os.path.isfile(freq_file):
            with open(freq_file, 'r') as f:
                frequencies = json.load(f)

        # An empty `frequencies` list is replaced by the defaults in __init__.
        return cls(model=model, labels=label_names, cutoff_frequencies=frequencies)
|
232
notebooks/dataset.ipynb
Normal file
232
notebooks/dataset.ipynb
Normal file
File diff suppressed because one or more lines are too long
181
notebooks/predict.ipynb
Normal file
181
notebooks/predict.ipynb
Normal file
File diff suppressed because one or more lines are too long
215
notebooks/train.ipynb
Normal file
215
notebooks/train.ipynb
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue