mirror of https://github.com/BlackLight/micmon.git
Code commit
This commit is contained in:
parent
3dbd0f1d83
commit
2f578929fb
|
@ -1 +1,5 @@
|
|||
/.idea/
|
||||
.ipynb_checkpoints
|
||||
/data/
|
||||
/models/
|
||||
__pycache__
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
import logging
|
||||
import sys
|
||||
|
||||
# Configure the root logger as soon as this module is imported:
# DEBUG level, with records written to standard output.
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
|
|
@ -0,0 +1,6 @@
|
|||
from .directory import AudioDirectory
|
||||
from .segment import AudioSegment
|
||||
from .player import AudioPlayer
|
||||
from .source import AudioSource
|
||||
from .file import AudioFile
|
||||
from .device import AudioDevice
|
|
@ -0,0 +1,9 @@
|
|||
from micmon.audio import AudioSource
|
||||
|
||||
|
||||
class AudioDevice(AudioSource):
    """Audio source whose FFmpeg input is a device (``-f <system> -i <device>``).

    :param system: Value passed to FFmpeg's ``-f`` input option (default ``alsa``).
    :param device: Value passed to FFmpeg's ``-i`` option (default ``plughw:0,1``).
    :param args: Extra positional arguments forwarded to ``AudioSource``.
    :param kwargs: Extra keyword arguments forwarded to ``AudioSource``.
    """

    def __init__(self, system: str = 'alsa', device: str = 'plughw:0,1', *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Full command line: binary, input selection, then the shared output options
        # prepared by the parent class.
        input_options = ('-f', system, '-i', device)
        self.ffmpeg_args = (self.ffmpeg_bin,) + input_options + tuple(self.ffmpeg_base_args)
|
|
@ -0,0 +1,24 @@
|
|||
import os
|
||||
|
||||
|
||||
class AudioDirectory:
    """A directory holding one audio file and its labels file.

    The directory is expected to contain ``audio.mp3`` and ``labels.json``.
    """

    _audio_file_name = 'audio.mp3'
    _labels_file_name = 'labels.json'

    def __init__(self, path: str):
        """Resolve ``path`` and check that both expected files exist.

        :param path: Directory path; ``~`` is expanded and the path is made absolute.
        :raises AssertionError: If the audio file or the labels file is missing.
        """
        self.path = os.path.abspath(os.path.expanduser(path))
        self.audio_file = os.path.join(self.path, self._audio_file_name)
        self.labels_file = os.path.join(self.path, self._labels_file_name)

        # Both files must be present (the labels file was previously never checked).
        assert os.path.isfile(self.audio_file) and os.path.isfile(self.labels_file), \
            f'{self._audio_file_name} or {self._labels_file_name} missing from {self.path}'

    @classmethod
    def scan(cls, path: str) -> list:
        """Return an instance for every direct sub-directory of ``path`` that
        contains both the audio file and the labels file.

        :param path: Parent directory to scan (``~`` is expanded).
        :return: List of ``AudioDirectory`` objects, in ``os.listdir`` order.
        """
        path = os.path.abspath(os.path.expanduser(path))
        return [
            cls(os.path.join(path, d))
            for d in os.listdir(path)
            if os.path.isdir(os.path.join(path, d))
            and os.path.isfile(os.path.join(path, d, cls._audio_file_name))
            and os.path.isfile(os.path.join(path, d, cls._labels_file_name))
        ]
|
|
@ -0,0 +1,56 @@
|
|||
import json
|
||||
from typing import Optional, List, Tuple, Union
|
||||
|
||||
from micmon.audio import AudioDirectory, AudioSegment, AudioSource
|
||||
|
||||
|
||||
class AudioFile(AudioSource):
    """Audio source that decodes the audio file of an ``AudioDirectory`` through
    FFmpeg and tags every yielded segment with the label active at its start time.
    """

    def __init__(self, path: AudioDirectory,
                 start: Union[str, int, float] = 0,
                 duration: Optional[Union[str, int, float]] = None,
                 *args, **kwargs):
        """
        :param path: Directory object providing ``audio_file`` and ``labels_file``.
        :param start: Start offset, passed to FFmpeg as ``-ss`` when truthy.
        :param duration: Maximum duration, passed to FFmpeg as ``-t`` when truthy.
        :param args: Extra positional arguments forwarded to ``AudioSource``.
        :param kwargs: Extra keyword arguments forwarded to ``AudioSource``.
        """
        super().__init__(*args, **kwargs)
        self.ffmpeg_args = (
            self.ffmpeg_bin, '-i', path.audio_file, *(('-ss', str(start)) if start else ()),
            *(('-t', str(duration)) if duration else ()), *self.ffmpeg_base_args
        )

        # convert_time() returns milliseconds; these attributes are kept in seconds.
        self.start = self.convert_time(start)/1000
        self.duration = self.convert_time(duration)/1000
        self.segments = self.parse_labels_file(path.labels_file) \
            if path.labels_file else []

        # Sorted unique label values; a segment's label is its index in this list.
        self.labels = sorted(list(set(label for timestamp, label in self.segments)))
        self.cur_time = self.start
        self.cur_label = None

    @classmethod
    def parse_labels_file(cls, labels_file: str) -> List[Tuple[int, Union[int, bool, str]]]:
        """Parse a JSON object mapping timestamps to labels.

        :param labels_file: Path of the JSON labels file.
        :return: ``(timestamp_in_ms, label)`` pairs sorted by timestamp.
        """
        with open(labels_file, 'r') as f:
            segments = {
                cls.convert_time(timestamp): label
                for timestamp, label in json.load(f).items()
            }

        return [
            (timestamp, segments[timestamp])
            for timestamp in sorted(segments.keys())
        ]

    def __next__(self) -> AudioSegment:
        """Read the next buffer from FFmpeg and return it as a labelled segment.

        :raises StopIteration: If FFmpeg is not running or produced no more data.
        """
        if not self.ffmpeg or self.ffmpeg.poll() is not None:
            raise StopIteration

        data = self.ffmpeg.stdout.read(self.bufsize)
        if data:
            # Consume every label change whose timestamp (ms) has been reached.
            while self.segments and self.cur_time * 1000 >= self.segments[0][0]:
                self.cur_label = self.segments.pop(0)[1]

            # Before the first label timestamp there is no current label; emit an
            # unlabelled segment instead of failing with ValueError on .index().
            label = self.labels.index(self.cur_label) \
                if self.cur_label in self.labels else None

            audio = AudioSegment(data, sample_rate=self.sample_rate, channels=self.channels,
                                 label=label)

            self.cur_time += audio.duration
            return audio

        raise StopIteration
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
import subprocess
|
||||
from typing import Optional
|
||||
|
||||
from micmon.audio import AudioSegment
|
||||
|
||||
|
||||
class AudioPlayer:
    """Context manager that plays raw signed 16-bit little-endian PCM by piping
    it to the standard input of an ``ffplay`` subprocess.
    """

    def __init__(self, sample_rate: int = 44100, channels: int = 1, ffplay_bin: str = 'ffplay'):
        """
        :param sample_rate: Value passed to ffplay's ``-ar`` option.
        :param channels: Value passed to ffplay's ``-ac`` option.
        :param ffplay_bin: Name or path of the ffplay executable.
        """
        self.sample_rate = sample_rate
        self.channels = channels
        self.ffplay_bin = ffplay_bin
        self.process: Optional[subprocess.Popen] = None

    def __enter__(self):
        """Start ffplay reading from its standard input and return ``self``."""
        self.process = subprocess.Popen([
            self.ffplay_bin, '-f', 's16le', '-ar', str(self.sample_rate),
            '-ac', str(self.channels), '-nodisp', '-'
        ], stdin=subprocess.PIPE)

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Terminate ffplay, escalating to kill if it does not exit within 5 seconds."""
        if self.process:
            self.process.terminate()
            try:
                self.process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # Fall through to the kill below rather than propagating the timeout.
                pass

            # poll() must be *called*: it returns None while the process is alive.
            if self.process.poll() is None:
                self.process.kill()

            self.process.wait()
            self.process = None

    def play(self, audio: 'AudioSegment'):
        """Write the raw bytes of ``audio`` to the player's standard input.

        :param audio: Segment whose ``data`` attribute holds the PCM bytes.
        :raises AssertionError: If the player has not been started.
        """
        assert self.process, 'Player is not running'
        self.process.stdin.write(audio.data)
|
|
@ -0,0 +1,41 @@
|
|||
from typing import Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
class AudioSegment:
    """A chunk of raw 16-bit PCM audio with helpers to compute and plot its spectrum."""

    default_low_freq = 20
    default_high_freq = 20000
    default_bins = 100

    def __init__(self, data: bytes, sample_rate: int = 44100, channels: int = 1, label: Optional[int] = None):
        """
        :param data: Raw audio bytes, interpreted as ``int16`` samples.
        :param sample_rate: Samples per second per channel.
        :param channels: Number of channels in ``data``.
        :param label: Optional label attached to the segment.
        """
        self.data = data
        self.label = label
        self.sample_rate = sample_rate
        self.channels = channels
        self.audio = np.frombuffer(data, dtype=np.int16)

        # Duration in seconds: number of int16 values over values-per-second.
        values_per_second = sample_rate * channels
        self.duration = len(self.audio) / values_per_second

    def fft(self, low_freq: int = default_low_freq, high_freq: int = default_high_freq) -> np.ndarray:
        """Return the magnitudes of the real FFT of the samples, sliced as
        ``[low_freq:high_freq]``.

        NOTE(review): the bounds are applied as indices into the rfft output;
        they match Hz only if the bin spacing is 1 Hz - confirm intended usage.
        """
        magnitudes = np.abs(np.fft.rfft(self.audio))
        return magnitudes[low_freq:high_freq]

    def spectrum(self, low_freq: int = default_low_freq, high_freq: int = default_high_freq,
                 bins: int = default_bins) -> np.ndarray:
        """Average the FFT magnitudes into ``bins`` equally sized groups.

        Each average is divided by ``duration * (2**16 - 1)``.

        :return: Array with ``bins`` elements.
        """
        magnitudes = self.fft(low_freq=low_freq, high_freq=high_freq)
        width = int(len(magnitudes) / bins)
        scale = self.duration * ((1 << 16) - 1)

        levels = []
        for index in range(bins):
            offset = index * width
            levels.append(np.average(magnitudes[offset: offset + width]) / scale)

        return np.array(levels)

    def plot_audio(self):
        """Plot the raw samples with matplotlib and show the figure."""
        plt.plot(self.audio)
        plt.show()

    def plot_spectrum(self, low_freq: int = default_low_freq, high_freq: int = default_high_freq,
                      bins: int = default_bins):
        """Show the binned spectrum as a bar chart with the y axis fixed to [0, 1]."""
        levels = self.spectrum(low_freq=low_freq, high_freq=high_freq, bins=bins)
        plt.ylim(0, 1)
        plt.bar(range(len(levels)), levels)
        plt.show()
|
|
@ -0,0 +1,85 @@
|
|||
import logging
|
||||
import signal
|
||||
import subprocess
|
||||
from abc import ABC
|
||||
from typing import Optional, Union
|
||||
|
||||
from micmon.audio.segment import AudioSegment
|
||||
|
||||
|
||||
class AudioSource(ABC):
    """Base class for audio sources backed by an FFmpeg subprocess.

    Used as a context manager, it starts FFmpeg with ``self.ffmpeg_args`` and,
    when iterated, yields ``AudioSegment`` objects built from fixed-size reads
    of FFmpeg's standard output. Subclasses are expected to set ``ffmpeg_args``
    to a complete command line (the base value holds only the output options).
    """

    def __init__(self,
                 sample_duration: float = 2.0,
                 sample_rate: int = 44100,
                 channels: int = 1,
                 ffmpeg_bin: str = 'ffmpeg'):
        """
        :param sample_duration: Length in seconds of each yielded segment.
        :param sample_rate: Output sample rate requested from FFmpeg.
        :param channels: Number of output channels requested from FFmpeg.
        :param ffmpeg_bin: Name or path of the FFmpeg executable.
        """
        self.ffmpeg_bin = ffmpeg_bin
        # Output options: raw signed 16-bit little-endian PCM written to stdout.
        # '-ar' is FFmpeg's audio sample-rate option ('-r' sets a frame rate).
        self.ffmpeg_base_args = (
            '-f', 's16le',
            '-acodec', 'pcm_s16le', '-ac', str(channels), '-ar', str(sample_rate), '-')

        self.ffmpeg_args = self.ffmpeg_base_args

        # bufsize = sample_duration * rate * width * channels (width: 2 bytes per sample)
        self.bufsize = int(sample_duration * sample_rate * 2 * channels)
        self.ffmpeg: Optional[subprocess.Popen] = None
        self.sample_duration = sample_duration
        self.sample_rate = sample_rate
        self.channels = channels
        self.logger = logging.getLogger(self.__class__.__name__)

    def __iter__(self):
        return self

    def __next__(self) -> 'AudioSegment':
        """Read the next buffer from FFmpeg and wrap it in an ``AudioSegment``.

        :raises StopIteration: If FFmpeg is not running or produced no more data.
        """
        if not self.ffmpeg or self.ffmpeg.poll() is not None:
            raise StopIteration

        data = self.ffmpeg.stdout.read(self.bufsize)
        if data:
            return AudioSegment(data, sample_rate=self.sample_rate, channels=self.channels)

        raise StopIteration

    def __enter__(self):
        """Start the FFmpeg subprocess with its standard output piped to this object."""
        self.ffmpeg = subprocess.Popen(self.ffmpeg_args, stdout=subprocess.PIPE)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Terminate FFmpeg, escalating to kill if it does not exit within 5 seconds."""
        if self.ffmpeg:
            self.ffmpeg.terminate()
            try:
                self.ffmpeg.wait(timeout=5)
            except subprocess.TimeoutExpired:
                self.logger.warning('FFmpeg process termination timeout')

            if self.ffmpeg.poll() is None:
                self.ffmpeg.kill()

            self.ffmpeg.wait()
            self.ffmpeg = None

    def pause(self):
        """Send SIGSTOP to the FFmpeg process, if one is running."""
        if not self.ffmpeg:
            return

        self.ffmpeg.send_signal(signal.SIGSTOP)

    def resume(self):
        """Send SIGCONT to the FFmpeg process, if one is running."""
        if not self.ffmpeg:
            return

        self.ffmpeg.send_signal(signal.SIGCONT)

    @staticmethod
    def convert_time(t: Union[int, float, str]) -> int:
        """Convert a time value to integer milliseconds.

        :param t: A number of seconds, a falsy value (treated as 0), or a string
            in ``[[hh:]mm:]ss[.fraction]`` form.
        :return: The time in milliseconds.
        :raises ValueError: If a string has more than three ``:``-separated
            fields or a field is not numeric.
        """
        if not isinstance(t, str):
            # round() avoids float truncation such as 1.001 * 1000 -> 1000.
            return int(round(t * 1000)) if t else 0

        *prefix, seconds = t.split(':')
        if len(prefix) > 2:
            raise ValueError(f'Invalid time string: {t}')

        mm = int(prefix.pop()) if prefix else 0
        hh = int(prefix.pop()) if prefix else 0

        # The part after the dot is a decimal fraction of a second ("1.5" is
        # 1500 ms), so normalise it to exactly three digits of milliseconds.
        whole, _, fraction = seconds.partition('.')
        msec = int((fraction + '000')[:3]) if fraction else 0
        ss = int(whole) if whole else 0
        return (hh * 60 * 60 * 1000) + (mm * 60 * 1000) + (ss * 1000) + msec
|
|
@ -0,0 +1,53 @@
|
|||
import os
|
||||
import numpy as np
|
||||
|
||||
from .writer import DatasetWriter
|
||||
from ..audio import AudioSegment
|
||||
|
||||
|
||||
class Dataset:
    """In-memory dataset of samples and their classes, with a train/validation split."""

    def __init__(self, samples: np.ndarray, classes: np.ndarray, validation_split: float = 0.,
                 low_freq: float = AudioSegment.default_low_freq, high_freq: float = AudioSegment.default_high_freq):
        """
        :param samples: Array of samples, one per row.
        :param classes: Array with the class of each sample.
        :param validation_split: Fraction of the data reserved for validation.
        :param low_freq: Lower cutoff stored with the dataset.
        :param high_freq: Upper cutoff stored with the dataset.
        """
        self.samples = samples
        self.classes = classes
        self.labels = np.sort(np.unique(classes))
        self.validation_split = validation_split
        self.low_freq = low_freq
        self.high_freq = high_freq
        self.train_samples, self.train_classes, self.validation_samples, self.validation_classes = [np.array([])] * 4
        self.shuffle()

    @classmethod
    def load(cls, npz_path: str, validation_split: float = 0.):
        """Load a dataset from an ``.npz`` archive holding the ``samples``,
        ``classes`` and ``cutoff_frequencies`` arrays.

        :param npz_path: Path of the archive (``~`` is expanded).
        :param validation_split: Fraction of the data reserved for validation.
        """
        # The archive object keeps the file open; close it once the arrays are read.
        with np.load(os.path.abspath(os.path.expanduser(npz_path))) as dataset:
            return cls(samples=dataset['samples'],
                       classes=dataset['classes'],
                       validation_split=validation_split,
                       low_freq=dataset['cutoff_frequencies'][0],
                       high_freq=dataset['cutoff_frequencies'][1])

    @classmethod
    def scan(cls, datasets_path, validation_split: float = 0.):
        """Load every ``.npz`` file found directly inside ``datasets_path``.

        :return: List of ``Dataset`` objects, in ``os.listdir`` order.
        """
        datasets_path = os.path.abspath(os.path.expanduser(datasets_path))
        return [
            cls.load(os.path.join(datasets_path, file), validation_split=validation_split)
            for file in os.listdir(datasets_path)
            if os.path.isfile(os.path.join(datasets_path, file))
            and file.endswith('.npz')
        ]

    def shuffle(self):
        """Shuffle samples and classes together, then recompute the split.

        The first ``(1 - validation_split)`` share of the shuffled data becomes
        the training set and the remainder the validation set.
        """
        samples = np.asarray(self.samples)
        classes = np.asarray(self.classes)

        # One random permutation applied to both arrays keeps each sample paired
        # with its class without building an intermediate array of pairs.
        order = np.random.permutation(len(samples))
        self.samples = samples[order]
        self.classes = classes[order]

        size = len(self.samples)
        pivot = int(size - (self.validation_split * size))
        self.train_samples = self.samples[:pivot]
        self.train_classes = self.classes[:pivot]
        self.validation_samples = self.samples[pivot:]
        self.validation_classes = self.classes[pivot:]
|
|
@ -0,0 +1,35 @@
|
|||
import os
|
||||
import pathlib
|
||||
import numpy as np
|
||||
|
||||
from micmon.audio import AudioSegment
|
||||
|
||||
|
||||
class DatasetWriter:
    """Context manager that accumulates segment spectra and their labels, then
    saves them to a compressed ``.npz`` archive on exit.
    """

    def __init__(self, path: str,
                 low_freq: int = AudioSegment.default_low_freq,
                 high_freq: int = AudioSegment.default_high_freq,
                 bins: int = AudioSegment.default_bins):
        """
        :param path: Output file path (``~`` is expanded).
        :param low_freq: Lower cutoff passed to ``AudioSegment.spectrum``.
        :param high_freq: Upper cutoff passed to ``AudioSegment.spectrum``.
        :param bins: Number of spectrum bins stored per sample.
        """
        self.path = os.path.abspath(os.path.expanduser(path))
        self.low_freq = low_freq
        self.high_freq = high_freq
        self.bins = bins
        self.samples = []
        self.classes = []

    def __add__(self, sample: AudioSegment):
        """Append the spectrum and label of ``sample`` and return ``self``.

        The writer is modified in place, so ``writer += segment`` works.
        """
        self.samples.append(sample.spectrum(low_freq=self.low_freq, high_freq=self.high_freq, bins=self.bins))
        self.classes.append(sample.label)
        return self

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Create the parent directory if needed, save the collected data and
        clear the buffers.
        """
        pathlib.Path(self.path).parent.mkdir(parents=True, exist_ok=True)
        np.savez_compressed(self.path,
                            samples=np.array(self.samples),
                            classes=np.array(self.classes),
                            cutoff_frequencies=np.array([self.low_freq, self.high_freq]))

        # Reset both buffers together so a reused writer never saves samples
        # alongside stale classes from a previous run.
        self.samples = []
        self.classes = []
|
|
@ -0,0 +1,77 @@
|
|||
import json
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
from typing import List, Optional, Union, Tuple
|
||||
from keras import Sequential, losses, optimizers, metrics
|
||||
from keras.layers import Layer
|
||||
from keras.models import load_model, Model as _Model
|
||||
|
||||
from micmon.audio import AudioSegment
|
||||
from micmon.dataset import Dataset
|
||||
|
||||
|
||||
class Model:
    """Wrapper around a Keras model that stores label names and the cutoff
    frequencies used to compute the input spectrum.
    """

    labels_file_name = 'labels.json'
    freq_file_name = 'freq.json'

    # noinspection PyShadowingNames
    def __init__(self, layers: Optional[List[Layer]] = None, labels: Optional[List[str]] = None,
                 model: Optional[_Model] = None, optimizer: Union[str, optimizers.Optimizer] = 'adam',
                 loss: Union[str, losses.Loss] = losses.SparseCategoricalCrossentropy(from_logits=True),
                 metrics: List[Union[str, metrics.Metric]] = ('accuracy',),
                 cutoff_frequencies: Tuple[int, int] = (AudioSegment.default_low_freq, AudioSegment.default_high_freq)):
        """
        :param layers: Layers used to build and compile a new ``Sequential`` model.
        :param labels: Optional label names, indexed by predicted class.
        :param model: Existing model, used as is when ``layers`` is not given.
        :param optimizer: Optimizer passed to ``compile`` (only with ``layers``).
        :param loss: Loss passed to ``compile`` (only with ``layers``).
        :param metrics: Metrics passed to ``compile`` (only with ``layers``).
        :param cutoff_frequencies: ``(low, high)`` bounds used by ``predict``.
        :raises AssertionError: If neither ``layers`` nor ``model`` is given.
        """
        assert layers or model
        self.label_names = labels
        self.cutoff_frequencies = list(map(int, cutoff_frequencies))

        if layers:
            self._model = Sequential(layers)
            self._model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
        else:
            self._model = model

    def fit(self, dataset: Dataset, *args, **kwargs):
        """Train on the training split of ``dataset``; extra arguments go to Keras ``fit``."""
        return self._model.fit(dataset.train_samples, dataset.train_classes, *args, **kwargs)

    def evaluate(self, dataset: Dataset, *args, **kwargs):
        """Evaluate on the validation split of ``dataset``; extra arguments go to Keras ``evaluate``."""
        return self._model.evaluate(dataset.validation_samples, dataset.validation_classes, *args, **kwargs)

    def predict(self, audio: AudioSegment):
        """Classify one audio segment.

        :return: The label name when label names are set, otherwise the index
            of the highest model output.
        """
        spectrum = audio.spectrum(low_freq=self.cutoff_frequencies[0], high_freq=self.cutoff_frequencies[1])
        output = self._model.predict(np.array([spectrum]))
        prediction = int(np.argmax(output))
        return self.label_names[prediction] if self.label_names else prediction

    def save(self, model_dir: str, *args, **kwargs):
        """Save the Keras model into ``model_dir``, plus the label names and
        cutoff frequencies as JSON files when they are set.
        """
        model_dir = os.path.abspath(os.path.expanduser(model_dir))
        self._model.save(model_dir, *args, **kwargs)

        if self.label_names:
            labels_file = os.path.join(model_dir, self.labels_file_name)
            with open(labels_file, 'w') as f:
                json.dump(self.label_names, f)

        if self.cutoff_frequencies:
            freq_file = os.path.join(model_dir, self.freq_file_name)
            with open(freq_file, 'w') as f:
                json.dump(self.cutoff_frequencies, f)

    @classmethod
    def load(cls, model_dir: str, *args, **kwargs):
        """Load a model saved by ``save``; extra arguments go to Keras ``load_model``.

        The label names and cutoff frequencies are read from their JSON files
        when those files exist.
        """
        model_dir = os.path.abspath(os.path.expanduser(model_dir))
        model = load_model(model_dir, *args, **kwargs)
        labels_file = os.path.join(model_dir, cls.labels_file_name)
        freq_file = os.path.join(model_dir, cls.freq_file_name)
        label_names = []
        frequencies = []

        if os.path.isfile(labels_file):
            with open(labels_file, 'r') as f:
                label_names = json.load(f)

        if os.path.isfile(freq_file):
            with open(freq_file, 'r') as f:
                frequencies = json.load(f)

        init_kwargs = {'model': model, 'labels': label_names}
        # Without a frequencies file keep the constructor defaults: an empty
        # list would make predict() fail when indexing the cutoff frequencies.
        if frequencies:
            init_kwargs['cutoff_frequencies'] = frequencies

        return cls(**init_kwargs)
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue