Code commit

2020-10-27 15:21:32 +01:00 · 2020-10-27 15:21:32 +01:00 · 2f578929fb
parent 3dbd0f1d83
commit 2f578929fb
15 changed files with 1056 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,5 @@
 /.idea/
+.ipynb_checkpoints
+/data/
+/models/
+__pycache__
--- a/micmon/__init__.py
+++ b/micmon/__init__.py
@ -0,0 +1,4 @@
+import logging
+import sys
+
+# Configure the root logger when the package is imported: DEBUG level,
+# with records written to stdout.
+logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
--- a/micmon/audio/__init__.py
+++ b/micmon/audio/__init__.py
@ -0,0 +1,6 @@
+from .directory import AudioDirectory
+from .segment import AudioSegment
+from .player import AudioPlayer
+from .source import AudioSource
+from .file import AudioFile
+from .device import AudioDevice
--- a/micmon/audio/device.py
+++ b/micmon/audio/device.py
@ -0,0 +1,9 @@
+from micmon.audio import AudioSource
+
+
+class AudioDevice(AudioSource):
+    """Audio source whose FFmpeg input is a capture device."""
+
+    def __init__(self, system: str = 'alsa', device: str = 'plughw:0,1', *args, **kwargs):
+        """
+        :param system: Value passed to FFmpeg's ``-f`` input option.
+        :param device: Value passed to FFmpeg's ``-i`` option.
+        :param args: Positional arguments forwarded to :class:`AudioSource`.
+        :param kwargs: Keyword arguments forwarded to :class:`AudioSource`.
+        """
+        super().__init__(*args, **kwargs)
+        # Full command line: binary, input selection, then the shared
+        # output arguments prepared by the base class.
+        input_args = ('-f', system, '-i', device)
+        self.ffmpeg_args = (self.ffmpeg_bin,) + input_args + tuple(self.ffmpeg_base_args)
--- a/micmon/audio/directory.py
+++ b/micmon/audio/directory.py
@ -0,0 +1,24 @@
+import os
+
+
+class AudioDirectory:
+    """
+    A directory that holds one audio recording and its labels file.
+
+    The directory must contain both ``audio.mp3`` and ``labels.json``.
+    """
+
+    _audio_file_name = 'audio.mp3'
+    _labels_file_name = 'labels.json'
+
+    def __init__(self, path: str):
+        """
+        :param path: Directory path. ``~`` is expanded and the result is
+            made absolute.
+        :raises AssertionError: If the audio file or the labels file is
+            missing from the directory.
+        """
+        self.path = os.path.abspath(os.path.expanduser(path))
+        self.audio_file = os.path.join(self.path, self._audio_file_name)
+        self.labels_file = os.path.join(self.path, self._labels_file_name)
+
+        # Both files are required. Raise explicitly (same exception type as
+        # before) so the check is not stripped when Python runs with -O.
+        if not (os.path.isfile(self.audio_file) and os.path.isfile(self.labels_file)):
+            raise AssertionError(
+                f'{self._audio_file_name} or {self._labels_file_name} missing from {self.path}'
+            )
+
+    @classmethod
+    def scan(cls, path: str) -> list:
+        """
+        Collect the sub-directories of ``path`` that contain both files.
+
+        :param path: Parent directory to scan (not recursive).
+        :return: One :class:`AudioDirectory` per matching sub-directory, in
+            the order returned by :func:`os.listdir`.
+        """
+        path = os.path.abspath(os.path.expanduser(path))
+        candidates = (os.path.join(path, entry) for entry in os.listdir(path))
+        return [
+            cls(candidate)
+            for candidate in candidates
+            if os.path.isdir(candidate)
+            and os.path.isfile(os.path.join(candidate, cls._audio_file_name))
+            and os.path.isfile(os.path.join(candidate, cls._labels_file_name))
+        ]
--- a/micmon/audio/file.py
+++ b/micmon/audio/file.py
@ -0,0 +1,56 @@
+import json
+from typing import Optional, List, Tuple, Union
+
+from micmon.audio import AudioDirectory, AudioSegment, AudioSource
+
+
+class AudioFile(AudioSource):
+    """
+    Audio source that decodes the audio file of an :class:`AudioDirectory`
+    through FFmpeg and tags every yielded segment with the label that is
+    active at the segment's start time.
+    """
+
+    def __init__(self, path: AudioDirectory,
+                 start: Union[str, int, float] = 0,
+                 duration: Optional[Union[str, int, float]] = None,
+                 *args, **kwargs):
+        """
+        :param path: Directory object providing ``audio_file`` and
+            ``labels_file``.
+        :param start: Start offset, passed to FFmpeg as ``-ss`` when truthy.
+            Numbers are seconds; strings are parsed by ``convert_time``.
+        :param duration: Amount of audio to read, passed to FFmpeg as ``-t``
+            when truthy.
+        :param args: Positional arguments forwarded to :class:`AudioSource`.
+        :param kwargs: Keyword arguments forwarded to :class:`AudioSource`.
+        """
+        super().__init__(*args, **kwargs)
+        self.ffmpeg_args = (
+            self.ffmpeg_bin, '-i', path.audio_file, *(('-ss', str(start)) if start else ()),
+            *(('-t', str(duration)) if duration else ()), *self.ffmpeg_base_args
+        )
+
+        # convert_time() returns milliseconds; both attributes are in seconds.
+        self.start = self.convert_time(start)/1000
+        self.duration = self.convert_time(duration)/1000
+        self.segments = self.parse_labels_file(path.labels_file) \
+            if path.labels_file else []
+
+        # Sorted unique label values; a segment's numeric label is the index
+        # of its label value in this list.
+        self.labels = sorted(set(label for _, label in self.segments))
+        self.cur_time = self.start
+        self.cur_label = None
+
+    @classmethod
+    def parse_labels_file(cls, labels_file: str) -> List[Tuple[int, Union[int, bool, str]]]:
+        """
+        Read a JSON object mapping timestamps to labels.
+
+        :param labels_file: Path of the JSON labels file.
+        :return: ``(timestamp_in_ms, label)`` pairs sorted by timestamp.
+        """
+        with open(labels_file, 'r') as f:
+            segments = {
+                cls.convert_time(timestamp): label
+                for timestamp, label in json.load(f).items()
+            }
+
+        # Keys are unique, so sorting the items never compares the labels.
+        return sorted(segments.items(), key=lambda item: item[0])
+
+    def __next__(self) -> AudioSegment:
+        """
+        Read the next chunk of audio and return it as a labelled segment.
+
+        :raises StopIteration: If FFmpeg has not been started or its output
+            is exhausted.
+        """
+        if not self.ffmpeg:
+            raise StopIteration
+
+        # Rely on EOF rather than on poll(): FFmpeg may already have exited
+        # while the final chunk is still waiting in the pipe.
+        data = self.ffmpeg.stdout.read(self.bufsize)
+        if not data:
+            raise StopIteration
+
+        # Advance to the label whose timestamp (ms) has been reached.
+        while self.segments and self.cur_time * 1000 >= self.segments[0][0]:
+            self.cur_label = self.segments.pop(0)[1]
+
+        # Audio that precedes the first labelled timestamp has no label;
+        # previously this raised ValueError from list.index().
+        label = self.labels.index(self.cur_label) if self.cur_label in self.labels else None
+        audio = AudioSegment(data, sample_rate=self.sample_rate, channels=self.channels,
+                             label=label)
+
+        self.cur_time += audio.duration
+        return audio
+
--- a/micmon/audio/player.py
+++ b/micmon/audio/player.py
@ -0,0 +1,34 @@
+import subprocess
+from typing import Optional
+
+from micmon.audio import AudioSegment
+
+
+class AudioPlayer:
+    """
+    Context manager that plays raw 16-bit PCM audio by piping it to an
+    ``ffplay`` subprocess.
+    """
+
+    def __init__(self, sample_rate: int = 44100, channels: int = 1, ffplay_bin: str = 'ffplay'):
+        """
+        :param sample_rate: Sample rate passed to ffplay (``-ar``).
+        :param channels: Channel count passed to ffplay (``-ac``).
+        :param ffplay_bin: Name or path of the ffplay executable.
+        """
+        self.sample_rate = sample_rate
+        self.channels = channels
+        self.ffplay_bin = ffplay_bin
+        self.process: Optional[subprocess.Popen] = None
+
+    def __enter__(self):
+        """Start ffplay reading ``s16le`` audio from its standard input."""
+        self.process = subprocess.Popen([
+            self.ffplay_bin, '-f', 's16le', '-ar', str(self.sample_rate),
+            '-ac', str(self.channels), '-nodisp', '-'
+        ], stdin=subprocess.PIPE)
+
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Terminate ffplay, killing it if it does not exit within 5 seconds."""
+        if self.process:
+            self.process.terminate()
+            try:
+                self.process.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                # terminate() was ignored: fall through to the kill below.
+                pass
+
+            # poll() must be *called*; comparing the bound method to None
+            # is always False, so kill() was never reached.
+            if self.process.poll() is None:
+                self.process.kill()
+
+            self.process.wait()
+            self.process = None
+
+    def play(self, audio: AudioSegment):
+        """
+        Write the raw bytes of ``audio`` to ffplay's standard input.
+
+        :raises AssertionError: If the player has not been started.
+        """
+        assert self.process, 'Player is not running'
+        self.process.stdin.write(audio.data)
--- a/micmon/audio/segment.py
+++ b/micmon/audio/segment.py
@ -0,0 +1,41 @@
+from typing import Optional, Union
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+class AudioSegment:
+    """
+    A chunk of raw audio interpreted as 16-bit integer samples, with an
+    optional numeric label.
+    """
+
+    default_low_freq = 20
+    default_high_freq = 20000
+    default_bins = 100
+
+    def __init__(self, data: bytes, sample_rate: int = 44100, channels: int = 1, label: Optional[int] = None):
+        """
+        :param data: Raw sample bytes, decoded as ``int16``.
+        :param sample_rate: Samples per second per channel.
+        :param channels: Number of channels in ``data``.
+        :param label: Optional class index attached to the segment.
+        """
+        self.data = data
+        self.label = label
+        self.sample_rate = sample_rate
+        self.channels = channels
+        self.audio = np.frombuffer(data, dtype=np.int16)
+        # Seconds of audio: total sample count over samples per second.
+        self.duration = len(self.audio) / (sample_rate * channels)
+
+    def fft(self, low_freq: int = default_low_freq, high_freq: int = default_high_freq) -> np.ndarray:
+        """
+        Return the magnitudes of the real FFT of the samples.
+
+        ``low_freq`` and ``high_freq`` are applied as slice indices on the
+        FFT output, i.e. they select bins ``[low_freq:high_freq]``.
+        """
+        magnitudes = np.absolute(np.fft.rfft(self.audio))
+        return magnitudes[low_freq:high_freq]
+
+    def spectrum(self, low_freq: int = default_low_freq, high_freq: int = default_high_freq,
+                 bins: int = default_bins) -> np.ndarray:
+        """
+        Reduce the FFT magnitudes to ``bins`` averaged bands.
+
+        Each band is the mean of ``len(fft) // bins`` consecutive values,
+        divided by ``duration * 65535``.
+
+        :return: Array of length ``bins``.
+        """
+        magnitudes = self.fft(low_freq=low_freq, high_freq=high_freq)
+        width = int(len(magnitudes) / bins)
+        scale = self.duration * ((1 << 16) - 1)
+        bands = (magnitudes[k * width:(k + 1) * width] for k in range(bins))
+        return np.array([np.average(band) / scale for band in bands])
+
+    def plot_audio(self):
+        """Plot the raw samples with matplotlib and show the figure."""
+        plt.plot(self.audio)
+        plt.show()
+
+    def plot_spectrum(self, low_freq: int = default_low_freq, high_freq: int = default_high_freq,
+                      bins: int = default_bins):
+        """Show a bar chart of :meth:`spectrum` with the y axis fixed to [0, 1]."""
+        heights = self.spectrum(low_freq=low_freq, high_freq=high_freq, bins=bins)
+        plt.ylim(0, 1)
+        plt.bar(range(len(heights)), heights)
+        plt.show()
--- a/micmon/audio/source.py
+++ b/micmon/audio/source.py
@ -0,0 +1,85 @@
+import logging
+import signal
+import subprocess
+from abc import ABC
+from typing import Optional, Union
+
+from micmon.audio.segment import AudioSegment
+
+
+class AudioSource(ABC):
+    """
+    Base class for FFmpeg-backed audio sources.
+
+    Used as a context manager it starts/stops the FFmpeg process; used as
+    an iterator it yields fixed-size :class:`AudioSegment` chunks read from
+    FFmpeg's standard output. Subclasses set ``ffmpeg_args`` to the full
+    command line.
+    """
+
+    def __init__(self,
+                 sample_duration: float = 2.0,
+                 sample_rate: int = 44100,
+                 channels: int = 1,
+                 ffmpeg_bin: str = 'ffmpeg'):
+        """
+        :param sample_duration: Length in seconds of each yielded segment.
+        :param sample_rate: Output sample rate in Hz.
+        :param channels: Number of output channels.
+        :param ffmpeg_bin: Name or path of the ffmpeg executable.
+        """
+        self.ffmpeg_bin = ffmpeg_bin
+        # Raw signed 16-bit little-endian PCM on stdout. ``-ar`` is FFmpeg's
+        # audio sample-rate option (``-r`` sets the frame rate instead).
+        self.ffmpeg_base_args = (
+            '-f', 's16le',
+            '-acodec', 'pcm_s16le', '-ac', str(channels), '-ar', str(sample_rate), '-')
+
+        self.ffmpeg_args = self.ffmpeg_base_args
+
+        # bufsize = frames per segment * width (2 bytes) * channels.
+        # Rounding is done on the frame count so that a chunk never ends in
+        # the middle of a sample.
+        self.bufsize = int(sample_duration * sample_rate) * 2 * channels
+        self.ffmpeg: Optional[subprocess.Popen] = None
+        self.sample_duration = sample_duration
+        self.sample_rate = sample_rate
+        self.channels = channels
+        self.logger = logging.getLogger(self.__class__.__name__)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self) -> AudioSegment:
+        """
+        Read the next ``bufsize`` bytes and wrap them in a segment.
+
+        :raises StopIteration: If FFmpeg has not been started or its output
+            is exhausted.
+        """
+        if not self.ffmpeg:
+            raise StopIteration
+
+        # Rely on EOF rather than on poll(): FFmpeg may already have exited
+        # while the final chunk is still waiting in the pipe.
+        data = self.ffmpeg.stdout.read(self.bufsize)
+        if data:
+            return AudioSegment(data, sample_rate=self.sample_rate, channels=self.channels)
+
+        raise StopIteration
+
+    def __enter__(self):
+        """Start FFmpeg with ``ffmpeg_args`` and capture its stdout."""
+        self.ffmpeg = subprocess.Popen(self.ffmpeg_args, stdout=subprocess.PIPE)
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Terminate FFmpeg, killing it if it does not exit within 5 seconds."""
+        if self.ffmpeg:
+            self.ffmpeg.terminate()
+            try:
+                self.ffmpeg.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                self.logger.warning('FFmpeg process termination timeout')
+
+            if self.ffmpeg.poll() is None:
+                self.ffmpeg.kill()
+
+            self.ffmpeg.wait()
+            self.ffmpeg = None
+
+    def pause(self):
+        """Suspend the FFmpeg process with ``SIGSTOP`` (no-op if not running)."""
+        if not self.ffmpeg:
+            return
+
+        self.ffmpeg.send_signal(signal.SIGSTOP)
+
+    def resume(self):
+        """Resume a suspended FFmpeg process with ``SIGCONT`` (no-op if not running)."""
+        if not self.ffmpeg:
+            return
+
+        self.ffmpeg.send_signal(signal.SIGCONT)
+
+    @staticmethod
+    def convert_time(t: Union[int, float, str]) -> int:
+        """
+        Convert a time value to milliseconds.
+
+        :param t: A number of seconds, or a string formatted as
+            ``[[hh:]mm:]ss[.fraction]``. Falsy numbers and ``None`` give 0.
+        :return: Milliseconds as an integer. The fraction is read as a
+            decimal fraction of a second (``'1.5'`` is 1500 ms) and
+            truncated to millisecond precision.
+        :raises ValueError: If the string has more than three ``:``-separated
+            fields or a field is not numeric.
+        """
+        if not isinstance(t, str):
+            return int(t * 1000) if t else 0
+
+        fields = t.split(':')
+        if len(fields) > 3:
+            raise ValueError(f'Invalid time string: {t!r}')
+
+        # Consume the fields right-to-left: seconds, then minutes, then hours.
+        whole, _, fraction = fields.pop().partition('.')
+        ss = int(whole)
+        mm = int(fields.pop()) if fields else 0
+        hh = int(fields.pop()) if fields else 0
+        # Pad/truncate to three digits so '.5' means 500 ms, not 5 ms.
+        msec = int(fraction.ljust(3, '0')[:3]) if fraction else 0
+        return (hh * 60 * 60 * 1000) + (mm * 60 * 1000) + (ss * 1000) + msec
--- a/micmon/dataset/__init__.py
+++ b/micmon/dataset/__init__.py
@ -0,0 +1,53 @@
+import os
+import numpy as np
+
+from .writer import DatasetWriter
+from ..audio import AudioSegment
+
+
+class Dataset:
+    """
+    In-memory set of samples and their classes, shuffled on construction
+    and split into a training part and a validation part.
+    """
+
+    def __init__(self, samples: np.ndarray, classes: np.ndarray, validation_split: float = 0.,
+                 low_freq: float = AudioSegment.default_low_freq, high_freq: float = AudioSegment.default_high_freq):
+        """
+        :param samples: Sample array; one entry per sample.
+        :param classes: Class of each sample, aligned with ``samples``.
+        :param validation_split: Fraction of the data placed in the
+            validation part.
+        :param low_freq: Lower cutoff stored alongside the data.
+        :param high_freq: Upper cutoff stored alongside the data.
+        """
+        self.samples = samples
+        self.classes = classes
+        self.labels = np.sort(np.unique(classes))
+        self.validation_split = validation_split
+        self.low_freq = low_freq
+        self.high_freq = high_freq
+
+        # Placeholders; shuffle() fills in the real split right below.
+        empty = np.array([])
+        self.train_samples = empty
+        self.train_classes = empty
+        self.validation_samples = empty
+        self.validation_classes = empty
+        self.shuffle()
+
+    @classmethod
+    def load(cls, npz_path: str, validation_split: float = 0.):
+        """
+        Build a dataset from an ``.npz`` archive holding the ``samples``,
+        ``classes`` and ``cutoff_frequencies`` arrays.
+        """
+        archive = np.load(os.path.abspath(os.path.expanduser(npz_path)))
+        samples = archive['samples']
+        classes = archive['classes']
+        cutoff = archive['cutoff_frequencies']
+        return cls(samples=samples,
+                   classes=classes,
+                   validation_split=validation_split,
+                   low_freq=cutoff[0],
+                   high_freq=cutoff[1])
+
+    @classmethod
+    def scan(cls, datasets_path, validation_split: float = 0.):
+        """Load every ``.npz`` file found directly inside ``datasets_path``."""
+        datasets_path = os.path.abspath(os.path.expanduser(datasets_path))
+        datasets = []
+        for file in os.listdir(datasets_path):
+            candidate = os.path.join(datasets_path, file)
+            if not (os.path.isfile(candidate) and file.endswith('.npz')):
+                continue
+            datasets.append(cls.load(candidate, validation_split=validation_split))
+
+        return datasets
+
+    def shuffle(self):
+        """
+        Shuffle samples and classes together and recompute the
+        training/validation split.
+        """
+        def column(rows, k):
+            # Rebuild a regular array from one column of the pair array.
+            return np.array([row[k] for row in rows])
+
+        # Pair each sample with its class so both are permuted together.
+        pairs = np.array(
+            [(self.samples[i], self.classes[i]) for i in range(len(self.samples))],
+            dtype=object,
+        )
+        np.random.shuffle(pairs)
+
+        self.samples = column(pairs, 0)
+        self.classes = column(pairs, 1)
+
+        # The first `pivot` pairs are training data, the rest validation.
+        pivot = int(len(pairs) - (self.validation_split * len(pairs)))
+        training, validation = pairs[:pivot], pairs[pivot:]
+        self.train_samples = column(training, 0)
+        self.train_classes = column(training, 1)
+        self.validation_samples = column(validation, 0)
+        self.validation_classes = column(validation, 1)
--- a/micmon/dataset/writer.py
+++ b/micmon/dataset/writer.py
@ -0,0 +1,35 @@
+import os
+import pathlib
+import numpy as np
+
+from micmon.audio import AudioSegment
+
+
+class DatasetWriter:
+    """
+    Context manager that accumulates the spectra and labels of audio
+    segments and saves them as a compressed ``.npz`` archive on exit.
+    """
+
+    def __init__(self, path: str,
+                 low_freq: int = AudioSegment.default_low_freq,
+                 high_freq: int = AudioSegment.default_high_freq,
+                 bins: int = AudioSegment.default_bins):
+        """
+        :param path: Output file path; ``~`` is expanded and the path is
+            made absolute.
+        :param low_freq: Lower cutoff passed to ``AudioSegment.spectrum``.
+        :param high_freq: Upper cutoff passed to ``AudioSegment.spectrum``.
+        :param bins: Number of spectrum bins per sample.
+        """
+        self.path = os.path.abspath(os.path.expanduser(path))
+        self.low_freq = low_freq
+        self.high_freq = high_freq
+        self.bins = bins
+        self.samples = []
+        self.classes = []
+
+    def __add__(self, sample: AudioSegment):
+        """
+        Append the spectrum and label of ``sample``.
+
+        Returns ``self`` so that ``writer += segment`` keeps the writer.
+        """
+        self.samples.append(sample.spectrum(low_freq=self.low_freq, high_freq=self.high_freq, bins=self.bins))
+        self.classes.append(sample.label)
+        return self
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """
+        Write the collected data to ``path`` (creating parent directories)
+        and clear the buffers. The archive is written even when the block
+        exits because of an exception.
+        """
+        pathlib.Path(self.path).parent.mkdir(parents=True, exist_ok=True)
+        np.savez_compressed(self.path,
+                            samples=np.array(self.samples),
+                            classes=np.array(self.classes),
+                            cutoff_frequencies=np.array([self.low_freq, self.high_freq]))
+
+        # Reset both buffers together; clearing only the samples would leave
+        # stale classes misaligned with the data of a later run.
+        self.samples = []
+        self.classes = []
--- a/micmon/model/__init__.py
+++ b/micmon/model/__init__.py
@ -0,0 +1,77 @@
+import json
+import os
+import numpy as np
+
+from typing import List, Optional, Union, Tuple
+from keras import Sequential, losses, optimizers, metrics
+from keras.layers import Layer
+from keras.models import load_model, Model as _Model
+
+from micmon.audio import AudioSegment
+from micmon.dataset import Dataset
+
+
+class Model:
+    """
+    Thin wrapper around a Keras model that also keeps the label names and
+    the cutoff frequencies used to compute the input spectra.
+    """
+
+    labels_file_name = 'labels.json'
+    freq_file_name = 'freq.json'
+
+    # noinspection PyShadowingNames
+    def __init__(self, layers: Optional[List[Layer]] = None, labels: Optional[List[str]] = None,
+                 model: Optional[_Model] = None, optimizer: Union[str, optimizers.Optimizer] = 'adam',
+                 loss: Union[str, losses.Loss] = losses.SparseCategoricalCrossentropy(from_logits=True),
+                 metrics: List[Union[str, metrics.Metric]] = ('accuracy',),
+                 cutoff_frequencies: Tuple[int, int] = (AudioSegment.default_low_freq, AudioSegment.default_high_freq)):
+        """
+        :param layers: Layers of a new ``Sequential`` model, compiled with
+            ``optimizer``, ``loss`` and ``metrics``.
+        :param labels: Optional label names, indexed by predicted class.
+        :param model: Existing Keras model, used when ``layers`` is not given.
+        :param optimizer: Optimizer passed to ``compile``.
+        :param loss: Loss passed to ``compile``.
+        :param metrics: Metrics passed to ``compile``.
+        :param cutoff_frequencies: ``(low, high)`` pair passed to
+            ``AudioSegment.spectrum`` by :meth:`predict`. ``None`` or an
+            empty sequence selects the ``AudioSegment`` defaults.
+        :raises AssertionError: If neither ``layers`` nor ``model`` is given.
+        """
+        assert layers or model
+        self.label_names = labels
+
+        # load() passes an empty list when the model directory has no
+        # freq.json; fall back to the defaults so predict() can always
+        # index both cutoffs.
+        if cutoff_frequencies is None or len(cutoff_frequencies) == 0:
+            cutoff_frequencies = (AudioSegment.default_low_freq, AudioSegment.default_high_freq)
+        self.cutoff_frequencies = list(map(int, cutoff_frequencies))
+
+        if layers:
+            self._model = Sequential(layers)
+            self._model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
+        else:
+            self._model = model
+
+    def fit(self, dataset: Dataset, *args, **kwargs):
+        """Train on the training part of ``dataset``; extra arguments go to Keras ``fit``."""
+        return self._model.fit(dataset.train_samples, dataset.train_classes, *args, **kwargs)
+
+    def evaluate(self, dataset: Dataset, *args, **kwargs):
+        """Evaluate on the validation part of ``dataset``; extra arguments go to Keras ``evaluate``."""
+        return self._model.evaluate(dataset.validation_samples, dataset.validation_classes, *args, **kwargs)
+
+    def predict(self, audio: AudioSegment):
+        """
+        Classify one audio segment.
+
+        :return: The label name of the highest-scoring class when label
+            names are available, otherwise the class index.
+        """
+        spectrum = audio.spectrum(low_freq=self.cutoff_frequencies[0], high_freq=self.cutoff_frequencies[1])
+        output = self._model.predict(np.array([spectrum]))
+        prediction = int(np.argmax(output))
+        return self.label_names[prediction] if self.label_names else prediction
+
+    def save(self, model_dir: str, *args, **kwargs):
+        """
+        Save the Keras model to ``model_dir`` and write ``labels.json`` and
+        ``freq.json`` next to it when the corresponding values are set.
+        """
+        model_dir = os.path.abspath(os.path.expanduser(model_dir))
+        self._model.save(model_dir, *args, **kwargs)
+
+        if self.label_names:
+            labels_file = os.path.join(model_dir, self.labels_file_name)
+            with open(labels_file, 'w') as f:
+                json.dump(self.label_names, f)
+
+        if self.cutoff_frequencies:
+            freq_file = os.path.join(model_dir, self.freq_file_name)
+            with open(freq_file, 'w') as f:
+                json.dump(self.cutoff_frequencies, f)
+
+    @classmethod
+    def load(cls, model_dir: str, *args, **kwargs):
+        """
+        Load a model saved by :meth:`save`.
+
+        Missing ``labels.json`` or ``freq.json`` files are tolerated: the
+        model then has no label names and/or uses the default cutoffs.
+        """
+        model_dir = os.path.abspath(os.path.expanduser(model_dir))
+        model = load_model(model_dir, *args, **kwargs)
+        labels_file = os.path.join(model_dir, cls.labels_file_name)
+        freq_file = os.path.join(model_dir, cls.freq_file_name)
+        label_names = []
+        frequencies = []
+
+        if os.path.isfile(labels_file):
+            with open(labels_file, 'r') as f:
+                label_names = json.load(f)
+
+        if os.path.isfile(freq_file):
+            with open(freq_file, 'r') as f:
+                frequencies = json.load(f)
+
+        return cls(model=model, labels=label_names, cutoff_frequencies=frequencies)
--- a/notebooks/dataset.ipynb
+++ b/notebooks/dataset.ipynb
--- a/notebooks/predict.ipynb
+++ b/notebooks/predict.ipynb
--- a/notebooks/train.ipynb
+++ b/notebooks/train.ipynb