forked from platypush/platypush
[media
] Added logic to parse local media metadata.
This commit is contained in:
parent
a83f4729a6
commit
52ee614ec4
3 changed files with 281 additions and 132 deletions
|
@ -1,27 +1,28 @@
|
||||||
import datetime
|
import datetime
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from sqlalchemy import (
|
from sqlalchemy import create_engine
|
||||||
create_engine,
|
|
||||||
Column,
|
|
||||||
Integer,
|
|
||||||
String,
|
|
||||||
DateTime,
|
|
||||||
PrimaryKeyConstraint,
|
|
||||||
ForeignKey,
|
|
||||||
)
|
|
||||||
from sqlalchemy.orm import sessionmaker, scoped_session
|
|
||||||
from sqlalchemy.sql.expression import func
|
from sqlalchemy.sql.expression import func
|
||||||
|
|
||||||
from platypush.common.db import declarative_base
|
|
||||||
from platypush.config import Config
|
from platypush.config import Config
|
||||||
from platypush.plugins.media import MediaPlugin
|
from platypush.plugins.media import MediaPlugin
|
||||||
from platypush.plugins.media.search import MediaSearcher
|
from platypush.plugins.media.search import MediaSearcher
|
||||||
|
|
||||||
Base = declarative_base()
|
from .db import (
|
||||||
Session = scoped_session(sessionmaker())
|
Base,
|
||||||
|
MediaDirectory,
|
||||||
|
MediaFile,
|
||||||
|
MediaFileToken,
|
||||||
|
MediaToken,
|
||||||
|
Session,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .metadata import get_metadata
|
||||||
|
|
||||||
|
_db_lock = threading.RLock()
|
||||||
|
|
||||||
|
|
||||||
class LocalMediaSearcher(MediaSearcher):
|
class LocalMediaSearcher(MediaSearcher):
|
||||||
|
@ -37,7 +38,7 @@ class LocalMediaSearcher(MediaSearcher):
|
||||||
def __init__(self, dirs, *args, **kwargs):
|
def __init__(self, dirs, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.dirs = dirs
|
self.dirs = dirs
|
||||||
db_dir = os.path.join(Config.get('workdir'), 'media')
|
db_dir = os.path.join(Config.get_workdir(), 'media')
|
||||||
os.makedirs(db_dir, exist_ok=True)
|
os.makedirs(db_dir, exist_ok=True)
|
||||||
self.db_file = os.path.join(db_dir, 'media.db')
|
self.db_file = os.path.join(db_dir, 'media.db')
|
||||||
self._db_engine = None
|
self._db_engine = None
|
||||||
|
@ -142,6 +143,7 @@ class LocalMediaSearcher(MediaSearcher):
|
||||||
).delete(synchronize_session='fetch')
|
).delete(synchronize_session='fetch')
|
||||||
return
|
return
|
||||||
|
|
||||||
|
media_files = []
|
||||||
stored_file_records = {
|
stored_file_records = {
|
||||||
f.path: f for f in self._get_file_records(dir_record, session)
|
f.path: f for f in self._get_file_records(dir_record, session)
|
||||||
}
|
}
|
||||||
|
@ -162,7 +164,7 @@ class LocalMediaSearcher(MediaSearcher):
|
||||||
) and not MediaPlugin.is_audio_file(filename):
|
) and not MediaPlugin.is_audio_file(filename):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self.logger.debug('Syncing item {}'.format(filepath))
|
self.logger.info('Scanning item %s', filepath)
|
||||||
tokens = [
|
tokens = [
|
||||||
_.lower()
|
_.lower()
|
||||||
for _ in re.split(self._filename_separators, filename.strip())
|
for _ in re.split(self._filename_separators, filename.strip())
|
||||||
|
@ -170,9 +172,9 @@ class LocalMediaSearcher(MediaSearcher):
|
||||||
|
|
||||||
token_records = self._sync_token_records(session, *tokens)
|
token_records = self._sync_token_records(session, *tokens)
|
||||||
file_record = MediaFile.build(directory_id=dir_record.id, path=filepath)
|
file_record = MediaFile.build(directory_id=dir_record.id, path=filepath)
|
||||||
|
|
||||||
session.add(file_record)
|
session.add(file_record)
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|
||||||
file_record = (
|
file_record = (
|
||||||
session.query(MediaFile)
|
session.query(MediaFile)
|
||||||
.filter_by(directory_id=dir_record.id, path=filepath)
|
.filter_by(directory_id=dir_record.id, path=filepath)
|
||||||
|
@ -185,6 +187,8 @@ class LocalMediaSearcher(MediaSearcher):
|
||||||
)
|
)
|
||||||
session.add(file_token)
|
session.add(file_token)
|
||||||
|
|
||||||
|
media_files.append(file_record)
|
||||||
|
|
||||||
# stored_file_records should now only contain the records of the files
|
# stored_file_records should now only contain the records of the files
|
||||||
# that have been removed from the directory
|
# that have been removed from the directory
|
||||||
if stored_file_records:
|
if stored_file_records:
|
||||||
|
@ -198,7 +202,7 @@ class LocalMediaSearcher(MediaSearcher):
|
||||||
MediaFile.id.in_([record.id for record in stored_file_records.values()])
|
MediaFile.id.in_([record.id for record in stored_file_records.values()])
|
||||||
).delete(synchronize_session='fetch')
|
).delete(synchronize_session='fetch')
|
||||||
|
|
||||||
dir_record.last_indexed_at = datetime.datetime.now()
|
dir_record.last_indexed_at = datetime.datetime.now() # type: ignore
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
'Scanned {} in {} seconds'.format(
|
'Scanned {} in {} seconds'.format(
|
||||||
media_dir, int(time.time() - index_start_time)
|
media_dir, int(time.time() - index_start_time)
|
||||||
|
@ -207,7 +211,32 @@ class LocalMediaSearcher(MediaSearcher):
|
||||||
|
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|
||||||
def search(self, query, **kwargs):
|
# Start the metadata scan in a separate thread
|
||||||
|
threading.Thread(
|
||||||
|
target=self._metadata_scan_thread, args=(media_files,), daemon=True
|
||||||
|
).start()
|
||||||
|
|
||||||
|
def _metadata_scan_thread(self, records):
|
||||||
|
"""
|
||||||
|
Thread that will scan the media files in the given paths and update
|
||||||
|
their metadata.
|
||||||
|
"""
|
||||||
|
paths = [record.path for record in records]
|
||||||
|
metadata = get_metadata(*paths)
|
||||||
|
session = self._get_db_session()
|
||||||
|
|
||||||
|
for record, data in zip(records, metadata):
|
||||||
|
record = session.merge(record)
|
||||||
|
record.duration = data.get('duration') # type: ignore
|
||||||
|
record.width = data.get('width') # type: ignore
|
||||||
|
record.height = data.get('height') # type: ignore
|
||||||
|
record.image = data.get('image') # type: ignore
|
||||||
|
record.created_at = data.get('created_at') # type: ignore
|
||||||
|
session.add(record)
|
||||||
|
|
||||||
|
session.commit()
|
||||||
|
|
||||||
|
def search(self, query, **_):
|
||||||
"""
|
"""
|
||||||
Searches in the configured media directories given a query. It uses the
|
Searches in the configured media directories given a query. It uses the
|
||||||
built-in SQLite index if available. If any directory has changed since
|
built-in SQLite index if available. If any directory has changed since
|
||||||
|
@ -218,123 +247,46 @@ class LocalMediaSearcher(MediaSearcher):
|
||||||
session = self._get_db_session()
|
session = self._get_db_session()
|
||||||
results = {}
|
results = {}
|
||||||
|
|
||||||
for media_dir in self.dirs:
|
with _db_lock:
|
||||||
self.logger.info('Searching {} for "{}"'.format(media_dir, query))
|
for media_dir in self.dirs:
|
||||||
dir_record = self._get_or_create_dir_entry(session, media_dir)
|
self.logger.info('Searching {} for "{}"'.format(media_dir, query))
|
||||||
|
dir_record = self._get_or_create_dir_entry(session, media_dir)
|
||||||
|
|
||||||
if self._has_directory_changed_since_last_indexing(dir_record):
|
if self._has_directory_changed_since_last_indexing(dir_record):
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
'{} has changed since last indexing, '.format(media_dir)
|
'{} has changed since last indexing, '.format(media_dir)
|
||||||
+ 're-indexing'
|
+ 're-indexing'
|
||||||
)
|
)
|
||||||
|
|
||||||
self.scan(media_dir, session=session, dir_record=dir_record)
|
self.scan(media_dir, session=session, dir_record=dir_record)
|
||||||
|
|
||||||
query_tokens = [
|
query_tokens = [
|
||||||
_.lower() for _ in re.split(self._filename_separators, query.strip())
|
_.lower()
|
||||||
]
|
for _ in re.split(self._filename_separators, query.strip())
|
||||||
|
]
|
||||||
|
|
||||||
for file_record in (
|
for file_record in session.query(MediaFile).where(
|
||||||
session.query(MediaFile.path)
|
MediaFile.id.in_(
|
||||||
.join(MediaFileToken)
|
session.query(MediaFile.id)
|
||||||
.join(MediaToken)
|
.join(MediaFileToken)
|
||||||
.filter(MediaToken.token.in_(query_tokens))
|
.join(MediaToken)
|
||||||
.group_by(MediaFile.path)
|
.filter(MediaToken.token.in_(query_tokens))
|
||||||
.having(func.count(MediaFileToken.token_id) >= len(query_tokens))
|
.group_by(MediaFile.id)
|
||||||
):
|
.having(
|
||||||
if os.path.isfile(file_record.path):
|
func.count(MediaFileToken.token_id) >= len(query_tokens)
|
||||||
results[file_record.path] = {
|
)
|
||||||
'url': 'file://' + file_record.path,
|
)
|
||||||
'title': os.path.basename(file_record.path),
|
):
|
||||||
'size': os.path.getsize(file_record.path),
|
if os.path.isfile(file_record.path):
|
||||||
}
|
results[file_record.path] = {
|
||||||
|
'url': 'file://' + file_record.path,
|
||||||
|
'title': os.path.basename(file_record.path),
|
||||||
|
'size': os.path.getsize(file_record.path),
|
||||||
|
'duration': file_record.duration,
|
||||||
|
'width': file_record.width,
|
||||||
|
'height': file_record.height,
|
||||||
|
'image': file_record.image,
|
||||||
|
'created_at': file_record.created_at,
|
||||||
|
}
|
||||||
|
|
||||||
return results.values()
|
return results.values()
|
||||||
|
|
||||||
|
|
||||||
# --- Table definitions
|
|
||||||
|
|
||||||
|
|
||||||
class MediaDirectory(Base):
|
|
||||||
"""Models the MediaDirectory table"""
|
|
||||||
|
|
||||||
__tablename__ = 'MediaDirectory'
|
|
||||||
__table_args__ = {'sqlite_autoincrement': True}
|
|
||||||
|
|
||||||
id = Column(Integer, primary_key=True)
|
|
||||||
path = Column(String)
|
|
||||||
last_indexed_at = Column(DateTime)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def build(cls, path, last_indexed_at=None, id=None):
|
|
||||||
record = cls()
|
|
||||||
record.id = id
|
|
||||||
record.path = path
|
|
||||||
record.last_indexed_at = last_indexed_at
|
|
||||||
return record
|
|
||||||
|
|
||||||
|
|
||||||
class MediaFile(Base):
|
|
||||||
"""Models the MediaFile table"""
|
|
||||||
|
|
||||||
__tablename__ = 'MediaFile'
|
|
||||||
__table_args__ = {'sqlite_autoincrement': True}
|
|
||||||
|
|
||||||
id = Column(Integer, primary_key=True)
|
|
||||||
directory_id = Column(
|
|
||||||
Integer, ForeignKey('MediaDirectory.id', ondelete='CASCADE'), nullable=False
|
|
||||||
)
|
|
||||||
path = Column(String, nullable=False, unique=True)
|
|
||||||
indexed_at = Column(DateTime)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def build(cls, directory_id, path, indexed_at=None, id=None):
|
|
||||||
record = cls()
|
|
||||||
record.id = id
|
|
||||||
record.directory_id = directory_id
|
|
||||||
record.path = path
|
|
||||||
record.indexed_at = indexed_at or datetime.datetime.now()
|
|
||||||
return record
|
|
||||||
|
|
||||||
|
|
||||||
class MediaToken(Base):
|
|
||||||
"""Models the MediaToken table"""
|
|
||||||
|
|
||||||
__tablename__ = 'MediaToken'
|
|
||||||
__table_args__ = {'sqlite_autoincrement': True}
|
|
||||||
|
|
||||||
id = Column(Integer, primary_key=True)
|
|
||||||
token = Column(String, nullable=False, unique=True)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def build(cls, token, id=None):
|
|
||||||
record = cls()
|
|
||||||
record.id = id
|
|
||||||
record.token = token
|
|
||||||
return record
|
|
||||||
|
|
||||||
|
|
||||||
class MediaFileToken(Base):
|
|
||||||
"""Models the MediaFileToken table"""
|
|
||||||
|
|
||||||
__tablename__ = 'MediaFileToken'
|
|
||||||
|
|
||||||
file_id = Column(
|
|
||||||
Integer, ForeignKey('MediaFile.id', ondelete='CASCADE'), nullable=False
|
|
||||||
)
|
|
||||||
token_id = Column(
|
|
||||||
Integer, ForeignKey('MediaToken.id', ondelete='CASCADE'), nullable=False
|
|
||||||
)
|
|
||||||
|
|
||||||
__table_args__ = (PrimaryKeyConstraint(file_id, token_id), {})
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def build(cls, file_id, token_id, id=None):
|
|
||||||
record = cls()
|
|
||||||
record.id = id
|
|
||||||
record.file_id = file_id
|
|
||||||
record.token_id = token_id
|
|
||||||
return record
|
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
|
111
platypush/plugins/media/search/local/db.py
Normal file
111
platypush/plugins/media/search/local/db.py
Normal file
|
@ -0,0 +1,111 @@
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
from sqlalchemy import (
|
||||||
|
Float,
|
||||||
|
Column,
|
||||||
|
Integer,
|
||||||
|
String,
|
||||||
|
DateTime,
|
||||||
|
PrimaryKeyConstraint,
|
||||||
|
ForeignKey,
|
||||||
|
)
|
||||||
|
from sqlalchemy.orm import sessionmaker, scoped_session
|
||||||
|
|
||||||
|
from platypush.common.db import declarative_base
|
||||||
|
|
||||||
|
Base = declarative_base()
|
||||||
|
Session = scoped_session(sessionmaker())
|
||||||
|
|
||||||
|
|
||||||
|
class MediaDirectory(Base):
|
||||||
|
"""Models the MediaDirectory table"""
|
||||||
|
|
||||||
|
__tablename__ = 'MediaDirectory'
|
||||||
|
__table_args__ = {'sqlite_autoincrement': True}
|
||||||
|
|
||||||
|
id = Column(Integer, primary_key=True)
|
||||||
|
path = Column(String)
|
||||||
|
last_indexed_at = Column(DateTime)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def build(cls, path, last_indexed_at=None, id=None):
|
||||||
|
record = cls()
|
||||||
|
record.id = id
|
||||||
|
record.path = path
|
||||||
|
record.last_indexed_at = last_indexed_at
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
class MediaFile(Base):
|
||||||
|
"""Models the MediaFile table"""
|
||||||
|
|
||||||
|
__tablename__ = 'MediaFile'
|
||||||
|
__table_args__ = {'sqlite_autoincrement': True}
|
||||||
|
|
||||||
|
id = Column(Integer, primary_key=True)
|
||||||
|
directory_id = Column(
|
||||||
|
Integer, ForeignKey('MediaDirectory.id', ondelete='CASCADE'), nullable=False
|
||||||
|
)
|
||||||
|
path = Column(String, nullable=False, unique=True)
|
||||||
|
duration = Column(Float)
|
||||||
|
width = Column(Integer)
|
||||||
|
height = Column(Integer)
|
||||||
|
image = Column(String)
|
||||||
|
created_at = Column(DateTime)
|
||||||
|
indexed_at = Column(DateTime)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def build(cls, directory_id, path, indexed_at=None, id=None, **kwargs):
|
||||||
|
record = cls()
|
||||||
|
record.id = id
|
||||||
|
record.directory_id = directory_id
|
||||||
|
record.path = path
|
||||||
|
record.indexed_at = indexed_at or datetime.datetime.now()
|
||||||
|
|
||||||
|
for k, v in kwargs.items():
|
||||||
|
setattr(record, k, v)
|
||||||
|
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
class MediaToken(Base):
|
||||||
|
"""Models the MediaToken table"""
|
||||||
|
|
||||||
|
__tablename__ = 'MediaToken'
|
||||||
|
__table_args__ = {'sqlite_autoincrement': True}
|
||||||
|
|
||||||
|
id = Column(Integer, primary_key=True)
|
||||||
|
token = Column(String, nullable=False, unique=True)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def build(cls, token, id=None):
|
||||||
|
record = cls()
|
||||||
|
record.id = id
|
||||||
|
record.token = token
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
class MediaFileToken(Base):
|
||||||
|
"""Models the MediaFileToken table"""
|
||||||
|
|
||||||
|
__tablename__ = 'MediaFileToken'
|
||||||
|
|
||||||
|
file_id = Column(
|
||||||
|
Integer, ForeignKey('MediaFile.id', ondelete='CASCADE'), nullable=False
|
||||||
|
)
|
||||||
|
token_id = Column(
|
||||||
|
Integer, ForeignKey('MediaToken.id', ondelete='CASCADE'), nullable=False
|
||||||
|
)
|
||||||
|
|
||||||
|
__table_args__ = (PrimaryKeyConstraint(file_id, token_id), {})
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def build(cls, file_id, token_id, id=None):
|
||||||
|
record = cls()
|
||||||
|
record.id = id
|
||||||
|
record.file_id = file_id
|
||||||
|
record.token_id = token_id
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
86
platypush/plugins/media/search/local/metadata.py
Normal file
86
platypush/plugins/media/search/local/metadata.py
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import multiprocessing
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
from concurrent.futures import ProcessPoolExecutor
|
||||||
|
|
||||||
|
from dateutil.parser import isoparse
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def get_file_metadata(path: str):
|
||||||
|
"""
|
||||||
|
Retrieves the metadata of a media file using ffprobe.
|
||||||
|
"""
|
||||||
|
logger.info('Retrieving metadata for %s', path)
|
||||||
|
|
||||||
|
with subprocess.Popen(
|
||||||
|
[
|
||||||
|
'ffprobe',
|
||||||
|
'-v',
|
||||||
|
'quiet',
|
||||||
|
'-print_format',
|
||||||
|
'json',
|
||||||
|
'-show_format',
|
||||||
|
'-show_streams',
|
||||||
|
path,
|
||||||
|
],
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
) as ffprobe:
|
||||||
|
ret = json.loads(ffprobe.communicate()[0])
|
||||||
|
|
||||||
|
video_stream = next(
|
||||||
|
iter(
|
||||||
|
[
|
||||||
|
stream
|
||||||
|
for stream in ret.get('streams', [])
|
||||||
|
if stream.get('codec_type') == 'video'
|
||||||
|
]
|
||||||
|
),
|
||||||
|
{},
|
||||||
|
)
|
||||||
|
|
||||||
|
creation_time = ret.get('format', {}).get('tags', {}).get('creation_time')
|
||||||
|
if creation_time:
|
||||||
|
try:
|
||||||
|
creation_time = isoparse(creation_time)
|
||||||
|
except ValueError:
|
||||||
|
creation_time = None
|
||||||
|
|
||||||
|
if not creation_time:
|
||||||
|
creation_time = datetime.datetime.fromtimestamp(os.path.getctime(path))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'duration': ret.get('format', {}).get('duration'),
|
||||||
|
'width': video_stream.get('width'),
|
||||||
|
'height': video_stream.get('height'),
|
||||||
|
'created_at': creation_time,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_metadata(*paths: str):
|
||||||
|
"""
|
||||||
|
Retrieves the metadata of media files using ffprobe.
|
||||||
|
"""
|
||||||
|
logger.info('Retrieving metadata for %d media files', len(paths))
|
||||||
|
try:
|
||||||
|
assert shutil.which(
|
||||||
|
'ffprobe'
|
||||||
|
), 'ffprobe not found in PATH. Install ffmpeg to retrieve local media metadata.'
|
||||||
|
|
||||||
|
# Run ffprobe in parallel
|
||||||
|
with ProcessPoolExecutor(
|
||||||
|
max_workers=multiprocessing.cpu_count() * 2
|
||||||
|
) as executor:
|
||||||
|
futures = [executor.submit(get_file_metadata, path) for path in paths]
|
||||||
|
results = [future.result() for future in futures]
|
||||||
|
|
||||||
|
logger.info('Retrieved metadata for %d media files', len(results))
|
||||||
|
return results
|
||||||
|
except Exception as e:
|
||||||
|
logger.error('Failed to retrieve media metadata: %s: %s', type(e).__name__, e)
|
||||||
|
return [{} for _ in paths]
|
Loading…
Reference in a new issue