forked from platypush/platypush
[#407] Implemented torrent.csv backend
This commit is contained in:
parent
8fc3201b8c
commit
91e2530dd5
10 changed files with 648 additions and 49 deletions
|
@ -30,6 +30,86 @@ from ._search import TorrentSearchProvider
|
|||
class TorrentPlugin(Plugin):
|
||||
"""
|
||||
Plugin to search and download torrents.
|
||||
|
||||
Search
|
||||
------
|
||||
|
||||
You can search for torrents using the :meth:`search` method. The method will
|
||||
use the search providers configured in the ``search_providers`` attribute of
|
||||
the plugin configuration. Currently supported search providers:
|
||||
|
||||
* ``popcorntime``:
|
||||
:class:`platypush.plugins.torrent._search.PopcornTimeSearchProvider`
|
||||
* ``torrents.csv``:
|
||||
:class:`platypush.plugins.torrent._search.TorrentsCsvSearchProvider`
|
||||
|
||||
``torrents.csv`` will be enabled by default unless you explicitly disable
|
||||
it. ``torrents.csv`` supports three modes of operation:
|
||||
|
||||
* A remote API via the ``api_url`` attribute (default:
|
||||
``https://torrents-csv.com/service``). You can also run your own API
|
||||
server by following the instructions at `heretic/torrents-csv-server
|
||||
<https://git.torrents-csv.com/heretic/torrents-csv-server>`_.
|
||||
|
||||
* A local checkout of the ``torrents.csv`` file. Clone the
|
||||
`heretic/torrents-csv-data
|
||||
<https://git.torrents-csv.com/heretic/torrents-csv-data>`_ and provide
|
||||
the path to the ``torrents.csv`` file in the ``csv_path`` attribute.
|
||||
|
||||
* A local checkout of the ``torrents.db`` file built from the
|
||||
``torrents.csv`` file. Follow the instructions at
|
||||
`heretic/torrents-csv-data
|
||||
<https://git.torrents-csv.com/heretic/torrents-csv-data>`_ on how to
|
||||
build the ``torrents.db`` file from the ``torrents.csv`` file.
|
||||
|
||||
If you opt for a local checkout of the ``torrents.csv`` file, then
|
||||
Platypush will build the SQLite database from the CSV file for you - no need
|
||||
to use external services. This however means that the first search will be
|
||||
slower as the database is being built. Subsequent searches will be faster,
|
||||
unless you modify the CSV file - in this case, an updated database will be
|
||||
built from the latest CSV file.
|
||||
|
||||
You can also specify the ``download_csv`` property in the configuration. In
|
||||
this case, Platypush will automatically download the latest ``torrents.csv``
|
||||
file locally and build the SQLite database from it. On startup, Platypush
|
||||
will check if either the local or remote CSV file has been updated, and
|
||||
rebuild the database if necessary.
|
||||
|
||||
``popcorntime`` will be disabled by default unless you explicitly enable it.
|
||||
That's because, at the time of writing (June 2024), there are no publicly
|
||||
available PopcornTime API servers. You can run your own PopcornTime API
|
||||
server by following the instructions at `popcorn-time-ru/popcorn-ru
|
||||
<https://github.com/popcorn-time-ru/popcorn-ru>`_.
|
||||
|
||||
Configuration example:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
torrent:
|
||||
# ...
|
||||
|
||||
search_providers:
|
||||
torrents.csv:
|
||||
# Default: True
|
||||
# enabled: true
|
||||
# Base URL of the torrents.csv API.
|
||||
api_url: https://torrents-csv.com/service
|
||||
|
||||
# Alternatively, you can also use a local checkout of the
|
||||
# torrents.csv file.
|
||||
# csv_path: /path/to/torrents.csv
|
||||
|
||||
# Or a manually built SQLite database from the torrents.csv file.
|
||||
# db_path: /path/to/torrents.db
|
||||
|
||||
# Or automatically download the latest torrents.csv file.
|
||||
# download_csv: true
|
||||
popcorn_time:
|
||||
# Default: false
|
||||
# enabled: false
|
||||
# Required: PopcornTime API base URL.
|
||||
api_url: https://popcorntime.app
|
||||
|
||||
"""
|
||||
|
||||
_http_timeout = 20
|
||||
|
@ -54,41 +134,7 @@ class TorrentPlugin(Plugin):
|
|||
:param download_dir: Directory where the videos/torrents will be
|
||||
downloaded (default: ``~/Downloads``).
|
||||
:param torrent_ports: Torrent ports to listen on (default: 6881 and 6891)
|
||||
:param search_providers: List of search providers to use. Each provider
|
||||
has its own supported configuration and needs to be an instance of
|
||||
:class:`TorrentSearchProvider`. Currently supported providers:
|
||||
|
||||
* :class:`platypush.plugins.torrent._search.PopcornTimeSearchProvider`
|
||||
* :class:`platypush.plugins.torrent._search.TorrentCsvSearchProvider`
|
||||
|
||||
Configuration example:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
torrent:
|
||||
# ...
|
||||
|
||||
search_providers:
|
||||
torrent_csv:
|
||||
# Default: True
|
||||
# enabled: true
|
||||
# Base URL of the torrent-csv API.
|
||||
# See https://git.torrents-csv.com/heretic/torrents-csv-server
|
||||
# for how to run your own torrent-csv API server.
|
||||
api_url: https://torrents-csv.com/service
|
||||
# Alternatively, you can also use a local checkout of the
|
||||
# torrent.csv file. Clone
|
||||
# https://git.torrents-csv.com/heretic/torrents-csv-data
|
||||
# and provide the path to the torrent.csv file here.
|
||||
# csv_file: /path/to/torrent.csv
|
||||
popcorn_time:
|
||||
# Default: False
|
||||
# enabled: false
|
||||
# Required: PopcornTime API base URL.
|
||||
# See https://github.com/popcorn-time-ru/popcorn-ru for
|
||||
# how to run your own PopcornTime API server.
|
||||
api_url: https://popcorntime.app
|
||||
|
||||
:param search_providers: List of search providers to use.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
|
@ -96,7 +142,13 @@ class TorrentPlugin(Plugin):
|
|||
self.download_dir = os.path.abspath(
|
||||
os.path.expanduser(download_dir or get_default_downloads_dir())
|
||||
)
|
||||
|
||||
self._search_providers = self._load_search_providers(search_providers)
|
||||
self.logger.info(
|
||||
'Loaded search providers: %s',
|
||||
[provider.provider_name() for provider in self._search_providers],
|
||||
)
|
||||
|
||||
self._sessions = {}
|
||||
self._lt_session = None
|
||||
pathlib.Path(self.download_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
@ -107,18 +159,22 @@ class TorrentPlugin(Plugin):
|
|||
Union[Dict[str, dict], Iterable[TorrentSearchProvider]]
|
||||
],
|
||||
) -> Iterable[TorrentSearchProvider]:
|
||||
provider_classes = {
|
||||
cls.provider_name(): cls
|
||||
for _, cls in inspect.getmembers(search_module, inspect.isclass)
|
||||
if issubclass(cls, TorrentSearchProvider) and cls != TorrentSearchProvider
|
||||
}
|
||||
|
||||
if not search_providers:
|
||||
return []
|
||||
return [
|
||||
provider()
|
||||
for provider in provider_classes.values()
|
||||
if provider.default_enabled()
|
||||
]
|
||||
|
||||
parsed_providers = []
|
||||
if isinstance(search_providers, dict):
|
||||
providers_dict = {}
|
||||
provider_classes = {
|
||||
cls.provider_name(): cls
|
||||
for _, cls in inspect.getmembers(search_module, inspect.isclass)
|
||||
if issubclass(cls, TorrentSearchProvider)
|
||||
and cls != TorrentSearchProvider
|
||||
}
|
||||
|
||||
# Configure the search providers explicitly passed in the configuration
|
||||
for provider_name, provider_config in search_providers.items():
|
||||
|
@ -523,7 +579,9 @@ class TorrentPlugin(Plugin):
|
|||
:type torrent: str
|
||||
"""
|
||||
|
||||
assert torrent in self.transfers, f"No transfer in progress for {torrent}"
|
||||
if not self.transfers.get(torrent):
|
||||
self.logger.info('No transfer in progress for %s', torrent)
|
||||
return
|
||||
|
||||
self.transfers[torrent].pause()
|
||||
del self.torrent_state[torrent]
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
from ._base import TorrentSearchProvider
|
||||
from ._popcorntime import PopcornTimeSearchProvider
|
||||
from ._torrents_csv import TorrentsCsvSearchProvider
|
||||
|
||||
|
||||
__all__ = [
|
||||
'TorrentSearchProvider',
|
||||
'PopcornTimeSearchProvider',
|
||||
'TorrentsCsvSearchProvider',
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -49,7 +49,7 @@ class TorrentSearchProvider(ABC):
|
|||
)
|
||||
return []
|
||||
|
||||
self.logger.debug('Searching for %r', query)
|
||||
self.logger.debug("Searching for %r", query)
|
||||
return self._search(query, *args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
from ._facade import TorrentsCsvSearchProvider
|
||||
|
||||
__all__ = ['TorrentsCsvSearchProvider']
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
100
platypush/plugins/torrent/_search/_torrents_csv/_base.py
Normal file
100
platypush/plugins/torrent/_search/_torrents_csv/_base.py
Normal file
|
@ -0,0 +1,100 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from logging import getLogger
|
||||
from typing import List, Optional
|
||||
from urllib.parse import quote_plus
|
||||
|
||||
from .._base import TorrentSearchProvider
|
||||
from .._model import TorrentSearchResult
|
||||
|
||||
|
||||
class TorrentsCsvBaseProvider(TorrentSearchProvider, ABC):
|
||||
"""
|
||||
Base class for Torrents.csv search providers.
|
||||
"""
|
||||
|
||||
_http_timeout = 20
|
||||
_magnet_trackers = [
|
||||
'http://125.227.35.196:6969/announce',
|
||||
'http://210.244.71.25:6969/announce',
|
||||
'http://210.244.71.26:6969/announce',
|
||||
'http://213.159.215.198:6970/announce',
|
||||
'http://37.19.5.139:6969/announce',
|
||||
'http://37.19.5.155:6881/announce',
|
||||
'http://87.248.186.252:8080/announce',
|
||||
'http://asmlocator.ru:34000/1hfZS1k4jh/announce',
|
||||
'http://bt.evrl.to/announce',
|
||||
'http://bt.rutracker.org/ann',
|
||||
'https://www.artikelplanet.nl',
|
||||
'http://mgtracker.org:6969/announce',
|
||||
'http://tracker.baravik.org:6970/announce',
|
||||
'http://tracker.dler.org:6969/announce',
|
||||
'http://tracker.filetracker.pl:8089/announce',
|
||||
'http://tracker.grepler.com:6969/announce',
|
||||
'http://tracker.mg64.net:6881/announce',
|
||||
'http://tracker.tiny-vps.com:6969/announce',
|
||||
'http://tracker.torrentyorg.pl/announce',
|
||||
'https://internet.sitelio.me/',
|
||||
'https://computer1.sitelio.me/',
|
||||
'udp://168.235.67.63:6969',
|
||||
'udp://37.19.5.155:2710',
|
||||
'udp://46.148.18.250:2710',
|
||||
'udp://46.4.109.148:6969',
|
||||
'udp://computerbedrijven.bestelinks.nl/',
|
||||
'udp://computerbedrijven.startsuper.nl/',
|
||||
'udp://computershop.goedbegin.nl/',
|
||||
'udp://c3t.org',
|
||||
'udp://allerhandelenlaag.nl',
|
||||
'udp://tracker.opentrackr.org:1337',
|
||||
'udp://tracker.publicbt.com:80',
|
||||
'udp://tracker.tiny-vps.com:6969',
|
||||
'udp://tracker.openbittorrent.com:80',
|
||||
'udp://opentor.org:2710',
|
||||
'udp://tracker.ccc.de:80',
|
||||
'udp://tracker.blackunicorn.xyz:6969',
|
||||
'udp://tracker.coppersurfer.tk:6969',
|
||||
'udp://tracker.leechers-paradise.org:6969',
|
||||
]
|
||||
|
||||
def __init__(
    self,
    trackers: Optional[List[str]] = None,
    **kwargs,
):
    """
    Initialize the provider and the trackers advertised in magnet links.

    :param trackers: List of additional trackers to use, on top of the
        built-in ``_magnet_trackers``.
    """
    super().__init__(**kwargs)
    self.logger = getLogger(self.__class__.__name__)
    # Merge built-in and user-provided trackers, dropping duplicates while
    # keeping the insertion order. De-duplicating through a ``set`` would
    # make the tracker order - and therefore the magnet link generated for
    # the same torrent - change from one interpreter run to the next,
    # because string hashing is randomized.
    self.trackers = list(
        dict.fromkeys([*self._magnet_trackers, *(trackers or [])])
    )
|
||||
|
||||
@classmethod
def provider_name(cls) -> str:
    """
    :return: The name that identifies this search provider.
    """
    name = 'torrents.csv'
    return name
|
||||
|
||||
@abstractmethod
|
||||
def _search( # pylint: disable=arguments-differ
|
||||
self, query: str, *_, limit: int, page: int, **__
|
||||
) -> List[TorrentSearchResult]:
|
||||
"""
|
||||
To be implemented by subclasses.
|
||||
|
||||
:param query: Query string.
|
||||
:param limit: Number of results to return (default: 25).
|
||||
:param page: Page number (default: 1).
|
||||
"""
|
||||
|
||||
def _to_magnet(self, info_hash: str, torrent_name: str) -> str:
|
||||
"""
|
||||
Generate a magnet link from an info hash and torrent name.
|
||||
|
||||
:param info_hash: Torrent info hash.
|
||||
:param torrent_name: Torrent name.
|
||||
:return: Magnet link.
|
||||
"""
|
||||
return (
|
||||
f'magnet:?xt=urn:btih:{info_hash}&dn={quote_plus(torrent_name)}&tr='
|
||||
+ '&tr='.join([quote_plus(tracker) for tracker in self.trackers])
|
||||
)
|
||||
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
|
@ -0,0 +1,15 @@
|
|||
"""
|
||||
Common Torrents CSV constants.
|
||||
"""
|
||||
|
||||
TORRENT_CSV_API_URL = 'https://torrents-csv.com/service'
|
||||
""" Default Torrents CSV API base URL. """
|
||||
|
||||
TORRENTS_CSV_DOWNLOAD_URL = 'https://git.torrents-csv.com/heretic/torrents-csv-data/raw/branch/main/torrents.csv'
|
||||
""" Default torrents.csv download URL. """
|
||||
|
||||
TORRENTS_CSV_URL_LAST_CHECKED_VAR = '_TORRENTS_CSV_URL_LAST_CHECKED'
|
||||
""" Environment variable to store the last checked timestamp for the torrents.csv URL. """
|
||||
|
||||
TORRENTS_CSV_DEFAULT_CHECK_INTERVAL = 60 * 60 * 24
|
||||
""" Interval in seconds to re-check the torrents.csv URL (24 hours). """
|
84
platypush/plugins/torrent/_search/_torrents_csv/_facade.py
Normal file
84
platypush/plugins/torrent/_search/_torrents_csv/_facade.py
Normal file
|
@ -0,0 +1,84 @@
|
|||
import os
|
||||
from typing import List, Optional
|
||||
|
||||
from .._model import TorrentSearchResult
|
||||
from ._base import TorrentsCsvBaseProvider
|
||||
from ._constants import (
|
||||
TORRENT_CSV_API_URL,
|
||||
TORRENTS_CSV_DOWNLOAD_URL,
|
||||
TORRENTS_CSV_DEFAULT_CHECK_INTERVAL,
|
||||
)
|
||||
from .api import TorrentsCsvAPIProvider
|
||||
from .local import TorrentsCsvLocalProvider
|
||||
|
||||
|
||||
class TorrentsCsvSearchProvider(TorrentsCsvBaseProvider):
|
||||
"""
|
||||
Torrent search provider that uses `Torrents.csv <https://torrents-csv.com/>`_ to search
|
||||
for torrents, either by using the API or by leveraging a local database.
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    api_url: str = TORRENT_CSV_API_URL,
    csv_url: str = TORRENTS_CSV_DOWNLOAD_URL,
    download_csv: bool = False,
    csv_path: Optional[str] = None,
    db_path: Optional[str] = None,
    csv_url_check_interval: int = TORRENTS_CSV_DEFAULT_CHECK_INTERVAL,
    **kwargs
):
    """
    Store the settings later used to pick and build the search backend.

    :param api_url: Torrents.csv API URL.
    :param csv_url: Torrents.csv CSV URL.
    :param download_csv: Whether to download the CSV file.
    :param csv_path: Path to the CSV file (``~`` is expanded).
    :param db_path: Path to the SQLite database file (``~`` is expanded).
    :param csv_url_check_interval: Interval to check for CSV updates.
    """
    super().__init__(**kwargs)

    def _expand(path: Optional[str]) -> Optional[str]:
        # Empty/missing paths are normalized to None
        if not path:
            return None
        return os.path.expanduser(path)

    self.api_url = api_url
    self.csv_url = csv_url
    self.csv_url_check_interval = csv_url_check_interval
    self.download_csv = download_csv
    self.csv_path = _expand(csv_path)
    self.db_path = _expand(db_path)
|
||||
|
||||
@property
def _delegate(self) -> TorrentsCsvBaseProvider:
    """
    Build the provider that the search is delegated to.

    A :class:`TorrentsCsvLocalProvider` is used if any of ``download_csv``,
    ``csv_path`` or ``db_path`` is set, otherwise the search goes through a
    :class:`TorrentsCsvAPIProvider`. A new provider instance is created on
    every access.

    :return: The provider to delegate the search to.
    """
    if self.download_csv or self.csv_path or self.db_path:
        return TorrentsCsvLocalProvider(
            download_csv=self.download_csv,
            csv_url=self.csv_url,
            csv_path=self.csv_path,
            db_path=self.db_path,
            csv_url_check_interval=self.csv_url_check_interval,
            # Forward the configured trackers: the delegate is the object
            # that builds the magnet links, so without this any custom
            # tracker passed to this facade would be silently ignored.
            trackers=self.trackers,
            enabled=True,
        )

    return TorrentsCsvAPIProvider(
        api_url=self.api_url, trackers=self.trackers, enabled=True
    )
|
||||
|
||||
@classmethod
def default_enabled(cls) -> bool:
    """
    :return: Always ``True``: this provider is enabled by default.
    """
    enabled = True
    return enabled
|
||||
|
||||
def _search(
|
||||
self, query: str, *_, limit: int = 25, page: int = 1, **__
|
||||
) -> List[TorrentSearchResult]:
|
||||
"""
|
||||
Perform a search of torrents using the Torrent.csv API.
|
||||
|
||||
:param query: Query string.
|
||||
:param limit: Number of results to return (default: 25).
|
||||
:param page: Page number (default: 1).
|
||||
"""
|
||||
return list(self._delegate.search(query=query, limit=limit, page=page))
|
||||
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
59
platypush/plugins/torrent/_search/_torrents_csv/api.py
Normal file
59
platypush/plugins/torrent/_search/_torrents_csv/api.py
Normal file
|
@ -0,0 +1,59 @@
|
|||
from typing import List
|
||||
|
||||
import requests
|
||||
|
||||
from .._model import TorrentSearchResult
|
||||
from ._base import TorrentsCsvBaseProvider
|
||||
|
||||
|
||||
class TorrentsCsvAPIProvider(TorrentsCsvBaseProvider):
|
||||
"""
|
||||
Torrent search provider that uses `Torrents.csv <https://torrents-csv.com/>`_ or any other
|
||||
`Torrents.csv API <https://torrents-csv.com/service>`_ instance to search
|
||||
for torrents.
|
||||
"""
|
||||
|
||||
def __init__(self, api_url: str, **kwargs):
    """
    :param api_url: Base URL of the Torrents.csv API; ``/search`` is
        appended to it when running a query.
    """
    super().__init__(**kwargs)
    self.api_url = api_url
|
||||
|
||||
def _search(
    self, query: str, *_, limit: int, page: int, **__
) -> List[TorrentSearchResult]:
    """
    Perform a search of torrents using the Torrents.csv API.

    Entries returned by the API without an info hash are skipped, since no
    valid magnet link can be generated for them.

    :param query: Query string.
    :param limit: Number of results to return.
    :param page: Page number.
    :return: The list of matching torrents.
    :raises requests.HTTPError: If the API responds with an error status.
    """
    response = requests.get(
        f'{self.api_url}/search',
        params={
            'q': query,
            'size': limit,
            'page': page,
        },
        timeout=self._http_timeout,
    )

    response.raise_for_status()
    results = []

    # ``or []`` also covers an explicit ``"torrents": null`` in the payload
    for torrent in response.json().get('torrents') or []:
        info_hash = torrent.get('infohash')
        if not info_hash:
            # Without a hash the magnet link would be ``urn:btih:None``
            self.logger.debug('Skipping result with no info hash: %s', torrent)
            continue

        results.append(
            TorrentSearchResult(
                title=torrent.get('name', '[No Title]'),
                url=self._to_magnet(
                    info_hash=info_hash,
                    # quote_plus() fails on None: fall back to an empty name
                    torrent_name=torrent.get('name') or '',
                ),
                size=torrent.get('size_bytes'),
                created_at=torrent.get('created_unix'),
                seeds=torrent.get('seeders'),
                peers=torrent.get('leechers'),
            )
        )

    return results
|
||||
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
272
platypush/plugins/torrent/_search/_torrents_csv/local.py
Normal file
272
platypush/plugins/torrent/_search/_torrents_csv/local.py
Normal file
|
@ -0,0 +1,272 @@
|
|||
import datetime as dt
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import stat
|
||||
import subprocess
|
||||
import time
|
||||
from threading import RLock
|
||||
from typing import List, Optional
|
||||
from urllib.parse import quote_plus
|
||||
|
||||
import requests
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
from platypush.config import Config
|
||||
from platypush.context import Variable
|
||||
|
||||
from .._model import TorrentSearchResult
|
||||
from ._base import TorrentsCsvBaseProvider
|
||||
from ._constants import TORRENTS_CSV_URL_LAST_CHECKED_VAR
|
||||
|
||||
# Script piped to the ``sqlite3`` command-line shell to (re)build the torrents
# database from the CSV file. ``{csv_file}`` is replaced with the CSV path.
#
# The data is imported into a temporary table that then replaces ``torrent``.
# Indexes are created only AFTER the old table has been dropped and the new
# one renamed: index names are global to the database, so creating them on
# ``torrent_tmp`` while the previous ``torrent`` table (and its indexes) still
# exists fails on every refresh and leaves the new table without indexes.
# The leading ``drop table if exists torrent_tmp`` cleans up after a
# previously interrupted import.
SQL_INIT_TEMPLATE = """
drop table if exists torrent_tmp;

create table torrent_tmp (
    infohash text primary key,
    name text not null,
    size_bytes integer not null,
    created_unix integer(4) not null,
    seeders integer not null,
    leechers integer not null,
    completed integer not null,
    scraped_date integer(4) not null,
    published integer(4) not null
);

.separator ,
.import --skip 1 '{csv_file}' torrent_tmp

drop table if exists torrent;
alter table torrent_tmp rename to torrent;

create index if not exists idx_name on torrent(lower(name));
create index if not exists idx_seeders on torrent(seeders);
create index if not exists idx_created_unix on torrent(created_unix);
"""
|
||||
|
||||
|
||||
class TorrentsCsvLocalProvider(TorrentsCsvBaseProvider):
|
||||
"""
|
||||
This class is responsible for managing a local checkout of the torrents-csv
|
||||
dataset.
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    download_csv: bool,
    csv_url: str,
    csv_url_check_interval: int,
    csv_path: Optional[str] = None,
    db_path: Optional[str] = None,
    **kwargs,
):
    """
    Prepare the local database, downloading the CSV file and/or rebuilding
    the SQLite database from it when required.

    At least one among ``download_csv``, ``csv_path`` and ``db_path`` must
    be provided.

    :param download_csv: If True then the CSV file will be downloaded from
        the specified ``csv_url``.
    :param csv_url: The URL from which the CSV file will be downloaded.
    :param csv_url_check_interval: The interval in seconds after which the
        CSV should be checked for updates.
    :param csv_path: The path to the CSV file. If not provided, and
        ``download_csv`` is set to True, then the CSV file will be
        downloaded to ``<WORKDIR>/torrent/torrents.csv``.
    :param db_path: The path to the SQLite database. If not provided, and
        ``csv_path`` or ``download_csv`` are set, then the database will be
        created as ``torrents.db`` next to the CSV file.
    """
    super().__init__(**kwargs)
    assert (
        download_csv or csv_path or db_path
    ), 'You must provide either download_csv, csv_path or db_path'

    self._init_csv_lock = RLock()
    self._init_db_lock = RLock()
    self._csv_url_check_interval = csv_url_check_interval

    # Step 1: fetch the CSV file, if requested
    if download_csv:
        if csv_path:
            csv_path = os.path.expanduser(csv_path)
        else:
            csv_path = os.path.join(
                Config.get_workdir(), 'torrent', 'torrents.csv'
            )

        with self._init_csv_lock:
            self._download_csv(csv_url=csv_url, csv_path=csv_path)

    # Step 2: (re)build the database from the CSV file, if there is one
    if csv_path:
        if db_path:
            db_path = os.path.expanduser(db_path)
        else:
            db_path = os.path.join(os.path.dirname(csv_path), 'torrents.db')

        with self._init_db_lock:
            self._build_db(csv_path=csv_path, db_path=db_path)

    # Step 3: at this point a usable database file must exist
    assert db_path, 'No download_csv, csv_path or db_path provided'
    assert os.path.isfile(db_path), f'Invalid db_path: {db_path}'
    self.db_path = db_path
|
||||
|
||||
def _get_engine(self):
    """
    :return: A new SQLAlchemy engine bound to the SQLite database at
        ``self.db_path``. Each path component is URL-encoded before being
        joined into the connection string.
    """
    components = self.db_path.split(os.path.sep)
    encoded_path = '/'.join(quote_plus(component) for component in components)
    return create_engine('sqlite:///' + encoded_path)
|
||||
|
||||
def _download_csv(self, csv_url: str, csv_path: str):
    """
    Download the torrents CSV file, if ``_should_download_csv`` says so.

    The file is streamed to a temporary ``<csv_path>.part`` file that
    replaces ``csv_path`` only once the download has completed. Writing
    straight to ``csv_path`` would leave a truncated file with a fresh
    modification time if the transfer failed half-way, and that file would
    then be treated as up-to-date and imported into the database.

    :param csv_url: URL of the CSV file.
    :param csv_path: Local path where the CSV file is stored.
    :raises requests.HTTPError: If the server responds with an error status.
    """
    if not self._should_download_csv(
        csv_url=csv_url,
        csv_path=csv_path,
        csv_url_check_interval=self._csv_url_check_interval,
    ):
        return

    self.logger.info(
        'Downloading the torrents CSV file from %s to %s', csv_url, csv_path
    )

    tmp_path = csv_path + '.part'

    try:
        # The context manager releases the streamed connection on exit
        with requests.get(csv_url, stream=True, timeout=60) as response:
            response.raise_for_status()
            size = int(response.headers.get('Content-Length', 0))
            torrents_csv_dir = os.path.dirname(csv_path)
            pathlib.Path(torrents_csv_dir).mkdir(parents=True, exist_ok=True)

            with open(tmp_path, 'wb') as f:
                written = 0

                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
                    written += len(chunk)
                    # Progress can only be reported if the size is known;
                    # log a line roughly every 5% of the transfer.
                    if size:
                        percent = 100.0 * written / size
                        prev_percent = max(
                            0, 100.0 * (written - len(chunk)) / size
                        )
                        if round(percent / 5) > round(prev_percent / 5):
                            self.logger.info('... %.2f%%\r', percent)

        # Atomically publish the fully downloaded file
        os.replace(tmp_path, csv_path)
    finally:
        # Only present here if the download failed before os.replace()
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    self.logger.info('Downloaded the torrents CSV file to %s', csv_path)
|
||||
|
||||
def _build_db(self, csv_path: str, db_path: str):
    """
    Rebuild the SQLite database from the CSV file, if
    ``_should_update_db`` reports that the database is missing or older
    than the CSV file.

    The import is performed by piping ``SQL_INIT_TEMPLATE`` to the
    ``sqlite3`` command-line shell, which must be installed.

    :param csv_path: Path to the source CSV file.
    :param db_path: Path to the SQLite database to (re)build.
    """
    if not self._should_update_db(csv_path, db_path):
        return

    self.logger.info(
        'Refreshing SQLite database %s from CSV file %s', db_path, csv_path
    )

    db_dir = os.path.dirname(db_path)
    pathlib.Path(db_dir).mkdir(parents=True, exist_ok=True)

    with subprocess.Popen(
        ['sqlite3', db_path], stdin=subprocess.PIPE, text=True
    ) as proc:
        proc.communicate(SQL_INIT_TEMPLATE.format(csv_file=csv_path))

    # Don't report success if the shell signalled an error: the database
    # may be incomplete or missing its indexes.
    if proc.returncode != 0:
        self.logger.warning(
            'sqlite3 exited with status %s while refreshing %s from %s: '
            'the database may be incomplete',
            proc.returncode,
            db_path,
            csv_path,
        )
        return

    self.logger.info(
        'Refreshed SQLite database %s from CSV file %s: ready to search',
        db_path,
        csv_path,
    )
|
||||
|
||||
@staticmethod
|
||||
def _should_update_db(csv_path: str, db_path: str) -> bool:
|
||||
if not os.path.isfile(csv_path):
|
||||
return False
|
||||
|
||||
if not os.path.isfile(db_path):
|
||||
return True
|
||||
|
||||
return os.stat(db_path)[stat.ST_MTIME] < os.stat(csv_path)[stat.ST_MTIME]
|
||||
|
||||
def _should_download_csv(
|
||||
self, csv_url: str, csv_path: str, csv_url_check_interval: int
|
||||
) -> bool:
|
||||
if not os.path.isfile(csv_path):
|
||||
self.logger.info('CSV file %s not found, downloading it', csv_path)
|
||||
return True
|
||||
|
||||
if not self._should_check_csv_url(csv_url_check_interval):
|
||||
self.logger.debug('No need to check the CSV URL %s', csv_url)
|
||||
return False
|
||||
|
||||
request = requests.head(csv_url, timeout=10)
|
||||
request.raise_for_status()
|
||||
last_modified_hdr = request.headers.get('Last-Modified')
|
||||
Variable(TORRENTS_CSV_URL_LAST_CHECKED_VAR).set(time.time())
|
||||
|
||||
if not last_modified_hdr:
|
||||
self.logger.debug(
|
||||
"No Last-Modified header found in the CSV URL, can't compare thus downloading"
|
||||
)
|
||||
return True
|
||||
|
||||
return (
|
||||
time.mktime(time.strptime(last_modified_hdr, '%a, %d %b %Y %H:%M:%S %Z'))
|
||||
> os.stat(csv_path)[stat.ST_MTIME]
|
||||
)
|
||||
|
||||
@staticmethod
def _should_check_csv_url(csv_url_check_interval: int) -> bool:
    """
    Tell whether the remote CSV URL is due for a new check.

    :param csv_url_check_interval: Minimum interval, in seconds, between
        two checks. A falsy value disables the checks.
    :return: True if checks are enabled and more than
        ``csv_url_check_interval`` seconds have passed since the timestamp
        stored in the ``TORRENTS_CSV_URL_LAST_CHECKED_VAR`` variable (a
        missing value counts as 0).
    """
    stored = Variable(TORRENTS_CSV_URL_LAST_CHECKED_VAR).get() or 0
    last_checked = round(float(stored))

    if not csv_url_check_interval:
        return False

    elapsed = time.time() - last_checked
    return elapsed > csv_url_check_interval
|
||||
|
||||
def _search(
|
||||
self, query: str, *_, limit: int, page: int, **__
|
||||
) -> List[TorrentSearchResult]:
|
||||
self.logger.debug(
|
||||
"Searching for %r on %s, limit=%d, page=%d",
|
||||
query,
|
||||
self.db_path,
|
||||
limit,
|
||||
page,
|
||||
)
|
||||
|
||||
tokens = re.split(r'[^\w]', query.lower())
|
||||
where = ' and '.join(
|
||||
f'lower(name) like :token{i}' for i, _ in enumerate(tokens)
|
||||
)
|
||||
tokens = {f'token{i}': f'%{token}%' for i, token in enumerate(tokens)}
|
||||
|
||||
with self._get_engine().connect() as conn:
|
||||
self.logger.debug('Connected to the database: %s', conn.engine.url)
|
||||
results = conn.execute(
|
||||
text(
|
||||
f"""
|
||||
select infohash, name, size_bytes, seeders, leechers, created_unix
|
||||
from torrent
|
||||
where {where}
|
||||
order by seeders desc, created_unix desc
|
||||
limit :limit
|
||||
offset :offset
|
||||
"""
|
||||
),
|
||||
{
|
||||
**tokens,
|
||||
'limit': max(int(limit), 0),
|
||||
'offset': max(int(limit * (page - 1)), 0),
|
||||
},
|
||||
).all()
|
||||
|
||||
self.logger.debug('Found %d results', len(results))
|
||||
return [
|
||||
TorrentSearchResult(
|
||||
title=result[1],
|
||||
url=self._to_magnet(
|
||||
info_hash=result[0],
|
||||
torrent_name=result[1],
|
||||
),
|
||||
size=result[2],
|
||||
seeds=int(result[3] or 0),
|
||||
peers=int(result[4] or 0),
|
||||
created_at=(
|
||||
dt.datetime.fromtimestamp(result[5]).replace(tzinfo=dt.timezone.utc)
|
||||
if result[5]
|
||||
else None
|
||||
),
|
||||
)
|
||||
for result in results
|
||||
]
|
|
@ -3,16 +3,20 @@
|
|||
"events": {},
|
||||
"install": {
|
||||
"apk": [
|
||||
"py3-libtorrent-rasterbar"
|
||||
"py3-libtorrent-rasterbar",
|
||||
"sqlite3"
|
||||
],
|
||||
"apt": [
|
||||
"python3-libtorrent"
|
||||
"python3-libtorrent",
|
||||
"sqlite3"
|
||||
],
|
||||
"dnf": [
|
||||
"rb_libtorrent-python3"
|
||||
"rb_libtorrent-python3",
|
||||
"sqlite3"
|
||||
],
|
||||
"pacman": [
|
||||
"libtorrent-rasterbar"
|
||||
"libtorrent-rasterbar",
|
||||
"sqlite3"
|
||||
],
|
||||
"pip": [
|
||||
"libtorrent"
|
||||
|
@ -21,4 +25,4 @@
|
|||
"package": "platypush.plugins.torrent",
|
||||
"type": "plugin"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue