[#407] Implemented torrent.csv backend

parent 8fc3201b8c
commit 91e2530dd5
10 changed files with 648 additions and 49 deletions
@@ -30,6 +30,86 @@ from ._search import TorrentSearchProvider
 class TorrentPlugin(Plugin):
     """
     Plugin to search and download torrents.
+
+    Search
+    ------
+
+    You can search for torrents using the :meth:`search` method. The method will
+    use the search providers configured in the ``search_providers`` attribute of
+    the plugin configuration. Currently supported search providers:
+
+    * ``popcorntime``:
+      :class:`platypush.plugins.torrent._search.PopcornTimeSearchProvider`
+    * ``torrents.csv``:
+      :class:`platypush.plugins.torrent._search.TorrentsCsvSearchProvider`
+
+    ``torrents.csv`` will be enabled by default unless you explicitly disable
+    it. ``torrents.csv`` also supports all of the following backends:
+
+    * A remote API via the ``api_url`` attribute (default:
+      ``https://torrents-csv.com/service``). You can also run your own API
+      server by following the instructions at `heretic/torrents-csv-server
+      <https://git.torrents-csv.com/heretic/torrents-csv-server>`_.
+
+    * A local checkout of the ``torrents.csv`` file. Clone the
+      `heretic/torrents-csv-data
+      <https://git.torrents-csv.com/heretic/torrents-csv-data>`_ repository and
+      provide the path to the ``torrents.csv`` file in the ``csv_path`` attribute.
+
+    * A local checkout of the ``torrents.db`` file built from the
+      ``torrents.csv`` file. Follow the instructions at
+      `heretic/torrents-csv-data
+      <https://git.torrents-csv.com/heretic/torrents-csv-data>`_ on how to
+      build the ``torrents.db`` file from the ``torrents.csv`` file.
+
+    If you opt for a local checkout of the ``torrents.csv`` file, then
+    Platypush will build the SQLite database from the CSV file for you - no need
+    to use external services. This, however, means that the first search will be
+    slower while the database is being built. Subsequent searches will be faster,
+    unless you modify the CSV file - in that case, an updated database will be
+    built from the latest CSV file.
+
+    You can also specify the ``download_csv`` property in the configuration. In
+    this case, Platypush will automatically download the latest ``torrents.csv``
+    file locally and build the SQLite database from it. On startup, Platypush
+    will check whether either the local or the remote CSV file has been updated,
+    and rebuild the database if necessary.
+
+    ``popcorntime`` will be disabled by default unless you explicitly enable it.
+    That's because, at the time of writing (June 2024), there are no publicly
+    available PopcornTime API servers. You can run your own PopcornTime API
+    server by following the instructions at `popcorn-time-ru/popcorn-ru
+    <https://github.com/popcorn-time-ru/popcorn-ru>`_.
+
+    Configuration example:
+
+    .. code-block:: yaml
+
+        torrent:
+            # ...
+
+            search_providers:
+                torrents.csv:
+                    # Default: true
+                    # enabled: true
+                    # Base URL of the torrents.csv API.
+                    api_url: https://torrents-csv.com/service
+
+                    # Alternatively, you can also use a local checkout of the
+                    # torrents.csv file.
+                    # csv_path: /path/to/torrents.csv
+
+                    # Or a manually built SQLite database from the torrents.csv file.
+                    # db_path: /path/to/torrents.db
+
+                    # Or automatically download the latest torrents.csv file.
+                    # download_csv: true
+                popcorn_time:
+                    # Default: false
+                    # enabled: false
+                    # Required: PopcornTime API base URL.
+                    api_url: https://popcorntime.app
+
     """
 
     _http_timeout = 20
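A minimal usage sketch (not part of this commit) of the new torrents.csv provider, exercised directly rather than through a full Platypush configuration. It assumes the import re-exported by ``platypush.plugins.torrent._search`` in this commit; ``enabled=True`` is the same flag the facade passes to its delegates, and the result attributes (``title``, ``seeds``, ``url``) follow the ``TorrentSearchResult`` fields populated by the providers further down.

.. code-block:: python

    from platypush.plugins.torrent._search import TorrentsCsvSearchProvider

    # Remote-API backend; pass csv_path=..., db_path=... or download_csv=True
    # to use one of the local backends described above instead.
    provider = TorrentsCsvSearchProvider(
        api_url='https://torrents-csv.com/service',
        enabled=True,
    )

    for result in provider.search('debian', limit=5, page=1):
        print(result.title, result.seeds, result.url)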
@@ -54,41 +134,7 @@ class TorrentPlugin(Plugin):
         :param download_dir: Directory where the videos/torrents will be
             downloaded (default: ``~/Downloads``).
         :param torrent_ports: Torrent ports to listen on (default: 6881 and 6891)
-        :param search_providers: List of search providers to use. Each provider
-            has its own supported configuration and needs to be an instance of
-            :class:`TorrentSearchProvider`. Currently supported providers:
-
-                * :class:`platypush.plugins.torrent._search.PopcornTimeSearchProvider`
-                * :class:`platypush.plugins.torrent._search.TorrentCsvSearchProvider`
-
-            Configuration example:
-
-            .. code-block:: yaml
-
-                torrent:
-                    # ...
-
-                    search_providers:
-                        torrent_csv:
-                            # Default: True
-                            # enabled: true
-                            # Base URL of the torrent-csv API.
-                            # See https://git.torrents-csv.com/heretic/torrents-csv-server
-                            # for how to run your own torrent-csv API server.
-                            api_url: https://torrents-csv.com/service
-                            # Alternatively, you can also use a local checkout of the
-                            # torrent.csv file. Clone
-                            # https://git.torrents-csv.com/heretic/torrents-csv-data
-                            # and provide the path to the torrent.csv file here.
-                            # csv_file: /path/to/torrent.csv
-                        popcorn_time:
-                            # Default: False
-                            # enabled: false
-                            # Required: PopcornTime API base URL.
-                            # See https://github.com/popcorn-time-ru/popcorn-ru for
-                            # how to run your own PopcornTime API server.
-                            api_url: https://popcorntime.app
-
+        :param search_providers: List of search providers to use.
         """
         super().__init__(**kwargs)

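For reference, a rough sketch (not from the commit, and assuming an initialized Platypush environment) of how the shortened ``search_providers`` parameter can be passed programmatically; the keys mirror the provider names used in the configuration example above, and the values are the same options accepted by the provider constructors.

.. code-block:: python

    from platypush.plugins.torrent import TorrentPlugin

    plugin = TorrentPlugin(
        search_providers={
            # Same key as in the YAML configuration example; other providers
            # (e.g. popcorn_time) can be added the same way.
            'torrents.csv': {'api_url': 'https://torrents-csv.com/service'},
        }
    )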
@@ -96,7 +142,13 @@ class TorrentPlugin(Plugin):
         self.download_dir = os.path.abspath(
             os.path.expanduser(download_dir or get_default_downloads_dir())
         )

         self._search_providers = self._load_search_providers(search_providers)
+        self.logger.info(
+            'Loaded search providers: %s',
+            [provider.provider_name() for provider in self._search_providers],
+        )
+
         self._sessions = {}
         self._lt_session = None
         pathlib.Path(self.download_dir).mkdir(parents=True, exist_ok=True)
@@ -107,18 +159,22 @@
             Union[Dict[str, dict], Iterable[TorrentSearchProvider]]
         ],
     ) -> Iterable[TorrentSearchProvider]:
+        provider_classes = {
+            cls.provider_name(): cls
+            for _, cls in inspect.getmembers(search_module, inspect.isclass)
+            if issubclass(cls, TorrentSearchProvider) and cls != TorrentSearchProvider
+        }
+
         if not search_providers:
-            return []
+            return [
+                provider()
+                for provider in provider_classes.values()
+                if provider.default_enabled()
+            ]

         parsed_providers = []
         if isinstance(search_providers, dict):
             providers_dict = {}
-            provider_classes = {
-                cls.provider_name(): cls
-                for _, cls in inspect.getmembers(search_module, inspect.isclass)
-                if issubclass(cls, TorrentSearchProvider)
-                and cls != TorrentSearchProvider
-            }

             # Configure the search providers explicitly passed in the configuration
             for provider_name, provider_config in search_providers.items():
@@ -523,7 +579,9 @@
         :type torrent: str
         """

-        assert torrent in self.transfers, f"No transfer in progress for {torrent}"
+        if not self.transfers.get(torrent):
+            self.logger.info('No transfer in progress for %s', torrent)
+            return

         self.transfers[torrent].pause()
         del self.torrent_state[torrent]
@@ -1,10 +1,12 @@
 from ._base import TorrentSearchProvider
 from ._popcorntime import PopcornTimeSearchProvider
+from ._torrents_csv import TorrentsCsvSearchProvider


 __all__ = [
     'TorrentSearchProvider',
     'PopcornTimeSearchProvider',
+    'TorrentsCsvSearchProvider',
 ]

@@ -49,7 +49,7 @@ class TorrentSearchProvider(ABC):
             )
             return []

-        self.logger.debug('Searching for %r', query)
+        self.logger.debug("Searching for %r", query)
         return self._search(query, *args, **kwargs)

     @classmethod
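The ``TorrentSearchProvider`` interface touched above (``provider_name``, ``default_enabled`` and the ``_search`` hook wrapped by ``search``) is the contract that the new torrents.csv classes below implement. A purely illustrative sketch of a provider following that shape (hypothetical class, not part of the commit; ``TorrentSearchResult`` lives in the internal ``_model`` module):

.. code-block:: python

    from typing import List

    from platypush.plugins.torrent._search import TorrentSearchProvider
    from platypush.plugins.torrent._search._model import TorrentSearchResult


    class DummySearchProvider(TorrentSearchProvider):
        """Illustrative provider that never returns any results."""

        @classmethod
        def provider_name(cls) -> str:
            # Key expected under ``search_providers`` in the configuration.
            return 'dummy'

        @classmethod
        def default_enabled(cls) -> bool:
            # Only active when explicitly enabled in the configuration.
            return False

        def _search(self, query: str, *args, **kwargs) -> List[TorrentSearchResult]:
            return []

Note that the plugin discovers providers through the classes visible in the ``_search`` module (see the ``_load_search_providers`` hunk earlier), so a provider like this would also need to be importable from there in order to be picked up automatically.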
@@ -0,0 +1,5 @@
from ._facade import TorrentsCsvSearchProvider

__all__ = ['TorrentsCsvSearchProvider']

# vim:sw=4:ts=4:et:

platypush/plugins/torrent/_search/_torrents_csv/_base.py (new file, 100 lines)
@@ -0,0 +1,100 @@
from abc import ABC, abstractmethod
from logging import getLogger
from typing import List, Optional
from urllib.parse import quote_plus

from .._base import TorrentSearchProvider
from .._model import TorrentSearchResult


class TorrentsCsvBaseProvider(TorrentSearchProvider, ABC):
    """
    Base class for Torrents.csv search providers.
    """

    _http_timeout = 20
    _magnet_trackers = [
        'http://125.227.35.196:6969/announce',
        'http://210.244.71.25:6969/announce',
        'http://210.244.71.26:6969/announce',
        'http://213.159.215.198:6970/announce',
        'http://37.19.5.139:6969/announce',
        'http://37.19.5.155:6881/announce',
        'http://87.248.186.252:8080/announce',
        'http://asmlocator.ru:34000/1hfZS1k4jh/announce',
        'http://bt.evrl.to/announce',
        'http://bt.rutracker.org/ann',
        'https://www.artikelplanet.nl',
        'http://mgtracker.org:6969/announce',
        'http://tracker.baravik.org:6970/announce',
        'http://tracker.dler.org:6969/announce',
        'http://tracker.filetracker.pl:8089/announce',
        'http://tracker.grepler.com:6969/announce',
        'http://tracker.mg64.net:6881/announce',
        'http://tracker.tiny-vps.com:6969/announce',
        'http://tracker.torrentyorg.pl/announce',
        'https://internet.sitelio.me/',
        'https://computer1.sitelio.me/',
        'udp://168.235.67.63:6969',
        'udp://37.19.5.155:2710',
        'udp://46.148.18.250:2710',
        'udp://46.4.109.148:6969',
        'udp://computerbedrijven.bestelinks.nl/',
        'udp://computerbedrijven.startsuper.nl/',
        'udp://computershop.goedbegin.nl/',
        'udp://c3t.org',
        'udp://allerhandelenlaag.nl',
        'udp://tracker.opentrackr.org:1337',
        'udp://tracker.publicbt.com:80',
        'udp://tracker.tiny-vps.com:6969',
        'udp://tracker.openbittorrent.com:80',
        'udp://opentor.org:2710',
        'udp://tracker.ccc.de:80',
        'udp://tracker.blackunicorn.xyz:6969',
        'udp://tracker.coppersurfer.tk:6969',
        'udp://tracker.leechers-paradise.org:6969',
    ]

    def __init__(
        self,
        trackers: Optional[List[str]] = None,
        **kwargs,
    ):
        """
        :param trackers: List of additional trackers to use.
        """
        super().__init__(**kwargs)
        self.logger = getLogger(self.__class__.__name__)
        self.trackers = list({*self._magnet_trackers, *(trackers or [])})

    @classmethod
    def provider_name(cls) -> str:
        return 'torrents.csv'

    @abstractmethod
    def _search(  # pylint: disable=arguments-differ
        self, query: str, *_, limit: int, page: int, **__
    ) -> List[TorrentSearchResult]:
        """
        To be implemented by subclasses.

        :param query: Query string.
        :param limit: Number of results to return (default: 25).
        :param page: Page number (default: 1).
        """

    def _to_magnet(self, info_hash: str, torrent_name: str) -> str:
        """
        Generate a magnet link from an info hash and torrent name.

        :param info_hash: Torrent info hash.
        :param torrent_name: Torrent name.
        :return: Magnet link.
        """
        return (
            f'magnet:?xt=urn:btih:{info_hash}&dn={quote_plus(torrent_name)}&tr='
            + '&tr='.join([quote_plus(tracker) for tracker in self.trackers])
        )


# vim:sw=4:ts=4:et:
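``_to_magnet`` above simply assembles a BitTorrent magnet URI from the info hash, the URL-encoded display name and the merged tracker list. A small sketch of the expected output, using the API subclass introduced later in this commit and a made-up info hash:

.. code-block:: python

    from platypush.plugins.torrent._search._torrents_csv.api import TorrentsCsvAPIProvider

    provider = TorrentsCsvAPIProvider(
        api_url='https://torrents-csv.com/service', enabled=True
    )
    magnet = provider._to_magnet(
        info_hash='0123456789abcdef0123456789abcdef01234567',  # hypothetical hash
        torrent_name='Example Torrent',
    )

    # Expected shape:
    # magnet:?xt=urn:btih:0123456789abcdef...&dn=Example+Torrent&tr=<tracker1>&tr=<tracker2>&...
    print(magnet)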
@@ -0,0 +1,15 @@
"""
Common Torrents CSV constants.
"""

TORRENT_CSV_API_URL = 'https://torrents-csv.com/service'
""" Default Torrents CSV API base URL. """

TORRENTS_CSV_DOWNLOAD_URL = 'https://git.torrents-csv.com/heretic/torrents-csv-data/raw/branch/main/torrents.csv'
""" Default torrents.csv download URL. """

TORRENTS_CSV_URL_LAST_CHECKED_VAR = '_TORRENTS_CSV_URL_LAST_CHECKED'
""" Name of the Platypush variable that stores the last checked timestamp for the torrents.csv URL. """

TORRENTS_CSV_DEFAULT_CHECK_INTERVAL = 60 * 60 * 24
""" Interval in seconds to re-check the torrents.csv URL (24 hours). """

platypush/plugins/torrent/_search/_torrents_csv/_facade.py (new file, 84 lines)
@@ -0,0 +1,84 @@
import os
from typing import List, Optional

from .._model import TorrentSearchResult
from ._base import TorrentsCsvBaseProvider
from ._constants import (
    TORRENT_CSV_API_URL,
    TORRENTS_CSV_DOWNLOAD_URL,
    TORRENTS_CSV_DEFAULT_CHECK_INTERVAL,
)
from .api import TorrentsCsvAPIProvider
from .local import TorrentsCsvLocalProvider


class TorrentsCsvSearchProvider(TorrentsCsvBaseProvider):
    """
    Torrent search provider that uses `Torrents.csv <https://torrents-csv.com/>`_
    to search for torrents, either through the API or through a local database.
    """

    def __init__(
        self,
        api_url: str = TORRENT_CSV_API_URL,
        csv_url: str = TORRENTS_CSV_DOWNLOAD_URL,
        download_csv: bool = False,
        csv_path: Optional[str] = None,
        db_path: Optional[str] = None,
        csv_url_check_interval: int = TORRENTS_CSV_DEFAULT_CHECK_INTERVAL,
        **kwargs
    ):
        """
        :param api_url: Torrents.csv API URL.
        :param csv_url: Torrents.csv CSV URL.
        :param download_csv: Whether to download the CSV file.
        :param csv_path: Path to the CSV file.
        :param db_path: Path to the SQLite database file.
        :param csv_url_check_interval: Interval to check for CSV updates.
        """
        super().__init__(**kwargs)
        self.api_url = api_url
        self.csv_url = csv_url
        self.download_csv = download_csv
        self.csv_path = os.path.expanduser(csv_path) if csv_path else None
        self.db_path = os.path.expanduser(db_path) if db_path else None
        self.csv_url_check_interval = csv_url_check_interval

    @property
    def _delegate(self) -> TorrentsCsvBaseProvider:
        """
        :return: The provider to delegate the search to.
        """
        if self.download_csv or self.csv_path or self.db_path:
            return TorrentsCsvLocalProvider(
                download_csv=self.download_csv,
                csv_url=self.csv_url,
                csv_path=self.csv_path,
                db_path=self.db_path,
                csv_url_check_interval=self.csv_url_check_interval,
                enabled=True,
            )

        return TorrentsCsvAPIProvider(api_url=self.api_url, enabled=True)

    @classmethod
    def default_enabled(cls) -> bool:
        """
        This provider is enabled by default.
        """
        return True

    def _search(
        self, query: str, *_, limit: int = 25, page: int = 1, **__
    ) -> List[TorrentSearchResult]:
        """
        Perform a torrent search through the Torrents.csv backend.

        :param query: Query string.
        :param limit: Number of results to return (default: 25).
        :param page: Page number (default: 1).
        """
        return list(self._delegate.search(query=query, limit=limit, page=page))


# vim:sw=4:ts=4:et:
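The facade resolves its backend on each search: if any of ``download_csv``, ``csv_path`` or ``db_path`` is set, the query goes to the local SQLite provider (which may download the CSV and build the database the first time), otherwise it falls through to the remote API provider. A brief sketch of the two modes (the path is a placeholder, not part of the commit):

.. code-block:: python

    from platypush.plugins.torrent._search import TorrentsCsvSearchProvider

    # Remote API backend: the default when no local option is set.
    api_backed = TorrentsCsvSearchProvider(enabled=True)

    # Local backend: an existing torrents.csv checkout. The SQLite database is
    # built next to it (torrents.db) the first time a search is delegated.
    csv_backed = TorrentsCsvSearchProvider(
        csv_path='/path/to/torrents.csv', enabled=True
    )

    results = csv_backed.search('ubuntu', limit=10, page=1)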
platypush/plugins/torrent/_search/_torrents_csv/api.py (new file, 59 lines)

@@ -0,0 +1,59 @@
from typing import List

import requests

from .._model import TorrentSearchResult
from ._base import TorrentsCsvBaseProvider


class TorrentsCsvAPIProvider(TorrentsCsvBaseProvider):
    """
    Torrent search provider that uses `Torrents.csv <https://torrents-csv.com/>`_
    or any other `Torrents.csv API <https://torrents-csv.com/service>`_ instance
    to search for torrents.
    """

    def __init__(self, api_url: str, **kwargs):
        """
        :param api_url: Torrents.csv API base URL.
        """
        super().__init__(**kwargs)
        self.api_url = api_url

    def _search(
        self, query: str, *_, limit: int, page: int, **__
    ) -> List[TorrentSearchResult]:
        """
        Perform a search of torrents using the Torrents.csv API.

        :param query: Query string.
        :param limit: Number of results to return (default: 25).
        :param page: Page number (default: 1).
        """
        response = requests.get(
            f'{self.api_url}/search',
            params={
                'q': query,
                'size': limit,
                'page': page,
            },
            timeout=self._http_timeout,
        )

        response.raise_for_status()
        return [
            TorrentSearchResult(
                title=torrent.get('name', '[No Title]'),
                url=self._to_magnet(
                    info_hash=torrent.get('infohash'), torrent_name=torrent.get('name')
                ),
                size=torrent.get('size_bytes'),
                created_at=torrent.get('created_unix'),
                seeds=torrent.get('seeders'),
                peers=torrent.get('leechers'),
            )
            for torrent in response.json().get('torrents', [])
        ]


# vim:sw=4:ts=4:et:
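For reference, the HTTP call wrapped by ``_search`` above is a plain GET against the ``/search`` endpoint with ``q``, ``size`` and ``page`` parameters; something along these lines reproduces it with ``requests`` alone (field names as consumed by the provider):

.. code-block:: python

    import requests

    response = requests.get(
        'https://torrents-csv.com/service/search',
        params={'q': 'debian', 'size': 5, 'page': 1},
        timeout=20,
    )
    response.raise_for_status()

    for torrent in response.json().get('torrents', []):
        # These are the fields the provider maps to TorrentSearchResult.
        print(torrent.get('name'), torrent.get('infohash'), torrent.get('seeders'))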
platypush/plugins/torrent/_search/_torrents_csv/local.py (new file, 272 lines)

@@ -0,0 +1,272 @@
import datetime as dt
import os
import pathlib
import re
import stat
import subprocess
import time
from threading import RLock
from typing import List, Optional
from urllib.parse import quote_plus

import requests
from sqlalchemy import create_engine, text

from platypush.config import Config
from platypush.context import Variable

from .._model import TorrentSearchResult
from ._base import TorrentsCsvBaseProvider
from ._constants import TORRENTS_CSV_URL_LAST_CHECKED_VAR

SQL_INIT_TEMPLATE = """
create table torrent_tmp (
    infohash text primary key,
    name text not null,
    size_bytes integer not null,
    created_unix integer(4) not null,
    seeders integer not null,
    leechers integer not null,
    completed integer not null,
    scraped_date integer(4) not null,
    published integer(4) not null
);

.separator ,
.import --skip 1 '{csv_file}' torrent_tmp

create index idx_name on torrent_tmp(lower(name));
create index idx_seeders on torrent_tmp(seeders);
create index idx_created_unix on torrent_tmp(created_unix);

drop table if exists torrent;
alter table torrent_tmp rename to torrent;
"""


class TorrentsCsvLocalProvider(TorrentsCsvBaseProvider):
    """
    This class is responsible for managing a local checkout of the torrents-csv
    dataset.
    """

    def __init__(
        self,
        download_csv: bool,
        csv_url: str,
        csv_url_check_interval: int,
        csv_path: Optional[str] = None,
        db_path: Optional[str] = None,
        **kwargs,
    ):
        """
        Note that at least one among ``download_csv``, ``csv_path`` and ``db_path``
        should be provided.

        :param download_csv: If True then the CSV file will be downloaded from the
            specified ``csv_url``.
        :param csv_url: The URL from which the CSV file will be downloaded.
        :param csv_url_check_interval: The interval in seconds after which the CSV
            should be checked for updates.
        :param csv_path: The path to the CSV file. If not provided, and ``download_csv``
            is set to True, then the CSV file will be downloaded to
            ``<WORKDIR>/torrent/torrents.csv``.
        :param db_path: The path to the SQLite database. If not provided, and
            ``csv_path`` or ``download_csv`` are set, then the database will be created
            from a local copy of the CSV file.
        """
        super().__init__(**kwargs)
        assert (
            download_csv or csv_path or db_path
        ), 'You must provide either download_csv, csv_path or db_path'

        self._init_csv_lock = RLock()
        self._init_db_lock = RLock()
        self._csv_url_check_interval = csv_url_check_interval

        if download_csv:
            csv_path = (
                os.path.expanduser(csv_path)
                if csv_path
                else os.path.join(Config.get_workdir(), 'torrent', 'torrents.csv')
            )

            with self._init_csv_lock:
                self._download_csv(csv_url=csv_url, csv_path=csv_path)

        if csv_path:
            db_path = (
                os.path.expanduser(db_path)
                if db_path
                else os.path.join(os.path.dirname(csv_path), 'torrents.db')
            )

            with self._init_db_lock:
                self._build_db(csv_path=csv_path, db_path=db_path)

        assert db_path, 'No download_csv, csv_path or db_path provided'
        assert os.path.isfile(db_path), f'Invalid db_path: {db_path}'
        self.db_path = db_path

    def _get_engine(self):
        return create_engine(
            'sqlite:///' + ('/'.join(map(quote_plus, self.db_path.split(os.path.sep))))
        )

    def _download_csv(self, csv_url: str, csv_path: str):
        if not self._should_download_csv(
            csv_url=csv_url,
            csv_path=csv_path,
            csv_url_check_interval=self._csv_url_check_interval,
        ):
            return

        self.logger.info(
            'Downloading the torrents CSV file from %s to %s', csv_url, csv_path
        )

        response = requests.get(csv_url, stream=True, timeout=60)
        response.raise_for_status()
        size = int(response.headers.get('Content-Length', 0))
        torrents_csv_dir = os.path.dirname(csv_path)
        pathlib.Path(torrents_csv_dir).mkdir(parents=True, exist_ok=True)

        with open(csv_path, 'wb') as f:
            written = 0

            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                written += len(chunk)
                if size:
                    percent = 100.0 * written / size
                    prev_percent = max(0, 100.0 * (written - len(chunk)) / size)
                    if round(percent / 5) > round(prev_percent / 5):
                        self.logger.info('... %.2f%%\r', percent)

        self.logger.info('Downloaded the torrents CSV file to %s', csv_path)

    def _build_db(self, csv_path: str, db_path: str):
        if not self._should_update_db(csv_path, db_path):
            return

        self.logger.info(
            'Refreshing SQLite database %s from CSV file %s', db_path, csv_path
        )

        db_dir = os.path.dirname(db_path)
        pathlib.Path(db_dir).mkdir(parents=True, exist_ok=True)

        with subprocess.Popen(
            ['sqlite3', db_path], stdin=subprocess.PIPE, text=True
        ) as proc:
            proc.communicate(SQL_INIT_TEMPLATE.format(csv_file=csv_path))

        self.logger.info(
            'Refreshed SQLite database %s from CSV file %s: ready to search',
            db_path,
            csv_path,
        )

    @staticmethod
    def _should_update_db(csv_path: str, db_path: str) -> bool:
        if not os.path.isfile(csv_path):
            return False

        if not os.path.isfile(db_path):
            return True

        return os.stat(db_path)[stat.ST_MTIME] < os.stat(csv_path)[stat.ST_MTIME]

    def _should_download_csv(
        self, csv_url: str, csv_path: str, csv_url_check_interval: int
    ) -> bool:
        if not os.path.isfile(csv_path):
            self.logger.info('CSV file %s not found, downloading it', csv_path)
            return True

        if not self._should_check_csv_url(csv_url_check_interval):
            self.logger.debug('No need to check the CSV URL %s', csv_url)
            return False

        request = requests.head(csv_url, timeout=10)
        request.raise_for_status()
        last_modified_hdr = request.headers.get('Last-Modified')
        Variable(TORRENTS_CSV_URL_LAST_CHECKED_VAR).set(time.time())

        if not last_modified_hdr:
            self.logger.debug(
                "No Last-Modified header found in the CSV URL, can't compare thus downloading"
            )
            return True

        return (
            time.mktime(time.strptime(last_modified_hdr, '%a, %d %b %Y %H:%M:%S %Z'))
            > os.stat(csv_path)[stat.ST_MTIME]
        )

    @staticmethod
    def _should_check_csv_url(csv_url_check_interval: int) -> bool:
        last_checked = round(
            float(Variable(TORRENTS_CSV_URL_LAST_CHECKED_VAR).get() or 0)
        )
        return bool(
            csv_url_check_interval
            and time.time() - last_checked > csv_url_check_interval
        )

    def _search(
        self, query: str, *_, limit: int, page: int, **__
    ) -> List[TorrentSearchResult]:
        self.logger.debug(
            "Searching for %r on %s, limit=%d, page=%d",
            query,
            self.db_path,
            limit,
            page,
        )

        tokens = re.split(r'[^\w]', query.lower())
        where = ' and '.join(
            f'lower(name) like :token{i}' for i, _ in enumerate(tokens)
        )
        tokens = {f'token{i}': f'%{token}%' for i, token in enumerate(tokens)}

        with self._get_engine().connect() as conn:
            self.logger.debug('Connected to the database: %s', conn.engine.url)
            results = conn.execute(
                text(
                    f"""
                    select infohash, name, size_bytes, seeders, leechers, created_unix
                    from torrent
                    where {where}
                    order by seeders desc, created_unix desc
                    limit :limit
                    offset :offset
                    """
                ),
                {
                    **tokens,
                    'limit': max(int(limit), 0),
                    'offset': max(int(limit * (page - 1)), 0),
                },
            ).all()

        self.logger.debug('Found %d results', len(results))
        return [
            TorrentSearchResult(
                title=result[1],
                url=self._to_magnet(
                    info_hash=result[0],
                    torrent_name=result[1],
                ),
                size=result[2],
                seeds=int(result[3] or 0),
                peers=int(result[4] or 0),
                created_at=(
                    dt.datetime.fromtimestamp(result[5]).replace(tzinfo=dt.timezone.utc)
                    if result[5]
                    else None
                ),
            )
            for result in results
        ]
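The local backend's ``_search`` splits the query into word tokens and turns each one into a case-insensitive ``LIKE`` filter, bound as a named parameter. A quick standalone sketch of what it builds for a short query (same logic as in the method above):

.. code-block:: python

    import re

    query = 'ubuntu 24.04'
    tokens = re.split(r'[^\w]', query.lower())  # ['ubuntu', '24', '04']
    where = ' and '.join(
        f'lower(name) like :token{i}' for i, _ in enumerate(tokens)
    )
    params = {f'token{i}': f'%{token}%' for i, token in enumerate(tokens)}

    print(where)
    # lower(name) like :token0 and lower(name) like :token1 and lower(name) like :token2
    print(params)
    # {'token0': '%ubuntu%', 'token1': '%24%', 'token2': '%04%'}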
@@ -3,16 +3,20 @@
     "events": {},
     "install": {
         "apk": [
-            "py3-libtorrent-rasterbar"
+            "py3-libtorrent-rasterbar",
+            "sqlite3"
         ],
         "apt": [
-            "python3-libtorrent"
+            "python3-libtorrent",
+            "sqlite3"
         ],
         "dnf": [
-            "rb_libtorrent-python3"
+            "rb_libtorrent-python3",
+            "sqlite3"
        ],
         "pacman": [
-            "libtorrent-rasterbar"
+            "libtorrent-rasterbar",
+            "sqlite3"
         ],
         "pip": [
             "libtorrent"