Sync the latest parse timestamps in main instead of __init__ in rss.

We should load the latest timestamps from the db when the thread starts
instead of doing it in the constructor.

The constructor may be invoked when the entities engine hasn't been
initialized yet, and result in deadlocks.
This commit is contained in:
Fabio Manganiello 2023-08-18 15:51:11 +02:00
parent bf7d060b81
commit 5dd7345c0b
Signed by untrusted user: blacklight
GPG key ID: D90FBA7F76362774

View file

@ -4,17 +4,17 @@ import queue
import re import re
import threading import threading
import time import time
from dateutil.tz import tzutc
from typing import Iterable, Optional, Collection, Set from typing import Iterable, Optional, Collection, Set
from xml.etree import ElementTree from xml.etree import ElementTree
import dateutil.parser import dateutil.parser
from dateutil.tz import tzutc
import requests import requests
from platypush.context import get_bus, get_plugin from platypush.context import get_bus, get_plugin
from platypush.message.event.rss import NewFeedEntryEvent from platypush.message.event.rss import NewFeedEntryEvent
from platypush.plugins import RunnablePlugin, action from platypush.plugins import RunnablePlugin, action
from platypush.plugins.variable import VariablePlugin
from platypush.schemas.rss import RssFeedEntrySchema from platypush.schemas.rss import RssFeedEntrySchema
@ -61,8 +61,14 @@ class RssPlugin(RunnablePlugin):
self._latest_entries = [] self._latest_entries = []
self.subscriptions = list(self._parse_subscriptions(subscriptions or [])) self.subscriptions = list(self._parse_subscriptions(subscriptions or []))
self._latest_timestamps = {}
self._latest_timestamps = self._get_latest_timestamps() @classmethod
@property
def _variable(cls) -> VariablePlugin:
var = get_plugin(VariablePlugin)
assert var, 'Could not load the variable plugin'
return var
@staticmethod @staticmethod
def _get_feed_latest_timestamp_varname(url: str) -> str: def _get_feed_latest_timestamp_varname(url: str) -> str:
@ -70,21 +76,20 @@ class RssPlugin(RunnablePlugin):
@classmethod @classmethod
def _get_feed_latest_timestamp(cls, url: str) -> Optional[datetime.datetime]: def _get_feed_latest_timestamp(cls, url: str) -> Optional[datetime.datetime]:
t = ( varname = cls._get_feed_latest_timestamp_varname(url)
get_plugin('variable') var: dict = cls._variable.get(varname).output or {} # type: ignore
.get(cls._get_feed_latest_timestamp_varname(url)) t = var.get(varname)
.output.get(cls._get_feed_latest_timestamp_varname(url))
)
if t: if t:
return dateutil.parser.isoparse(t) return dateutil.parser.isoparse(t)
return None
def _get_latest_timestamps(self) -> dict: def _get_latest_timestamps(self) -> dict:
return {url: self._get_feed_latest_timestamp(url) for url in self.subscriptions} return {url: self._get_feed_latest_timestamp(url) for url in self.subscriptions}
def _update_latest_timestamps(self) -> None: def _update_latest_timestamps(self) -> None:
variable = get_plugin('variable') self._variable.set(
variable.set(
**{ **{
self._get_feed_latest_timestamp_varname(url): latest_timestamp self._get_feed_latest_timestamp_varname(url): latest_timestamp
for url, latest_timestamp in self._latest_timestamps.items() for url, latest_timestamp in self._latest_timestamps.items()
@ -95,7 +100,7 @@ class RssPlugin(RunnablePlugin):
def _parse_content(entry) -> Optional[str]: def _parse_content(entry) -> Optional[str]:
content = getattr(entry, 'content', None) content = getattr(entry, 'content', None)
if not content: if not content:
return return None
if isinstance(content, list): if isinstance(content, list):
return content[0]['value'] return content[0]['value']
@ -139,7 +144,7 @@ class RssPlugin(RunnablePlugin):
for entry in feed.entries for entry in feed.entries
if getattr(entry, 'published_parsed', None) if getattr(entry, 'published_parsed', None)
], ],
key=lambda e: e['published'], key=lambda e: e['published'], # type: ignore
), ),
many=True, many=True,
) )
@ -316,6 +321,7 @@ class RssPlugin(RunnablePlugin):
return ElementTree.tostring(root, encoding='utf-8', method='xml').decode() return ElementTree.tostring(root, encoding='utf-8', method='xml').decode()
def main(self): def main(self):
self._latest_timestamps = self._get_latest_timestamps()
self._feed_workers = [ self._feed_workers = [
threading.Thread(target=self._feed_worker, args=(q,)) threading.Thread(target=self._feed_worker, args=(q,))
for q in self._feed_worker_queues for q in self._feed_worker_queues