diff --git a/platypush/backend/http/request/rss/__init__.py b/platypush/backend/http/request/rss/__init__.py index 2f3ff6c3..904e8c79 100644 --- a/platypush/backend/http/request/rss/__init__.py +++ b/platypush/backend/http/request/rss/__init__.py @@ -20,24 +20,73 @@ Session = scoped_session(sessionmaker()) class RssUpdates(HttpRequest): """ - Gets new items in an RSS feed + Gets new items in an RSS feed. You can use this type of object within the context of the + :class:`platypush.backend.http.poll.HttpPollBackend` backend. Example: + + .. code-block:: yaml + + backend.http.poll: + requests: + - type: platypush.backend.http.request.rss.RssUpdates + url: https://www.technologyreview.com/feed/ + title: MIT Technology Review + poll_seconds: 86400 # Poll once a day + digest_format: html # Generate an HTML feed with the new items + + Triggers: + + - :class:`platypush.message.event.http.rss.NewFeedEvent` when new items are parsed from a feed or a new digest + is available. Requires: * **feedparser** (``pip install feedparser``) + """ user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) ' + \ 'Chrome/62.0.3202.94 Safari/537.36' def __init__(self, url, title=None, headers=None, params=None, max_entries=None, - extract_content=False, digest_format=None, css_style=None, *argv, **kwargs): + extract_content=False, digest_format=None, user_agent: str = user_agent, + body_style: str = 'font-size: 22px; ' + + 'font-family: "Merriweather", Georgia, "Times New Roman", Times, serif;', + title_style: str = 'margin-top: 30px', + subtitle_style: str = 'margin-top: 10px; page-break-after: always', + article_title_style: str = 'page-break-before: always', + article_link_style: str = 'color: #555; text-decoration: none; border-bottom: 1px dotted', + article_content_style: str = '', *argv, **kwargs): + """ + :param url: URL to the RSS feed to be monitored. + :param title: Optional title for the feed. 
+ :param headers: Extra headers to be passed to the request. + :param params: Extra GET parameters to be appended to the URL. + :param max_entries: Maximum number of entries that will be returned in a single + :class:`platypush.message.event.http.rss.NewFeedEvent` event. + :param extract_content: Whether the content should also be extracted (through the + :class:`platypush.plugins.http.webpage.HttpWebpagePlugin` plugin) (default: ``False``). + :param digest_format: Format of the digest output file (default: None, text). Other supported types: ``html`` + and ``pdf`` (requires the ``weasyprint`` module installed). + :param user_agent: User agent string to be passed on the request. + :param body_style: CSS style for the body. + :param title_style: CSS style for the feed title. + :param subtitle_style: CSS style for the feed subtitle. + :param article_title_style: CSS style for the article titles. + :param article_link_style: CSS style for the article link. + :param article_content_style: CSS style for the article content. + """ self.workdir = os.path.join(os.path.expanduser(Config.get('workdir')), 'feeds') self.dbfile = os.path.join(self.workdir, 'rss.db') self.url = url self.title = title self.max_entries = max_entries - self.css_style = css_style + self.user_agent = user_agent + self.body_style = body_style + self.title_style = title_style + self.subtitle_style = subtitle_style + self.article_title_style = article_title_style + self.article_link_style = article_link_style + self.article_content_style = article_content_style # If true, then the http.webpage plugin will be used to parse the content self.extract_content = extract_content @@ -107,17 +156,10 @@ class RssUpdates(HttpRequest): source_record.title = self.title content = u''' -