From ae7cd120d223e4238c6b4b1e4fd1d91c1932a8c3 Mon Sep 17 00:00:00 2001
From: Fabio Manganiello
Date: Sun, 21 Oct 2018 14:47:52 +0200
Subject: [PATCH] Wrapped the RSS parsing loop in a try-except block to
 prevent the backend from continuously crashing if there are encoding issues
 on the RSS titles

---
 .../backend/http/request/rss/__init__.py | 56 ++++++++++---------
 1 file changed, 30 insertions(+), 26 deletions(-)

diff --git a/platypush/backend/http/request/rss/__init__.py b/platypush/backend/http/request/rss/__init__.py
index aac0198e3..2c00945a2 100644
--- a/platypush/backend/http/request/rss/__init__.py
+++ b/platypush/backend/http/request/rss/__init__.py
@@ -127,38 +127,42 @@ class RssUpdates(HttpRequest):
             if not entry.published_parsed:
                 continue
 
-            entry_timestamp = datetime.datetime(*entry.published_parsed[:6])
+            try:
+                entry_timestamp = datetime.datetime(*entry.published_parsed[:6])
 
-            if latest_update is None \
-                    or entry_timestamp > latest_update:
-                self.logger.info('Processed new item from RSS feed <{}>: "{}"'
-                                 .format(self.url, entry.title))
+                if latest_update is None \
+                        or entry_timestamp > latest_update:
+                    self.logger.info('Processed new item from RSS feed <{}>: "{}"'
+                                     .format(self.url, entry.title))
 
-                entry.summary = entry.summary if hasattr(entry, 'summary') else None
+                    entry.summary = entry.summary if hasattr(entry, 'summary') else None
 
-                if self.mercury_api_key:
-                    entry.content = self._parse_entry_content(entry.link)
-                elif hasattr(entry, 'summary'):
-                    entry.content = entry.summary
-                else:
-                    entry.content = None
+                    if self.mercury_api_key:
+                        entry.content = self._parse_entry_content(entry.link)
+                    elif hasattr(entry, 'summary'):
+                        entry.content = entry.summary
+                    else:
+                        entry.content = None
 
-                digest += '<h1>{}</h1>{}' \
-                    .format(entry.title, entry.content)
+                    digest += '<h1>{}</h1>{}' \
+                        .format(entry.title, entry.content)
 
-                e = {
-                    'entry_id': entry.id,
-                    'title': entry.title,
-                    'link': entry.link,
-                    'summary': entry.summary,
-                    'content': entry.content,
-                    'source_id': source_record.id,
-                    'published': entry_timestamp,
-                }
+                    e = {
+                        'entry_id': entry.id,
+                        'title': entry.title,
+                        'link': entry.link,
+                        'summary': entry.summary,
+                        'content': entry.content,
+                        'source_id': source_record.id,
+                        'published': entry_timestamp,
+                    }
 
-                entries.append(e)
-                session.add(FeedEntry(**e))
-                if self.max_entries and len(entries) > self.max_entries: break
+                    entries.append(e)
+                    session.add(FeedEntry(**e))
+                    if self.max_entries and len(entries) > self.max_entries: break
+            except Exception as e:
+                self.logger.warning('Exception encountered while parsing the RSS ' +
+                                    'feed {}: {}'.format(self.url, str(e)))
 
         source_record.last_updated_at = parse_start_time
         digest_filename = None
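
Note: the sketch below is not part of the patch. It is a minimal standalone illustration of the defensive pattern the patch applies: wrap the per-entry parsing in try/except so that one malformed entry (for example, one with a badly encoded title) is logged and skipped instead of crashing the whole feed run. The helper name process_feed and the direct use of feedparser are assumptions for illustration only; the actual backend goes through its own HttpRequest machinery and database session.

import datetime
import logging

import feedparser

logger = logging.getLogger(__name__)


def process_feed(url):
    # Hypothetical helper, not the backend's API: parse a feed and collect
    # the entries that could be processed without errors.
    feed = feedparser.parse(url)
    entries = []

    for entry in feed.entries:
        # Skip entries that carry no publication date at all.
        if not getattr(entry, 'published_parsed', None):
            continue

        try:
            # Anything in here that blows up (missing attributes, bad
            # encodings in the title, etc.) only affects this one entry.
            published = datetime.datetime(*entry.published_parsed[:6])
            entries.append({
                'title': entry.title,
                'link': entry.link,
                'published': published,
            })
        except Exception as e:
            # Log and move on to the next entry instead of aborting the run.
            logger.warning('Error while parsing an entry of %s: %s', url, e)

    return entries

As in the patch, the effect is that a single bad item costs only a warning line, while the rest of the feed is still parsed and the subsequent bookkeeping (digest generation, database update) still runs.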