Wrapped the RSS parsing loop in a try-except block to prevent the backend from repeatedly crashing when there are encoding issues in the RSS titles

This commit is contained in:
Fabio Manganiello 2018-10-21 14:47:52 +02:00
parent 7adfb67c12
commit ae7cd120d2
1 changed file with 30 additions and 26 deletions

View File

@ -127,38 +127,42 @@ class RssUpdates(HttpRequest):
if not entry.published_parsed: if not entry.published_parsed:
continue continue
entry_timestamp = datetime.datetime(*entry.published_parsed[:6]) try:
entry_timestamp = datetime.datetime(*entry.published_parsed[:6])
if latest_update is None \ if latest_update is None \
or entry_timestamp > latest_update: or entry_timestamp > latest_update:
self.logger.info('Processed new item from RSS feed <{}>: "{}"' self.logger.info('Processed new item from RSS feed <{}>: "{}"'
.format(self.url, entry.title)) .format(self.url, entry.title))
entry.summary = entry.summary if hasattr(entry, 'summary') else None entry.summary = entry.summary if hasattr(entry, 'summary') else None
if self.mercury_api_key: if self.mercury_api_key:
entry.content = self._parse_entry_content(entry.link) entry.content = self._parse_entry_content(entry.link)
elif hasattr(entry, 'summary'): elif hasattr(entry, 'summary'):
entry.content = entry.summary entry.content = entry.summary
else: else:
entry.content = None entry.content = None
digest += '<h1 style="page-break-before: always">{}</h1>{}' \ digest += '<h1 style="page-break-before: always">{}</h1>{}' \
.format(entry.title, entry.content) .format(entry.title, entry.content)
e = { e = {
'entry_id': entry.id, 'entry_id': entry.id,
'title': entry.title, 'title': entry.title,
'link': entry.link, 'link': entry.link,
'summary': entry.summary, 'summary': entry.summary,
'content': entry.content, 'content': entry.content,
'source_id': source_record.id, 'source_id': source_record.id,
'published': entry_timestamp, 'published': entry_timestamp,
} }
entries.append(e) entries.append(e)
session.add(FeedEntry(**e)) session.add(FeedEntry(**e))
if self.max_entries and len(entries) > self.max_entries: break if self.max_entries and len(entries) > self.max_entries: break
except Exception as e:
self.logger.warning('Exception encountered while parsing RSS ' +
'RSS feed {}: {}'.format(self.url, str(e)))
source_record.last_updated_at = parse_start_time source_record.last_updated_at = parse_start_time
digest_filename = None digest_filename = None