Be more robust in case of Mercury API failures while parsing RSS feeds. TODO: the Mercury API has now been retired and replaced by a library that, however, is only available for Node.js. Figure out how to wrap the new library in Python 3.

This commit is contained in:
Fabio Manganiello 2019-07-19 15:00:06 +00:00
parent f12c6db34f
commit 57712b3693

View file

@ -87,10 +87,21 @@ class RssUpdates(HttpRequest):
raise err raise err
if not response.text: if not response.text:
raise RuntimeError("No response from Mercury API for URL {} after {} tries" self.logger.warning('No response from Mercury API for URL {} after {} tries'.format(link, n_tries))
.format(link, n_tries)) return
return response.json()['content'] if response and response.ok else None if not response.ok:
self.logger.warning('Mercury API call failed with status {}'.format(response.status_code))
return
response = response.json()
error = response.get('error')
if error:
self.logger.warning('Mercury API error: {}'.format(error))
return
return response.get('content')
def get_new_items(self, response): def get_new_items(self, response):
@ -161,6 +172,7 @@ class RssUpdates(HttpRequest):
except Exception as e: except Exception as e:
self.logger.warning('Exception encountered while parsing RSS ' + self.logger.warning('Exception encountered while parsing RSS ' +
'RSS feed {}: {}'.format(self.url, str(e))) 'RSS feed {}: {}'.format(self.url, str(e)))
self.logger.exception(e)
source_record.last_updated_at = parse_start_time source_record.last_updated_at = parse_start_time
digest_filename = None digest_filename = None