From dca41ea86e2c042a014e032cf4afd85f7ce5acc7 Mon Sep 17 00:00:00 2001 From: Fabio Manganiello Date: Tue, 1 May 2018 10:13:37 +0200 Subject: [PATCH] A more robust logic for spotting new RSS items --- platypush/backend/http/request/rss/__init__.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/platypush/backend/http/request/rss/__init__.py b/platypush/backend/http/request/rss/__init__.py index a2dcbf681..3a4f21492 100644 --- a/platypush/backend/http/request/rss/__init__.py +++ b/platypush/backend/http/request/rss/__init__.py @@ -11,6 +11,7 @@ from sqlalchemy import create_engine, Column, Integer, String, DateTime, \ from sqlalchemy.orm import sessionmaker, scoped_session from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.sql.expression import func from platypush.backend.http.request import HttpRequest from platypush.config import Config @@ -52,13 +53,17 @@ class RssUpdates(HttpRequest): def _get_or_create_source(self, session): record = session.query(FeedSource).filter_by(url=self.url).first() if record is None: - record = FeedSource(url=self.url) + record = FeedSource(url=self.url, title=self.title) session.add(record) session.commit() return record + def _get_latest_update(self, session, source_id): + return session.query(func.max(FeedEntry.published)).filter_by(source_id=source_id).scalar() + + def _parse_entry_content(self, link): response = None @@ -86,6 +91,7 @@ class RssUpdates(HttpRequest): session.add(source_record) parse_start_time = datetime.datetime.utcnow() entries = [] + latest_update = self._get_latest_update(session, source_record.id) if not self.title and 'title' in feed.feed: self.title = feed.feed['title'] @@ -104,8 +110,8 @@ class RssUpdates(HttpRequest): for entry in feed.entries: entry_timestamp = datetime.datetime(*entry.published_parsed[:6]) - if source_record.last_updated_at is None \ - or entry_timestamp > source_record.last_updated_at: + if latest_update is None \ + or entry_timestamp > latest_update: logging.info('Processed new item from RSS feed <{}>: "{}"' .format(self.url, entry.title))