From dca41ea86e2c042a014e032cf4afd85f7ce5acc7 Mon Sep 17 00:00:00 2001
From: Fabio Manganiello <blacklight86@gmail.com>
Date: Tue, 1 May 2018 10:13:37 +0200
Subject: [PATCH] A more robust logic for spotting new RSS items

---
 platypush/backend/http/request/rss/__init__.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/platypush/backend/http/request/rss/__init__.py b/platypush/backend/http/request/rss/__init__.py
index a2dcbf681..3a4f21492 100644
--- a/platypush/backend/http/request/rss/__init__.py
+++ b/platypush/backend/http/request/rss/__init__.py
@@ -11,6 +11,7 @@ from sqlalchemy import create_engine, Column, Integer, String, DateTime, \
 
 from sqlalchemy.orm import sessionmaker, scoped_session
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.sql.expression import func
 
 from platypush.backend.http.request import HttpRequest
 from platypush.config import Config
@@ -52,13 +53,17 @@ class RssUpdates(HttpRequest):
     def _get_or_create_source(self, session):
         record = session.query(FeedSource).filter_by(url=self.url).first()
         if record is None:
-            record = FeedSource(url=self.url)
+            record = FeedSource(url=self.url, title=self.title)
             session.add(record)
 
         session.commit()
         return record
 
 
+    def _get_latest_update(self, session, source_id):
+        return session.query(func.max(FeedEntry.published)).filter_by(source_id=source_id).scalar()
+
+
     def _parse_entry_content(self, link):
         response = None
 
@@ -86,6 +91,7 @@ class RssUpdates(HttpRequest):
         session.add(source_record)
         parse_start_time = datetime.datetime.utcnow()
         entries = []
+        latest_update = self._get_latest_update(session, source_record.id)
 
         if not self.title and 'title' in feed.feed:
             self.title = feed.feed['title']
@@ -104,8 +110,8 @@ class RssUpdates(HttpRequest):
         for entry in feed.entries:
             entry_timestamp = datetime.datetime(*entry.published_parsed[:6])
 
-            if source_record.last_updated_at is None \
-                    or entry_timestamp > source_record.last_updated_at:
+            if latest_update is None \
+                    or entry_timestamp > latest_update:
                 logging.info('Processed new item from RSS feed <{}>: "{}"'
                              .format(self.url, entry.title))