A more robust logic for spotting new RSS items
This commit is contained in:
parent
d12ebe8810
commit
dca41ea86e
1 changed file with 9 additions and 3 deletions
|
@@ -11,6 +11,7 @@ from sqlalchemy import create_engine, Column, Integer, String, DateTime, \
|
||||||
|
|
||||||
from sqlalchemy.orm import sessionmaker, scoped_session
|
from sqlalchemy.orm import sessionmaker, scoped_session
|
||||||
from sqlalchemy.ext.declarative import declarative_base
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
from sqlalchemy.sql.expression import func
|
||||||
|
|
||||||
from platypush.backend.http.request import HttpRequest
|
from platypush.backend.http.request import HttpRequest
|
||||||
from platypush.config import Config
|
from platypush.config import Config
|
||||||
|
@@ -52,13 +53,17 @@ class RssUpdates(HttpRequest):
|
||||||
def _get_or_create_source(self, session):
|
def _get_or_create_source(self, session):
|
||||||
record = session.query(FeedSource).filter_by(url=self.url).first()
|
record = session.query(FeedSource).filter_by(url=self.url).first()
|
||||||
if record is None:
|
if record is None:
|
||||||
record = FeedSource(url=self.url)
|
record = FeedSource(url=self.url, title=self.title)
|
||||||
session.add(record)
|
session.add(record)
|
||||||
|
|
||||||
session.commit()
|
session.commit()
|
||||||
return record
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
def _get_latest_update(self, session, source_id):
|
||||||
|
return session.query(func.max(FeedEntry.published)).filter_by(source_id=source_id).scalar()
|
||||||
|
|
||||||
|
|
||||||
def _parse_entry_content(self, link):
|
def _parse_entry_content(self, link):
|
||||||
response = None
|
response = None
|
||||||
|
|
||||||
|
@@ -86,6 +91,7 @@ class RssUpdates(HttpRequest):
|
||||||
session.add(source_record)
|
session.add(source_record)
|
||||||
parse_start_time = datetime.datetime.utcnow()
|
parse_start_time = datetime.datetime.utcnow()
|
||||||
entries = []
|
entries = []
|
||||||
|
latest_update = self._get_latest_update(session, source_record.id)
|
||||||
|
|
||||||
if not self.title and 'title' in feed.feed:
|
if not self.title and 'title' in feed.feed:
|
||||||
self.title = feed.feed['title']
|
self.title = feed.feed['title']
|
||||||
|
@@ -104,8 +110,8 @@ class RssUpdates(HttpRequest):
|
||||||
for entry in feed.entries:
|
for entry in feed.entries:
|
||||||
entry_timestamp = datetime.datetime(*entry.published_parsed[:6])
|
entry_timestamp = datetime.datetime(*entry.published_parsed[:6])
|
||||||
|
|
||||||
if source_record.last_updated_at is None \
|
if latest_update is None \
|
||||||
or entry_timestamp > source_record.last_updated_at:
|
or entry_timestamp > latest_update:
|
||||||
logging.info('Processed new item from RSS feed <{}>: "{}"'
|
logging.info('Processed new item from RSS feed <{}>: "{}"'
|
||||||
.format(self.url, entry.title))
|
.format(self.url, entry.title))
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue