forked from platypush/platypush
Support for RSS feeds update events, solves #48
This commit is contained in:
parent
bcda9ef809
commit
14afbcad3a
7 changed files with 180 additions and 14 deletions
|
@ -24,12 +24,14 @@ class HttpRequest(object):
|
|||
self.kwargs = kwargs
|
||||
|
||||
|
||||
def __init__(self, args, bus=None, poll_seconds=None, timeout=None, **kwargs):
|
||||
def __init__(self, args, bus=None, poll_seconds=None, timeout=None,
|
||||
skip_first_call=True, **kwargs):
|
||||
super().__init__()
|
||||
|
||||
self.poll_seconds = poll_seconds or self.poll_seconds
|
||||
self.timeout = timeout or self.timeout
|
||||
self.bus = bus
|
||||
self.skip_first_call = skip_first_call
|
||||
self.last_request_timestamp = 0
|
||||
|
||||
if isinstance(args, self.HttpRequestArguments):
|
||||
|
@ -62,8 +64,10 @@ class HttpRequest(object):
|
|||
else:
|
||||
event = HttpEvent(dict(self), new_items)
|
||||
|
||||
if new_items and not is_first_call and self.bus:
|
||||
self.bus.post(event)
|
||||
if new_items and self.bus:
|
||||
if not self.skip_first_call or (
|
||||
self.skip_first_call and not is_first_call):
|
||||
self.bus.post(event)
|
||||
|
||||
response.raise_for_status()
|
||||
|
||||
|
|
147
platypush/backend/http/request/rss/__init__.py
Normal file
147
platypush/backend/http/request/rss/__init__.py
Normal file
|
@ -0,0 +1,147 @@
|
|||
import datetime
|
||||
import feedparser
|
||||
import logging
|
||||
import os
|
||||
import requests
|
||||
import time
|
||||
|
||||
from sqlalchemy import create_engine, Column, Integer, String, DateTime, \
|
||||
UniqueConstraint, ForeignKey
|
||||
|
||||
from sqlalchemy.orm import sessionmaker, scoped_session
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
|
||||
from platypush.backend.http.request import HttpRequest
|
||||
from platypush.config import Config
|
||||
from platypush.message.event.http.rss import NewFeedEvent
|
||||
from platypush.utils import mkdir_p
|
||||
|
||||
Base = declarative_base()
|
||||
Session = scoped_session(sessionmaker())
|
||||
|
||||
|
||||
class GetRssUpdates(HttpRequest):
|
||||
""" Gets new items in an RSS feed """
|
||||
|
||||
dbfile = os.path.join(os.path.expanduser(Config.get('workdir')), 'feeds', 'rss.db')
|
||||
|
||||
def __init__(self, url, headers=None, params=None, dbfile=None,
|
||||
mercury_api_key=None, *args, **kwargs):
|
||||
|
||||
self.url = url
|
||||
self.mercury_api_key = mercury_api_key # Mercury Reader API used to parse the content of the link
|
||||
|
||||
if dbfile: self.dbfile = dbfile
|
||||
mkdir_p(os.path.dirname(self.dbfile))
|
||||
|
||||
self.engine = create_engine('sqlite:///{}'.format(self.dbfile))
|
||||
Base.metadata.create_all(self.engine)
|
||||
Session.configure(bind=self.engine)
|
||||
self._get_or_create_source(session=Session())
|
||||
|
||||
request_args = {
|
||||
'method': 'get',
|
||||
'url': self.url,
|
||||
'headers': headers or {},
|
||||
'params': params or {},
|
||||
}
|
||||
|
||||
super().__init__(*args, skip_first_call=False, args=request_args, **kwargs)
|
||||
|
||||
|
||||
def _get_or_create_source(self, session):
|
||||
record = session.query(FeedSource).filter_by(url=self.url).first()
|
||||
if record is None:
|
||||
record = FeedSource(url=self.url)
|
||||
session.add(record)
|
||||
|
||||
session.commit()
|
||||
return record
|
||||
|
||||
|
||||
def _parse_entry_content(self, link):
|
||||
response = None
|
||||
|
||||
try:
|
||||
logging.info('Parsing content for {}'.format(link))
|
||||
response = requests.get('https://mercury.postlight.com/parser',
|
||||
params = {'url': link},
|
||||
headers = {'x-api-key': self.mercury_api_key })
|
||||
except Exception as e: logging.exception(e)
|
||||
|
||||
return response.json()['content'] if response and response.ok else None
|
||||
|
||||
|
||||
def get_new_items(self, response):
|
||||
feed = feedparser.parse(response.text)
|
||||
session = Session()
|
||||
source_record = self._get_or_create_source(session=session)
|
||||
session.add(source_record)
|
||||
parse_start_time = datetime.datetime.utcnow()
|
||||
|
||||
entries = []
|
||||
last_updated_at = source_record.last_updated_at
|
||||
|
||||
if source_record.title != feed.feed['title']:
|
||||
source_record.title = feed.feed['title']
|
||||
|
||||
for entry in feed.entries:
|
||||
entry_timestamp = datetime.datetime(*entry.published_parsed[:6])
|
||||
|
||||
if last_updated_at is None: last_updated_at = entry_timestamp
|
||||
if source_record.last_updated_at is None \
|
||||
or entry_timestamp > source_record.last_updated_at:
|
||||
last_updated_at = max(entry_timestamp, last_updated_at)
|
||||
|
||||
entry.content = self._parse_entry_content(entry.link) \
|
||||
if self.mercury_api_key else None
|
||||
|
||||
e = {
|
||||
'entry_id': entry.id,
|
||||
'title': entry.title,
|
||||
'link': entry.link,
|
||||
'summary': entry.summary,
|
||||
'content': entry.content,
|
||||
'source_id': source_record.id,
|
||||
'published': entry_timestamp,
|
||||
}
|
||||
|
||||
entries.append(e)
|
||||
session.add(FeedEntry(**e))
|
||||
|
||||
source_record.last_updated_at = parse_start_time
|
||||
session.commit()
|
||||
|
||||
if entries:
|
||||
logging.info('Parsed {} new entries from the RSS feed {}'.format(
|
||||
len(entries), source_record.title))
|
||||
|
||||
return NewFeedEvent(dict(self), entries)
|
||||
|
||||
|
||||
class FeedSource(Base):
|
||||
__tablename__ = 'FeedSource'
|
||||
__table_args__ = ({ 'sqlite_autoincrement': True })
|
||||
|
||||
id = Column(Integer, primary_key=True)
|
||||
title = Column(String)
|
||||
url = Column(String, unique=True)
|
||||
last_updated_at = Column(DateTime)
|
||||
|
||||
|
||||
class FeedEntry(Base):
|
||||
__tablename__ = 'FeedEntry'
|
||||
__table_args__ = ({ 'sqlite_autoincrement': True })
|
||||
|
||||
id = Column(Integer, primary_key=True)
|
||||
entry_id = Column(String)
|
||||
source_id = Column(Integer, ForeignKey('FeedSource.id'), nullable=False)
|
||||
title = Column(String)
|
||||
link = Column(String)
|
||||
summary = Column(String)
|
||||
content = Column(String)
|
||||
published = Column(DateTime)
|
||||
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
||||
|
|
@ -1,11 +1,13 @@
|
|||
import datetime
|
||||
import errno
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
import yaml
|
||||
|
||||
from platypush.utils import mkdir_p
|
||||
|
||||
|
||||
""" Config singleton instance """
|
||||
_default_config_instance = None
|
||||
|
||||
|
@ -237,12 +239,6 @@ class Config(object):
|
|||
_default_config_instance._config[key] = key
|
||||
|
||||
|
||||
def mkdir_p(path):
|
||||
try: os.makedirs(path)
|
||||
except OSError as exc: # Python >2.5
|
||||
if exc.errno == errno.EEXIST and os.path.isdir(path): pass
|
||||
else: raise
|
||||
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
||||
|
||||
|
|
9
platypush/message/event/http/rss.py
Normal file
9
platypush/message/event/http/rss.py
Normal file
|
@ -0,0 +1,9 @@
|
|||
from platypush.message.event.http import HttpEvent
|
||||
|
||||
class NewFeedEvent(HttpEvent):
|
||||
def __init__(self, request, response, *args, **kwargs):
|
||||
super().__init__(request=request, response=response, *args, **kwargs)
|
||||
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
||||
|
|
@ -1,10 +1,9 @@
|
|||
import errno
|
||||
import importlib
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
|
||||
from platypush.config import Config
|
||||
from platypush.message import Message
|
||||
|
||||
|
||||
def get_module_and_method_from_action(action):
|
||||
""" Input : action=music.mpd.play
|
||||
|
@ -65,5 +64,12 @@ def clear_timeout():
|
|||
signal.alarm(0)
|
||||
|
||||
|
||||
def mkdir_p(path):
|
||||
try: os.makedirs(path)
|
||||
except OSError as exc: # Python >2.5
|
||||
if exc.errno == errno.EEXIST and os.path.isdir(path): pass
|
||||
else: raise
|
||||
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
||||
|
||||
|
|
|
@ -20,6 +20,9 @@ sqlalchemy
|
|||
# Dates support
|
||||
python-dateutil
|
||||
|
||||
# RSS feeds support
|
||||
feedparser
|
||||
|
||||
# Philips Hue plugin support
|
||||
phue
|
||||
|
||||
|
|
3
setup.py
3
setup.py
|
@ -65,7 +65,8 @@ setup(
|
|||
'Support for Pushbullet backend': ['requests', 'websocket-client'],
|
||||
'Support for HTTP backend': ['flask'],
|
||||
'Support for HTTP poll backend': ['frozendict'],
|
||||
'Support for database plugin': ['sqlalchemy', 'python-dateutil'],
|
||||
'Support for database plugin': ['sqlalchemy'],
|
||||
'Support for RSS feeds': ['feedparser'],
|
||||
'Support for Philips Hue plugin': ['phue'],
|
||||
'Support for MPD/Mopidy music server plugin': ['python-mpd2'],
|
||||
'Support for Belkin WeMo Switch plugin': ['ouimeaux'],
|
||||
|
|
Loading…
Reference in a new issue