forked from platypush/platypush
Support for RSS feeds update events, solves #48
This commit is contained in:
parent
bcda9ef809
commit
14afbcad3a
7 changed files with 180 additions and 14 deletions
|
@ -24,12 +24,14 @@ class HttpRequest(object):
|
||||||
self.kwargs = kwargs
|
self.kwargs = kwargs
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, args, bus=None, poll_seconds=None, timeout=None, **kwargs):
|
def __init__(self, args, bus=None, poll_seconds=None, timeout=None,
|
||||||
|
skip_first_call=True, **kwargs):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.poll_seconds = poll_seconds or self.poll_seconds
|
self.poll_seconds = poll_seconds or self.poll_seconds
|
||||||
self.timeout = timeout or self.timeout
|
self.timeout = timeout or self.timeout
|
||||||
self.bus = bus
|
self.bus = bus
|
||||||
|
self.skip_first_call = skip_first_call
|
||||||
self.last_request_timestamp = 0
|
self.last_request_timestamp = 0
|
||||||
|
|
||||||
if isinstance(args, self.HttpRequestArguments):
|
if isinstance(args, self.HttpRequestArguments):
|
||||||
|
@ -62,8 +64,10 @@ class HttpRequest(object):
|
||||||
else:
|
else:
|
||||||
event = HttpEvent(dict(self), new_items)
|
event = HttpEvent(dict(self), new_items)
|
||||||
|
|
||||||
if new_items and not is_first_call and self.bus:
|
if new_items and self.bus:
|
||||||
self.bus.post(event)
|
if not self.skip_first_call or (
|
||||||
|
self.skip_first_call and not is_first_call):
|
||||||
|
self.bus.post(event)
|
||||||
|
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
|
|
147
platypush/backend/http/request/rss/__init__.py
Normal file
147
platypush/backend/http/request/rss/__init__.py
Normal file
|
@ -0,0 +1,147 @@
|
||||||
|
import datetime
|
||||||
|
import feedparser
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
|
||||||
|
from sqlalchemy import create_engine, Column, Integer, String, DateTime, \
|
||||||
|
UniqueConstraint, ForeignKey
|
||||||
|
|
||||||
|
from sqlalchemy.orm import sessionmaker, scoped_session
|
||||||
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
|
||||||
|
from platypush.backend.http.request import HttpRequest
|
||||||
|
from platypush.config import Config
|
||||||
|
from platypush.message.event.http.rss import NewFeedEvent
|
||||||
|
from platypush.utils import mkdir_p
|
||||||
|
|
||||||
|
Base = declarative_base()
|
||||||
|
Session = scoped_session(sessionmaker())
|
||||||
|
|
||||||
|
|
||||||
|
class GetRssUpdates(HttpRequest):
|
||||||
|
""" Gets new items in an RSS feed """
|
||||||
|
|
||||||
|
dbfile = os.path.join(os.path.expanduser(Config.get('workdir')), 'feeds', 'rss.db')
|
||||||
|
|
||||||
|
def __init__(self, url, headers=None, params=None, dbfile=None,
|
||||||
|
mercury_api_key=None, *args, **kwargs):
|
||||||
|
|
||||||
|
self.url = url
|
||||||
|
self.mercury_api_key = mercury_api_key # Mercury Reader API used to parse the content of the link
|
||||||
|
|
||||||
|
if dbfile: self.dbfile = dbfile
|
||||||
|
mkdir_p(os.path.dirname(self.dbfile))
|
||||||
|
|
||||||
|
self.engine = create_engine('sqlite:///{}'.format(self.dbfile))
|
||||||
|
Base.metadata.create_all(self.engine)
|
||||||
|
Session.configure(bind=self.engine)
|
||||||
|
self._get_or_create_source(session=Session())
|
||||||
|
|
||||||
|
request_args = {
|
||||||
|
'method': 'get',
|
||||||
|
'url': self.url,
|
||||||
|
'headers': headers or {},
|
||||||
|
'params': params or {},
|
||||||
|
}
|
||||||
|
|
||||||
|
super().__init__(*args, skip_first_call=False, args=request_args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_or_create_source(self, session):
|
||||||
|
record = session.query(FeedSource).filter_by(url=self.url).first()
|
||||||
|
if record is None:
|
||||||
|
record = FeedSource(url=self.url)
|
||||||
|
session.add(record)
|
||||||
|
|
||||||
|
session.commit()
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_entry_content(self, link):
|
||||||
|
response = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
logging.info('Parsing content for {}'.format(link))
|
||||||
|
response = requests.get('https://mercury.postlight.com/parser',
|
||||||
|
params = {'url': link},
|
||||||
|
headers = {'x-api-key': self.mercury_api_key })
|
||||||
|
except Exception as e: logging.exception(e)
|
||||||
|
|
||||||
|
return response.json()['content'] if response and response.ok else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_new_items(self, response):
|
||||||
|
feed = feedparser.parse(response.text)
|
||||||
|
session = Session()
|
||||||
|
source_record = self._get_or_create_source(session=session)
|
||||||
|
session.add(source_record)
|
||||||
|
parse_start_time = datetime.datetime.utcnow()
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
last_updated_at = source_record.last_updated_at
|
||||||
|
|
||||||
|
if source_record.title != feed.feed['title']:
|
||||||
|
source_record.title = feed.feed['title']
|
||||||
|
|
||||||
|
for entry in feed.entries:
|
||||||
|
entry_timestamp = datetime.datetime(*entry.published_parsed[:6])
|
||||||
|
|
||||||
|
if last_updated_at is None: last_updated_at = entry_timestamp
|
||||||
|
if source_record.last_updated_at is None \
|
||||||
|
or entry_timestamp > source_record.last_updated_at:
|
||||||
|
last_updated_at = max(entry_timestamp, last_updated_at)
|
||||||
|
|
||||||
|
entry.content = self._parse_entry_content(entry.link) \
|
||||||
|
if self.mercury_api_key else None
|
||||||
|
|
||||||
|
e = {
|
||||||
|
'entry_id': entry.id,
|
||||||
|
'title': entry.title,
|
||||||
|
'link': entry.link,
|
||||||
|
'summary': entry.summary,
|
||||||
|
'content': entry.content,
|
||||||
|
'source_id': source_record.id,
|
||||||
|
'published': entry_timestamp,
|
||||||
|
}
|
||||||
|
|
||||||
|
entries.append(e)
|
||||||
|
session.add(FeedEntry(**e))
|
||||||
|
|
||||||
|
source_record.last_updated_at = parse_start_time
|
||||||
|
session.commit()
|
||||||
|
|
||||||
|
if entries:
|
||||||
|
logging.info('Parsed {} new entries from the RSS feed {}'.format(
|
||||||
|
len(entries), source_record.title))
|
||||||
|
|
||||||
|
return NewFeedEvent(dict(self), entries)
|
||||||
|
|
||||||
|
|
||||||
|
class FeedSource(Base):
|
||||||
|
__tablename__ = 'FeedSource'
|
||||||
|
__table_args__ = ({ 'sqlite_autoincrement': True })
|
||||||
|
|
||||||
|
id = Column(Integer, primary_key=True)
|
||||||
|
title = Column(String)
|
||||||
|
url = Column(String, unique=True)
|
||||||
|
last_updated_at = Column(DateTime)
|
||||||
|
|
||||||
|
|
||||||
|
class FeedEntry(Base):
|
||||||
|
__tablename__ = 'FeedEntry'
|
||||||
|
__table_args__ = ({ 'sqlite_autoincrement': True })
|
||||||
|
|
||||||
|
id = Column(Integer, primary_key=True)
|
||||||
|
entry_id = Column(String)
|
||||||
|
source_id = Column(Integer, ForeignKey('FeedSource.id'), nullable=False)
|
||||||
|
title = Column(String)
|
||||||
|
link = Column(String)
|
||||||
|
summary = Column(String)
|
||||||
|
content = Column(String)
|
||||||
|
published = Column(DateTime)
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
import datetime
|
import datetime
|
||||||
import errno
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import socket
|
import socket
|
||||||
import time
|
import time
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
|
from platypush.utils import mkdir_p
|
||||||
|
|
||||||
|
|
||||||
""" Config singleton instance """
|
""" Config singleton instance """
|
||||||
_default_config_instance = None
|
_default_config_instance = None
|
||||||
|
|
||||||
|
@ -237,12 +239,6 @@ class Config(object):
|
||||||
_default_config_instance._config[key] = key
|
_default_config_instance._config[key] = key
|
||||||
|
|
||||||
|
|
||||||
def mkdir_p(path):
|
|
||||||
try: os.makedirs(path)
|
|
||||||
except OSError as exc: # Python >2.5
|
|
||||||
if exc.errno == errno.EEXIST and os.path.isdir(path): pass
|
|
||||||
else: raise
|
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
# vim:sw=4:ts=4:et:
|
||||||
|
|
||||||
|
|
9
platypush/message/event/http/rss.py
Normal file
9
platypush/message/event/http/rss.py
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
from platypush.message.event.http import HttpEvent
|
||||||
|
|
||||||
|
class NewFeedEvent(HttpEvent):
|
||||||
|
def __init__(self, request, response, *args, **kwargs):
|
||||||
|
super().__init__(request=request, response=response, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
||||||
|
|
|
@ -1,10 +1,9 @@
|
||||||
|
import errno
|
||||||
import importlib
|
import importlib
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import signal
|
import signal
|
||||||
|
|
||||||
from platypush.config import Config
|
|
||||||
from platypush.message import Message
|
|
||||||
|
|
||||||
|
|
||||||
def get_module_and_method_from_action(action):
|
def get_module_and_method_from_action(action):
|
||||||
""" Input : action=music.mpd.play
|
""" Input : action=music.mpd.play
|
||||||
|
@ -65,5 +64,12 @@ def clear_timeout():
|
||||||
signal.alarm(0)
|
signal.alarm(0)
|
||||||
|
|
||||||
|
|
||||||
|
def mkdir_p(path):
|
||||||
|
try: os.makedirs(path)
|
||||||
|
except OSError as exc: # Python >2.5
|
||||||
|
if exc.errno == errno.EEXIST and os.path.isdir(path): pass
|
||||||
|
else: raise
|
||||||
|
|
||||||
|
|
||||||
# vim:sw=4:ts=4:et:
|
# vim:sw=4:ts=4:et:
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,9 @@ sqlalchemy
|
||||||
# Dates support
|
# Dates support
|
||||||
python-dateutil
|
python-dateutil
|
||||||
|
|
||||||
|
# RSS feeds support
|
||||||
|
feedparser
|
||||||
|
|
||||||
# Philips Hue plugin support
|
# Philips Hue plugin support
|
||||||
phue
|
phue
|
||||||
|
|
||||||
|
|
3
setup.py
3
setup.py
|
@ -65,7 +65,8 @@ setup(
|
||||||
'Support for Pushbullet backend': ['requests', 'websocket-client'],
|
'Support for Pushbullet backend': ['requests', 'websocket-client'],
|
||||||
'Support for HTTP backend': ['flask'],
|
'Support for HTTP backend': ['flask'],
|
||||||
'Support for HTTP poll backend': ['frozendict'],
|
'Support for HTTP poll backend': ['frozendict'],
|
||||||
'Support for database plugin': ['sqlalchemy', 'python-dateutil'],
|
'Support for database plugin': ['sqlalchemy'],
|
||||||
|
'Support for RSS feeds': ['feedparser'],
|
||||||
'Support for Philips Hue plugin': ['phue'],
|
'Support for Philips Hue plugin': ['phue'],
|
||||||
'Support for MPD/Mopidy music server plugin': ['python-mpd2'],
|
'Support for MPD/Mopidy music server plugin': ['python-mpd2'],
|
||||||
'Support for Belkin WeMo Switch plugin': ['ouimeaux'],
|
'Support for Belkin WeMo Switch plugin': ['ouimeaux'],
|
||||||
|
|
Loading…
Reference in a new issue