Refactored RSS integration into its own rss plugin [closes #199]

This commit is contained in:
Fabio Manganiello 2022-01-06 00:46:05 +01:00
parent 3e4b91cd6c
commit e9f6d9a8bc
Signed by: blacklight
GPG key ID: D90FBA7F76362774
20 changed files with 312 additions and 30 deletions

View file

@ -50,6 +50,7 @@ Events
platypush/events/ping.rst platypush/events/ping.rst
platypush/events/pushbullet.rst platypush/events/pushbullet.rst
platypush/events/qrcode.rst platypush/events/qrcode.rst
platypush/events/rss.rst
platypush/events/scard.rst platypush/events/scard.rst
platypush/events/sensor.rst platypush/events/sensor.rst
platypush/events/sensor.ir.rst platypush/events/sensor.ir.rst

View file

@ -0,0 +1,5 @@
``platypush.message.event.rss``
===============================
.. automodule:: platypush.message.event.rss
:members:

View file

@ -0,0 +1,5 @@
``rss``
=======
.. automodule:: platypush.plugins.rss
:members:

View file

@ -103,6 +103,7 @@ Plugins
platypush/plugins/pwm.pca9685.rst platypush/plugins/pwm.pca9685.rst
platypush/plugins/qrcode.rst platypush/plugins/qrcode.rst
platypush/plugins/redis.rst platypush/plugins/redis.rst
platypush/plugins/rss.rst
platypush/plugins/rtorrent.rst platypush/plugins/rtorrent.rst
platypush/plugins/serial.rst platypush/plugins/serial.rst
platypush/plugins/shell.rst platypush/plugins/shell.rst

View file

@ -7,6 +7,9 @@ from platypush.backend.http.request import HttpRequest
class HttpPollBackend(Backend): class HttpPollBackend(Backend):
""" """
WARNING: This integration is deprecated, since it was practically only used for RSS subscriptions.
RSS feeds integration has been replaced by :class:`platypush.plugins.rss.RssPlugin`.
This backend will poll multiple HTTP endpoints/services and return events This backend will poll multiple HTTP endpoints/services and return events
the bus whenever something new happened. Supported types: the bus whenever something new happened. Supported types:
:class:`platypush.backend.http.request.JsonHttpRequest` (for polling updates on :class:`platypush.backend.http.request.JsonHttpRequest` (for polling updates on

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1,2 +0,0 @@
(window["webpackJsonp"]=window["webpackJsonp"]||[]).push([["chunk-75e68c24"],{"0709":function(e,t,r){"use strict";r("84c2")},"84c2":function(e,t,r){},c306:function(e,t,r){"use strict";r.r(t);var n=r("7a23"),s=Object(n["K"])("data-v-1b599aef");Object(n["u"])("data-v-1b599aef");var i={class:"rss-news"},c={key:0,class:"article"};Object(n["s"])();var u=s((function(e,t,r,s,u,a){return Object(n["r"])(),Object(n["e"])("div",i,[e.currentArticle?(Object(n["r"])(),Object(n["e"])("div",c,[Object(n["h"])("div",{class:"source",textContent:Object(n["C"])(e.currentArticle.source)},null,8,["textContent"]),Object(n["h"])("div",{class:"title",textContent:Object(n["C"])(e.currentArticle.title)},null,8,["textContent"]),Object(n["h"])("div",{class:"published",textContent:Object(n["C"])(new Date(e.currentArticle.published).toDateString()+", "+new Date(e.currentArticle.published).toTimeString().substring(0,5))},null,8,["textContent"])])):Object(n["f"])("",!0)])})),a=r("2909"),l=r("1da1"),o=(r("96cf"),r("a9e3"),r("b680"),r("3e54")),d={name:"RssNews",mixins:[o["a"]],props:{db:{type:String,required:!0},limit:{type:Number,required:!1,default:25},refreshSeconds:{type:Number,required:!1,default:15}},data:function(){return{articles:[],queue:[],currentArticle:void 0}},methods:{refresh:function(){var e=Object(l["a"])(regeneratorRuntime.mark((function e(){return regeneratorRuntime.wrap((function(e){while(1)switch(e.prev=e.next){case 0:if(this.queue.length){e.next=5;break}return e.next=3,this.request("db.select",{engine:this.db,query:"\n select s.title as source, e.title, e.summary,\n strftime('%Y-%m-%dT%H:%M:%fZ', e.published) as published\n from FeedEntry e join FeedSource s\n on e.source_id = s.id order by e.published desc limit ".concat(this.limit)});case 3:this.articles=e.sent,this.queue=Object(a["a"])(this.articles);case 5:if(this.queue.length){e.next=7;break}return e.abrupt("return");case 7:this.currentArticle=this.queue.pop();case 8:case"end":return e.stop()}}),e,this)})));function 
t(){return e.apply(this,arguments)}return t}()},mounted:function(){this.refresh(),setInterval(this.refresh,parseInt((1e3*this.refreshSeconds).toFixed(0)))}};r("0709");d.render=u,d.__scopeId="data-v-1b599aef";t["default"]=d}}]);
//# sourceMappingURL=chunk-75e68c24.6b249468.js.map

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,2 @@
(window["webpackJsonp"]=window["webpackJsonp"]||[]).push([["chunk-7babe442"],{"0d43":function(e,t,r){"use strict";r("ddbd")},c306:function(e,t,r){"use strict";r.r(t);var n=r("7a23"),s=Object(n["K"])("data-v-52a823f4");Object(n["u"])("data-v-52a823f4");var i={class:"rss-news"},c={key:0,class:"article"};Object(n["s"])();var u=s((function(e,t,r,s,u,a){return Object(n["r"])(),Object(n["e"])("div",i,[e.currentArticle?(Object(n["r"])(),Object(n["e"])("div",c,[Object(n["h"])("div",{class:"source",textContent:Object(n["C"])(e.currentArticle.feed_title||e.currentArticle.feed_url)},null,8,["textContent"]),Object(n["h"])("div",{class:"title",textContent:Object(n["C"])(e.currentArticle.title)},null,8,["textContent"]),Object(n["h"])("div",{class:"published",textContent:Object(n["C"])(new Date(e.currentArticle.published).toDateString()+", "+new Date(e.currentArticle.published).toTimeString().substring(0,5))},null,8,["textContent"])])):Object(n["f"])("",!0)])})),a=r("2909"),l=r("1da1"),d=(r("96cf"),r("a9e3"),r("b680"),r("3e54")),o={name:"RssNews",mixins:[d["a"]],props:{limit:{type:Number,required:!1,default:25},refreshSeconds:{type:Number,required:!1,default:15}},data:function(){return{articles:[],queue:[],currentArticle:void 0}},methods:{refresh:function(){var e=Object(l["a"])(regeneratorRuntime.mark((function e(){return regeneratorRuntime.wrap((function(e){while(1)switch(e.prev=e.next){case 0:if(this.queue.length){e.next=5;break}return e.next=3,this.request("rss.get_latest_entries",{limit:this.limit});case 3:this.articles=e.sent,this.queue=Object(a["a"])(this.articles).reverse();case 5:if(this.queue.length){e.next=7;break}return e.abrupt("return");case 7:this.currentArticle=this.queue.pop();case 8:case"end":return e.stop()}}),e,this)})));function t(){return e.apply(this,arguments)}return 
t}()},mounted:function(){this.refresh(),setInterval(this.refresh,parseInt((1e3*this.refreshSeconds).toFixed(0)))}};r("0d43");o.render=u,o.__scopeId="data-v-52a823f4";t["default"]=o},ddbd:function(e,t,r){}}]);
//# sourceMappingURL=chunk-7babe442.e3a7971d.js.map

View file

@ -0,0 +1 @@
{"version":3,"sources":["webpack:///./src/components/widgets/RssNews/Index.vue?a9df","webpack:///./src/components/widgets/RssNews/Index.vue","webpack:///./src/components/widgets/RssNews/Index.vue?6001"],"names":["class","currentArticle","feed_title","feed_url","title","Date","published","toDateString","toTimeString","substring","name","mixins","Utils","props","limit","type","Number","required","default","refreshSeconds","data","articles","queue","undefined","methods","refresh","this","length","request","reverse","pop","mounted","setInterval","parseInt","toFixed","render","__scopeId"],"mappings":"kHAAA,W,sICCOA,MAAM,Y,SACJA,MAAM,W,wEADb,eAMM,MANN,EAMM,CALuB,EAAAC,gB,iBAA3B,eAIM,MAJN,EAIM,CAHJ,eAAwF,OAAnFD,MAAM,S,YAAS,eAA6D,EAAvC,eAACE,YAAc,EAAAD,eAAeE,W,wBACxE,eAAuD,OAAlDH,MAAM,Q,YAAQ,eAA6B,EAAP,eAACI,Q,wBAC1C,eAAkK,OAA7JJ,MAAM,Y,YAAY,eAAoI,IAAxHK,KAAK,EAAAJ,eAAeK,WAAWC,eAAY,SAAgBF,KAAK,EAAAJ,eAAeK,WAAWE,eAAeC,UAAS,O,0HAY5I,GACbC,KAAM,UACNC,OAAQ,CAACC,EAAA,MACTC,MAAO,CAELC,MAAO,CACLC,KAAMC,OACNC,UAAU,EACVC,QAAS,IAIXC,eAAgB,CACdJ,KAAMC,OACNC,UAAU,EACVC,QAAS,KAIbE,KAAM,WACJ,MAAO,CACLC,SAAU,GACVC,MAAO,GACPrB,oBAAgBsB,IAIpBC,QAAS,CACPC,QAAS,WAAF,8CAAE,iGACFC,KAAKJ,MAAMK,OADT,gCAEiBD,KAAKE,QAAQ,yBAA0B,CAC3Dd,MAAOY,KAAKZ,QAHT,OAELY,KAAKL,SAFA,OAMLK,KAAKJ,MAAQ,eAAII,KAAKL,UAAUQ,UAN3B,UASFH,KAAKJ,MAAMK,OATT,iDAYPD,KAAKzB,eAAiByB,KAAKJ,MAAMQ,MAZ1B,gDAAF,qDAAE,IAgBXC,QAAS,WACPL,KAAKD,UACLO,YAAYN,KAAKD,QAASQ,UAA8B,IAApBP,KAAKP,gBAAqBe,QAAQ,O,UC1D1E,EAAOC,OAASA,EAChB,EAAOC,UAAY,kBAEJ,gB","file":"static/js/chunk-7babe442.e3a7971d.js","sourcesContent":["export * from 
\"-!../../../../node_modules/mini-css-extract-plugin/dist/loader.js??ref--8-oneOf-1-0!../../../../node_modules/css-loader/dist/cjs.js??ref--8-oneOf-1-1!../../../../node_modules/vue-loader-v16/dist/stylePostLoader.js!../../../../node_modules/postcss-loader/src/index.js??ref--8-oneOf-1-2!../../../../node_modules/sass-loader/dist/cjs.js??ref--8-oneOf-1-3!../../../../node_modules/cache-loader/dist/cjs.js??ref--0-0!../../../../node_modules/vue-loader-v16/dist/index.js??ref--0-1!./Index.vue?vue&type=style&index=0&id=52a823f4&lang=scss&scoped=true\"","<template>\n <div class=\"rss-news\">\n <div class=\"article\" v-if=\"currentArticle\">\n <div class=\"source\" v-text=\"currentArticle.feed_title || currentArticle.feed_url\"></div>\n <div class=\"title\" v-text=\"currentArticle.title\"></div>\n <div class=\"published\" v-text=\"new Date(currentArticle.published).toDateString() + ', ' + new Date(currentArticle.published).toTimeString().substring(0,5)\"></div>\n </div>\n </div>\n</template>\n\n<script>\nimport Utils from \"@/Utils\";\n\n/**\n * In order to use this widget you need to configure the `backend.http.poll` backend to\n * poll a list of RSS sources.\n */\nexport default {\n name: \"RssNews\",\n mixins: [Utils],\n props: {\n // Maximum number of items to be shown in a cycle.\n limit: {\n type: Number,\n required: false,\n default: 25,\n },\n\n // How long an entry should be displayed before moving to the next one.\n refreshSeconds: {\n type: Number,\n required: false,\n default: 15,\n },\n },\n\n data: function() {\n return {\n articles: [],\n queue: [],\n currentArticle: undefined,\n }\n },\n\n methods: {\n refresh: async function() {\n if (!this.queue.length) {\n this.articles = await this.request('rss.get_latest_entries', {\n limit: this.limit\n })\n\n this.queue = [...this.articles].reverse()\n }\n\n if (!this.queue.length)\n return\n\n this.currentArticle = this.queue.pop()\n },\n },\n\n mounted: function() {\n this.refresh()\n setInterval(this.refresh, 
parseInt((this.refreshSeconds*1000).toFixed(0)))\n },\n}\n</script>\n\n<style lang=\"scss\" scoped>\n.rss-news {\n width: 100%;\n height: 100%;\n display: flex;\n align-items: center;\n letter-spacing: .025em;\n\n .article {\n width: 90%;\n padding: 0 2em;\n\n .source {\n font-size: 1.7em;\n font-weight: bold;\n margin-bottom: .5em;\n }\n\n .title {\n font-size: 1.8em;\n font-weight: normal;\n margin-bottom: .5em;\n }\n\n .published {\n text-align: right;\n font-size: 1.1em;\n }\n }\n}\n</style>\n","import { render } from \"./Index.vue?vue&type=template&id=52a823f4&scoped=true\"\nimport script from \"./Index.vue?vue&type=script&lang=js\"\nexport * from \"./Index.vue?vue&type=script&lang=js\"\n\nimport \"./Index.vue?vue&type=style&index=0&id=52a823f4&lang=scss&scoped=true\"\nscript.render = render\nscript.__scopeId = \"data-v-52a823f4\"\n\nexport default script"],"sourceRoot":""}

View file

@ -1,7 +1,7 @@
<template> <template>
<div class="rss-news"> <div class="rss-news">
<div class="article" v-if="currentArticle"> <div class="article" v-if="currentArticle">
<div class="source" v-text="currentArticle.source"></div> <div class="source" v-text="currentArticle.feed_title || currentArticle.feed_url"></div>
<div class="title" v-text="currentArticle.title"></div> <div class="title" v-text="currentArticle.title"></div>
<div class="published" v-text="new Date(currentArticle.published).toDateString() + ', ' + new Date(currentArticle.published).toTimeString().substring(0,5)"></div> <div class="published" v-text="new Date(currentArticle.published).toDateString() + ', ' + new Date(currentArticle.published).toTimeString().substring(0,5)"></div>
</div> </div>
@ -12,21 +12,13 @@
import Utils from "@/Utils"; import Utils from "@/Utils";
/** /**
* In order to use this widget you need to configure the `backend.http.poll` backend to * In order to use this widget you need to configure the `rss` plugin
* poll a list of RSS sources. * with a list of subscriptions.
*/ */
export default { export default {
name: "RssNews", name: "RssNews",
mixins: [Utils], mixins: [Utils],
props: { props: {
// Database engine string pointing to the source of the RSS feeds.
// If not otherwise configured, you should set this to
// `sqlite:///<HOME>/.local/share/platypush/feeds/rss.db`.
db: {
type: String,
required: true,
},
// Maximum number of items to be shown in a cycle. // Maximum number of items to be shown in a cycle.
limit: { limit: {
type: Number, type: Number,
@ -53,16 +45,11 @@ export default {
methods: { methods: {
refresh: async function() { refresh: async function() {
if (!this.queue.length) { if (!this.queue.length) {
this.articles = await this.request('db.select', { this.articles = await this.request('rss.get_latest_entries', {
engine: this.db, limit: this.limit
query: `
select s.title as source, e.title, e.summary,
strftime('%Y-%m-%dT%H:%M:%fZ', e.published) as published
from FeedEntry e join FeedSource s
on e.source_id = s.id order by e.published desc limit ${this.limit}`,
}) })
this.queue = [...this.articles] this.queue = [...this.articles].reverse()
} }
if (!this.queue.length) if (!this.queue.length)

View file

@ -0,0 +1,20 @@
from datetime import datetime
from typing import Optional
from platypush.message.event import Event
class NewFeedEntryEvent(Event):
    """
    Event triggered when a new (RSS/Atom) feed entry is received.
    """

    def __init__(
            self,
            *,
            feed_url: str,
            url: str,
            title: Optional[str] = None,
            id: Optional[str] = None,
            feed_title: Optional[str] = None,
            published: Optional[datetime] = None,
            summary: Optional[str] = None,
            content: Optional[str] = None,
            **kwargs
    ):
        """
        :param feed_url: URL of the feed the entry belongs to.
        :param url: URL of the entry.
        :param title: Title of the entry.
        :param id: Identifier of the entry.
        :param feed_title: Title of the feed the entry belongs to.
        :param published: Publication time of the entry.
        :param summary: Summary of the entry.
        :param content: Content of the entry.
        :param kwargs: Extra arguments forwarded to the base event.
        """
        # Gather the entry attributes once and forward them, together with
        # any extra arguments, to the base event constructor.
        entry_args = {
            'feed_url': feed_url,
            'url': url,
            'title': title,
            'id': id,
            'feed_title': feed_title,
            'published': published,
            'summary': summary,
            'content': content,
        }

        super().__init__(**entry_args, **kwargs)

View file

@ -105,7 +105,6 @@ class DbPlugin(Plugin):
return table, engine return table, engine
@action @action
def select(self, query=None, table=None, filter=None, engine=None, *args, **kwargs): def select(self, query=None, table=None, filter=None, engine=None, *args, **kwargs):
""" """
@ -179,10 +178,9 @@ class DbPlugin(Plugin):
with engine.connect() as connection: with engine.connect() as connection:
result = connection.execute(query) result = connection.execute(query)
columns = result.keys() columns = result.keys()
rows = [ rows = [
{ columns[i]: row[i] for i in range(0, len(columns)) } {col: row[i] for i, col in enumerate(list(columns))}
for row in result.fetchall() for row in result.fetchall()
] ]

View file

@ -0,0 +1,214 @@
import datetime
import queue
import threading
import time
from typing import Optional, Collection
import dateutil.parser
import requests
from platypush.context import get_bus, get_plugin
from platypush.message.event.rss import NewFeedEntryEvent
from platypush.plugins import RunnablePlugin, action
from platypush.schemas.rss import RssFeedEntrySchema
class RssPlugin(RunnablePlugin):
    """
    A plugin for parsing and subscribing to RSS feeds.

    Triggers:

        - :class:`platypush.message.event.rss.NewFeedEntryEvent` when a new entry is received on a subscribed feed.

    Requires:

        * **feedparser** (``pip install feedparser``)

    """

    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) ' + \
                 'Chrome/62.0.3202.94 Safari/537.36'

    def __init__(
            self, subscriptions: Optional[Collection[str]] = None, poll_seconds: int = 300,
            user_agent: str = user_agent, **kwargs
    ):
        """
        :param subscriptions: List of feeds to monitor for updates, as URLs.
        :param poll_seconds: How often we should check for updates (default: 300 seconds).
        :param user_agent: Custom user agent to use for the requests.
        """
        super().__init__(**kwargs)
        self.subscriptions = subscriptions or []
        self.poll_seconds = poll_seconds
        self.user_agent = user_agent
        self._latest_timestamps = self._get_latest_timestamps()
        # Build one distinct queue per worker. ``[queue.Queue()] * 5`` would
        # repeat a reference to the *same* queue five times, which defeats the
        # round-robin dispatch performed in ``main``.
        self._feed_worker_queues = [queue.Queue() for _ in range(5)]
        self._feed_response_queue = queue.Queue()
        self._feed_workers = []
        self._latest_entries = []

    @staticmethod
    def _get_feed_latest_timestamp_varname(url: str) -> str:
        """
        :param url: Feed URL.
        :return: Name of the variable that stores the latest seen timestamp for the feed.
        """
        return f'LATEST_FEED_TIMESTAMP[{url}]'

    def _get_latest_timestamps(self) -> dict:
        """
        Load the latest seen timestamp of each subscribed feed from the ``variable`` plugin.

        :return: Mapping of feed URL -> parsed timestamp. Feeds without a stored
            (or with an unparsable) timestamp are omitted.
        """
        variable = get_plugin('variable')
        timestamps = {}

        for url in self.subscriptions:
            varname = self._get_feed_latest_timestamp_varname(url)
            stored = variable.get(varname).output.get(varname)
            if not stored:
                # Nothing stored yet (e.g. first run for this feed): passing
                # None to isoparse would raise and abort the plugin start.
                continue

            try:
                timestamps[url] = dateutil.parser.isoparse(stored)
            except (TypeError, ValueError) as e:
                self.logger.warning(f'Ignoring invalid stored timestamp for feed {url}: {e}')

        return timestamps

    def _update_latest_timestamps(self) -> None:
        """
        Persist the latest seen timestamp of each feed through the ``variable`` plugin.
        """
        # Feeds that have never produced an entry have no timestamp: don't
        # persist a None placeholder for them.
        known_timestamps = {
            self._get_feed_latest_timestamp_varname(url): latest_timestamp
            for url, latest_timestamp in self._latest_timestamps.items()
            if latest_timestamp is not None
        }

        if known_timestamps:
            get_plugin('variable').set(**known_timestamps)

    @staticmethod
    def _parse_content(entry) -> Optional[str]:
        """
        Extract the content of a parsed feed entry.

        :param entry: Feed entry, as returned by ``feedparser``.
        :return: The value of the first content item if the content is a list, the
            content itself otherwise, or ``None`` if the entry has no content.
        """
        content = getattr(entry, 'content', None)
        if not content:
            return

        if isinstance(content, list):
            return content[0]['value']

        return content

    @action
    def parse_feed(self, url: str):
        """
        Parse a feed URL.

        :param url: Feed URL.
        :return: .. schema:: rss.RssFeedEntrySchema(many=True)
        """
        import feedparser

        # NOTE(review): the request has no timeout, so a stalled server can
        # block the calling worker indefinitely - consider adding one.
        feed = feedparser.parse(requests.get(url, headers={'User-Agent': self.user_agent}).text)
        return RssFeedEntrySchema().dump(
            sorted([
                {
                    'feed_url': url,
                    'feed_title': getattr(feed.feed, 'title', None),
                    'id': entry.id,
                    'url': entry.link,
                    'published': datetime.datetime.fromtimestamp(time.mktime(entry.published_parsed)),
                    'title': entry.title,
                    'summary': getattr(entry, 'summary', None),
                    'content': self._parse_content(entry),
                }
                for entry in feed.entries
                # Entries without a publication time can't be ordered: skip them.
                if getattr(entry, 'published_parsed', None)
            ], key=lambda e: e['published']),
            many=True
        )

    @action
    def get_latest_entries(self, limit: int = 20):
        """
        Get the latest entries from the subscribed feeds, sorted by descending published date.

        :param limit: Maximum number of entries to return (default: 20).
        :return: .. schema:: rss.RssFeedEntrySchema(many=True)
        """
        return sorted(self._latest_entries, key=lambda e: e['published'], reverse=True)[:limit]

    def _feed_worker(self, q: queue.Queue):
        """
        Worker loop: read feed URLs from ``q``, parse them and push either the
        parsed content or the raised exception to the response queue.

        :param q: Queue of feed URLs assigned to this worker.
        """
        while not self.should_stop():
            try:
                # Short timeout, so the stop condition is checked regularly.
                url = q.get(block=True, timeout=1)
            except queue.Empty:
                continue

            try:
                self._feed_response_queue.put({
                    'url': url,
                    'content': self.parse_feed(url).output,
                })
            except Exception as e:
                # Report the failure to the main loop instead of killing the worker.
                self._feed_response_queue.put({
                    'url': url,
                    'error': e,
                })

        # Wake up the main loop if it is waiting for a response.
        self._feed_response_queue.put(None)

    def _wait_before_next_poll(self) -> None:
        """
        Wait ``poll_seconds`` before the next polling cycle, returning early if
        the plugin has been asked to stop.
        """
        deadline = time.time() + self.poll_seconds
        while not self.should_stop():
            remaining = deadline - time.time()
            if remaining <= 0:
                break

            # Sleep in short slices, so a stop request is honoured promptly.
            time.sleep(min(1, remaining))

    def main(self):
        """
        Start the feed workers, then periodically poll all the subscribed feeds
        and post a :class:`platypush.message.event.rss.NewFeedEntryEvent` for
        each entry newer than the latest seen one.
        """
        self._feed_workers = [
            threading.Thread(target=self._feed_worker, args=(q,))
            for q in self._feed_worker_queues
        ]

        for worker in self._feed_workers:
            worker.start()

        self.logger.info(f'Initialized RSS plugin with {len(self.subscriptions)} subscriptions')

        while not self.should_stop():
            responses = {}
            # Dispatch the feeds to the workers in round-robin.
            for i, url in enumerate(self.subscriptions):
                worker_queue = self._feed_worker_queues[i % len(self._feed_worker_queues)]
                worker_queue.put(url)

            time_start = time.time()
            timeout = 60
            max_time = time_start + timeout
            new_entries = []

            while (
                    not self.should_stop() and
                    len(responses) < len(self.subscriptions) and
                    time.time() <= max_time
            ):
                try:
                    # Only wait for the time left in this cycle (never a
                    # negative value, which Queue.get would reject).
                    response = self._feed_response_queue.get(
                        block=True, timeout=max(0.0, max_time - time.time())
                    )
                except queue.Empty:
                    self.logger.warning('RSS parse timeout')
                    break

                if not response:
                    continue

                url = response['url']
                error = response.get('error')
                if error:
                    self.logger.error(f'Could not parse feed {url}: {error}')
                    responses[url] = error
                else:
                    responses[url] = response['content']

            # Failed feeds only count towards the number of received responses.
            responses = {k: v for k, v in responses.items() if not isinstance(v, Exception)}

            for url, response in responses.items():
                latest_timestamp = self._latest_timestamps.get(url)
                new_entries += response

                for entry in response:
                    published = datetime.datetime.fromisoformat(entry['published'])
                    if not latest_timestamp or published > latest_timestamp:
                        latest_timestamp = published
                        get_bus().post(NewFeedEntryEvent(**entry))

                self._latest_timestamps[url] = latest_timestamp

            self._update_latest_timestamps()
            self._latest_entries = new_entries
            self._wait_before_next_poll()

    def stop(self):
        """
        Stop the plugin and wait for the feed workers to terminate.
        """
        super().stop()
        for worker in self._feed_workers:
            worker.join(timeout=60)

        self.logger.info('RSS integration stopped')
# vim:sw=4:ts=4:et:

View file

@ -0,0 +1,8 @@
manifest:
events:
platypush.message.event.rss.NewFeedEntryEvent: when a new entry is received on a subscribed feed.
install:
pip:
- feedparser
package: platypush.plugins.rss
type: plugin

40
platypush/schemas/rss.py Normal file
View file

@ -0,0 +1,40 @@
from marshmallow import fields
from marshmallow.schema import Schema
from platypush.schemas import DateTime
class RssFeedEntrySchema(Schema):
    """
    Schema of a feed entry.
    """

    feed_title = fields.String(metadata={'description': 'Feed title'})

    feed_url = fields.URL(
        required=True,
        metadata={
            'description': 'URL of the feed',
            'example': 'https://some-website/rss',
        },
    )

    id = fields.String(
        required=True,
        metadata={
            'description': 'Feed entry ID',
            'example': '1234',
        },
    )

    url = fields.URL(
        required=True,
        metadata={
            'description': 'URL of the feed entry',
            'example': 'https://some-website/articles/1234',
        },
    )

    published = DateTime(
        required=True,
        metadata={'description': 'Entry published time'},
    )

    title = fields.String(metadata={'description': 'Feed entry title'})
    summary = fields.String(metadata={'description': 'Feed entry summary'})
    content = fields.String(metadata={'description': 'Feed entry content'})