Refactored RSS integration into its own rss
plugin [closes #199]
This commit is contained in:
parent
3e4b91cd6c
commit
e9f6d9a8bc
20 changed files with 312 additions and 30 deletions
|
@ -50,6 +50,7 @@ Events
|
||||||
platypush/events/ping.rst
|
platypush/events/ping.rst
|
||||||
platypush/events/pushbullet.rst
|
platypush/events/pushbullet.rst
|
||||||
platypush/events/qrcode.rst
|
platypush/events/qrcode.rst
|
||||||
|
platypush/events/rss.rst
|
||||||
platypush/events/scard.rst
|
platypush/events/scard.rst
|
||||||
platypush/events/sensor.rst
|
platypush/events/sensor.rst
|
||||||
platypush/events/sensor.ir.rst
|
platypush/events/sensor.ir.rst
|
||||||
|
|
5
docs/source/platypush/events/rss.rst
Normal file
5
docs/source/platypush/events/rss.rst
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
``platypush.message.event.rss``
|
||||||
|
===============================
|
||||||
|
|
||||||
|
.. automodule:: platypush.message.event.rss
|
||||||
|
:members:
|
5
docs/source/platypush/plugins/rss.rst
Normal file
5
docs/source/platypush/plugins/rss.rst
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
``rss``
|
||||||
|
=======
|
||||||
|
|
||||||
|
.. automodule:: platypush.plugins.rss
|
||||||
|
:members:
|
|
@ -103,6 +103,7 @@ Plugins
|
||||||
platypush/plugins/pwm.pca9685.rst
|
platypush/plugins/pwm.pca9685.rst
|
||||||
platypush/plugins/qrcode.rst
|
platypush/plugins/qrcode.rst
|
||||||
platypush/plugins/redis.rst
|
platypush/plugins/redis.rst
|
||||||
|
platypush/plugins/rss.rst
|
||||||
platypush/plugins/rtorrent.rst
|
platypush/plugins/rtorrent.rst
|
||||||
platypush/plugins/serial.rst
|
platypush/plugins/serial.rst
|
||||||
platypush/plugins/shell.rst
|
platypush/plugins/shell.rst
|
||||||
|
|
|
@ -7,6 +7,9 @@ from platypush.backend.http.request import HttpRequest
|
||||||
|
|
||||||
class HttpPollBackend(Backend):
|
class HttpPollBackend(Backend):
|
||||||
"""
|
"""
|
||||||
|
WARNING: This integration is deprecated, since it was practically only used for RSS subscriptions.
|
||||||
|
RSS feeds integration has been replaced by :class:`platypush.plugins.rss.RssPlugin`.
|
||||||
|
|
||||||
This backend will poll multiple HTTP endpoints/services and return events
|
This backend will poll multiple HTTP endpoints/services and return events
|
||||||
the bus whenever something new happened. Supported types:
|
the bus whenever something new happened. Supported types:
|
||||||
:class:`platypush.backend.http.request.JsonHttpRequest` (for polling updates on
|
:class:`platypush.backend.http.request.JsonHttpRequest` (for polling updates on
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
platypush/backend/http/webapp/dist/static/css/chunk-7babe442.e5958b5e.css
vendored
Normal file
1
platypush/backend/http/webapp/dist/static/css/chunk-7babe442.e5958b5e.css
vendored
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1,2 +0,0 @@
|
||||||
(window["webpackJsonp"]=window["webpackJsonp"]||[]).push([["chunk-75e68c24"],{"0709":function(e,t,r){"use strict";r("84c2")},"84c2":function(e,t,r){},c306:function(e,t,r){"use strict";r.r(t);var n=r("7a23"),s=Object(n["K"])("data-v-1b599aef");Object(n["u"])("data-v-1b599aef");var i={class:"rss-news"},c={key:0,class:"article"};Object(n["s"])();var u=s((function(e,t,r,s,u,a){return Object(n["r"])(),Object(n["e"])("div",i,[e.currentArticle?(Object(n["r"])(),Object(n["e"])("div",c,[Object(n["h"])("div",{class:"source",textContent:Object(n["C"])(e.currentArticle.source)},null,8,["textContent"]),Object(n["h"])("div",{class:"title",textContent:Object(n["C"])(e.currentArticle.title)},null,8,["textContent"]),Object(n["h"])("div",{class:"published",textContent:Object(n["C"])(new Date(e.currentArticle.published).toDateString()+", "+new Date(e.currentArticle.published).toTimeString().substring(0,5))},null,8,["textContent"])])):Object(n["f"])("",!0)])})),a=r("2909"),l=r("1da1"),o=(r("96cf"),r("a9e3"),r("b680"),r("3e54")),d={name:"RssNews",mixins:[o["a"]],props:{db:{type:String,required:!0},limit:{type:Number,required:!1,default:25},refreshSeconds:{type:Number,required:!1,default:15}},data:function(){return{articles:[],queue:[],currentArticle:void 0}},methods:{refresh:function(){var e=Object(l["a"])(regeneratorRuntime.mark((function e(){return regeneratorRuntime.wrap((function(e){while(1)switch(e.prev=e.next){case 0:if(this.queue.length){e.next=5;break}return e.next=3,this.request("db.select",{engine:this.db,query:"\n select s.title as source, e.title, e.summary,\n strftime('%Y-%m-%dT%H:%M:%fZ', e.published) as published\n from FeedEntry e join FeedSource s\n on e.source_id = s.id order by e.published desc limit ".concat(this.limit)});case 3:this.articles=e.sent,this.queue=Object(a["a"])(this.articles);case 5:if(this.queue.length){e.next=7;break}return e.abrupt("return");case 7:this.currentArticle=this.queue.pop();case 8:case"end":return e.stop()}}),e,this)})));function 
t(){return e.apply(this,arguments)}return t}()},mounted:function(){this.refresh(),setInterval(this.refresh,parseInt((1e3*this.refreshSeconds).toFixed(0)))}};r("0709");d.render=u,d.__scopeId="data-v-1b599aef";t["default"]=d}}]);
|
|
||||||
//# sourceMappingURL=chunk-75e68c24.6b249468.js.map
|
|
File diff suppressed because one or more lines are too long
2
platypush/backend/http/webapp/dist/static/js/chunk-7babe442.e3a7971d.js
vendored
Normal file
2
platypush/backend/http/webapp/dist/static/js/chunk-7babe442.e3a7971d.js
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
(window["webpackJsonp"]=window["webpackJsonp"]||[]).push([["chunk-7babe442"],{"0d43":function(e,t,r){"use strict";r("ddbd")},c306:function(e,t,r){"use strict";r.r(t);var n=r("7a23"),s=Object(n["K"])("data-v-52a823f4");Object(n["u"])("data-v-52a823f4");var i={class:"rss-news"},c={key:0,class:"article"};Object(n["s"])();var u=s((function(e,t,r,s,u,a){return Object(n["r"])(),Object(n["e"])("div",i,[e.currentArticle?(Object(n["r"])(),Object(n["e"])("div",c,[Object(n["h"])("div",{class:"source",textContent:Object(n["C"])(e.currentArticle.feed_title||e.currentArticle.feed_url)},null,8,["textContent"]),Object(n["h"])("div",{class:"title",textContent:Object(n["C"])(e.currentArticle.title)},null,8,["textContent"]),Object(n["h"])("div",{class:"published",textContent:Object(n["C"])(new Date(e.currentArticle.published).toDateString()+", "+new Date(e.currentArticle.published).toTimeString().substring(0,5))},null,8,["textContent"])])):Object(n["f"])("",!0)])})),a=r("2909"),l=r("1da1"),d=(r("96cf"),r("a9e3"),r("b680"),r("3e54")),o={name:"RssNews",mixins:[d["a"]],props:{limit:{type:Number,required:!1,default:25},refreshSeconds:{type:Number,required:!1,default:15}},data:function(){return{articles:[],queue:[],currentArticle:void 0}},methods:{refresh:function(){var e=Object(l["a"])(regeneratorRuntime.mark((function e(){return regeneratorRuntime.wrap((function(e){while(1)switch(e.prev=e.next){case 0:if(this.queue.length){e.next=5;break}return e.next=3,this.request("rss.get_latest_entries",{limit:this.limit});case 3:this.articles=e.sent,this.queue=Object(a["a"])(this.articles).reverse();case 5:if(this.queue.length){e.next=7;break}return e.abrupt("return");case 7:this.currentArticle=this.queue.pop();case 8:case"end":return e.stop()}}),e,this)})));function t(){return e.apply(this,arguments)}return 
t}()},mounted:function(){this.refresh(),setInterval(this.refresh,parseInt((1e3*this.refreshSeconds).toFixed(0)))}};r("0d43");o.render=u,o.__scopeId="data-v-52a823f4";t["default"]=o},ddbd:function(e,t,r){}}]);
|
||||||
|
//# sourceMappingURL=chunk-7babe442.e3a7971d.js.map
|
1
platypush/backend/http/webapp/dist/static/js/chunk-7babe442.e3a7971d.js.map
vendored
Normal file
1
platypush/backend/http/webapp/dist/static/js/chunk-7babe442.e3a7971d.js.map
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{"version":3,"sources":["webpack:///./src/components/widgets/RssNews/Index.vue?a9df","webpack:///./src/components/widgets/RssNews/Index.vue","webpack:///./src/components/widgets/RssNews/Index.vue?6001"],"names":["class","currentArticle","feed_title","feed_url","title","Date","published","toDateString","toTimeString","substring","name","mixins","Utils","props","limit","type","Number","required","default","refreshSeconds","data","articles","queue","undefined","methods","refresh","this","length","request","reverse","pop","mounted","setInterval","parseInt","toFixed","render","__scopeId"],"mappings":"kHAAA,W,sICCOA,MAAM,Y,SACJA,MAAM,W,wEADb,eAMM,MANN,EAMM,CALuB,EAAAC,gB,iBAA3B,eAIM,MAJN,EAIM,CAHJ,eAAwF,OAAnFD,MAAM,S,YAAS,eAA6D,EAAvC,eAACE,YAAc,EAAAD,eAAeE,W,wBACxE,eAAuD,OAAlDH,MAAM,Q,YAAQ,eAA6B,EAAP,eAACI,Q,wBAC1C,eAAkK,OAA7JJ,MAAM,Y,YAAY,eAAoI,IAAxHK,KAAK,EAAAJ,eAAeK,WAAWC,eAAY,SAAgBF,KAAK,EAAAJ,eAAeK,WAAWE,eAAeC,UAAS,O,0HAY5I,GACbC,KAAM,UACNC,OAAQ,CAACC,EAAA,MACTC,MAAO,CAELC,MAAO,CACLC,KAAMC,OACNC,UAAU,EACVC,QAAS,IAIXC,eAAgB,CACdJ,KAAMC,OACNC,UAAU,EACVC,QAAS,KAIbE,KAAM,WACJ,MAAO,CACLC,SAAU,GACVC,MAAO,GACPrB,oBAAgBsB,IAIpBC,QAAS,CACPC,QAAS,WAAF,8CAAE,iGACFC,KAAKJ,MAAMK,OADT,gCAEiBD,KAAKE,QAAQ,yBAA0B,CAC3Dd,MAAOY,KAAKZ,QAHT,OAELY,KAAKL,SAFA,OAMLK,KAAKJ,MAAQ,eAAII,KAAKL,UAAUQ,UAN3B,UASFH,KAAKJ,MAAMK,OATT,iDAYPD,KAAKzB,eAAiByB,KAAKJ,MAAMQ,MAZ1B,gDAAF,qDAAE,IAgBXC,QAAS,WACPL,KAAKD,UACLO,YAAYN,KAAKD,QAASQ,UAA8B,IAApBP,KAAKP,gBAAqBe,QAAQ,O,UC1D1E,EAAOC,OAASA,EAChB,EAAOC,UAAY,kBAEJ,gB","file":"static/js/chunk-7babe442.e3a7971d.js","sourcesContent":["export * from 
\"-!../../../../node_modules/mini-css-extract-plugin/dist/loader.js??ref--8-oneOf-1-0!../../../../node_modules/css-loader/dist/cjs.js??ref--8-oneOf-1-1!../../../../node_modules/vue-loader-v16/dist/stylePostLoader.js!../../../../node_modules/postcss-loader/src/index.js??ref--8-oneOf-1-2!../../../../node_modules/sass-loader/dist/cjs.js??ref--8-oneOf-1-3!../../../../node_modules/cache-loader/dist/cjs.js??ref--0-0!../../../../node_modules/vue-loader-v16/dist/index.js??ref--0-1!./Index.vue?vue&type=style&index=0&id=52a823f4&lang=scss&scoped=true\"","<template>\n <div class=\"rss-news\">\n <div class=\"article\" v-if=\"currentArticle\">\n <div class=\"source\" v-text=\"currentArticle.feed_title || currentArticle.feed_url\"></div>\n <div class=\"title\" v-text=\"currentArticle.title\"></div>\n <div class=\"published\" v-text=\"new Date(currentArticle.published).toDateString() + ', ' + new Date(currentArticle.published).toTimeString().substring(0,5)\"></div>\n </div>\n </div>\n</template>\n\n<script>\nimport Utils from \"@/Utils\";\n\n/**\n * In order to use this widget you need to configure the `backend.http.poll` backend to\n * poll a list of RSS sources.\n */\nexport default {\n name: \"RssNews\",\n mixins: [Utils],\n props: {\n // Maximum number of items to be shown in a cycle.\n limit: {\n type: Number,\n required: false,\n default: 25,\n },\n\n // How long an entry should be displayed before moving to the next one.\n refreshSeconds: {\n type: Number,\n required: false,\n default: 15,\n },\n },\n\n data: function() {\n return {\n articles: [],\n queue: [],\n currentArticle: undefined,\n }\n },\n\n methods: {\n refresh: async function() {\n if (!this.queue.length) {\n this.articles = await this.request('rss.get_latest_entries', {\n limit: this.limit\n })\n\n this.queue = [...this.articles].reverse()\n }\n\n if (!this.queue.length)\n return\n\n this.currentArticle = this.queue.pop()\n },\n },\n\n mounted: function() {\n this.refresh()\n setInterval(this.refresh, 
parseInt((this.refreshSeconds*1000).toFixed(0)))\n },\n}\n</script>\n\n<style lang=\"scss\" scoped>\n.rss-news {\n width: 100%;\n height: 100%;\n display: flex;\n align-items: center;\n letter-spacing: .025em;\n\n .article {\n width: 90%;\n padding: 0 2em;\n\n .source {\n font-size: 1.7em;\n font-weight: bold;\n margin-bottom: .5em;\n }\n\n .title {\n font-size: 1.8em;\n font-weight: normal;\n margin-bottom: .5em;\n }\n\n .published {\n text-align: right;\n font-size: 1.1em;\n }\n }\n}\n</style>\n","import { render } from \"./Index.vue?vue&type=template&id=52a823f4&scoped=true\"\nimport script from \"./Index.vue?vue&type=script&lang=js\"\nexport * from \"./Index.vue?vue&type=script&lang=js\"\n\nimport \"./Index.vue?vue&type=style&index=0&id=52a823f4&lang=scss&scoped=true\"\nscript.render = render\nscript.__scopeId = \"data-v-52a823f4\"\n\nexport default script"],"sourceRoot":""}
|
|
@ -1,7 +1,7 @@
|
||||||
<template>
|
<template>
|
||||||
<div class="rss-news">
|
<div class="rss-news">
|
||||||
<div class="article" v-if="currentArticle">
|
<div class="article" v-if="currentArticle">
|
||||||
<div class="source" v-text="currentArticle.source"></div>
|
<div class="source" v-text="currentArticle.feed_title || currentArticle.feed_url"></div>
|
||||||
<div class="title" v-text="currentArticle.title"></div>
|
<div class="title" v-text="currentArticle.title"></div>
|
||||||
<div class="published" v-text="new Date(currentArticle.published).toDateString() + ', ' + new Date(currentArticle.published).toTimeString().substring(0,5)"></div>
|
<div class="published" v-text="new Date(currentArticle.published).toDateString() + ', ' + new Date(currentArticle.published).toTimeString().substring(0,5)"></div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -12,21 +12,13 @@
|
||||||
import Utils from "@/Utils";
|
import Utils from "@/Utils";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* In order to use this widget you need to configure the `backend.http.poll` backend to
|
* In order to use this widget you need to configure the `rss` plugin
|
||||||
* poll a list of RSS sources.
|
* with a list of subscriptions.
|
||||||
*/
|
*/
|
||||||
export default {
|
export default {
|
||||||
name: "RssNews",
|
name: "RssNews",
|
||||||
mixins: [Utils],
|
mixins: [Utils],
|
||||||
props: {
|
props: {
|
||||||
// Database engine string pointing to the source of the RSS feeds.
|
|
||||||
// If not otherwise configured, you should set this to
|
|
||||||
// `sqlite:///<HOME>/.local/share/platypush/feeds/rss.db`.
|
|
||||||
db: {
|
|
||||||
type: String,
|
|
||||||
required: true,
|
|
||||||
},
|
|
||||||
|
|
||||||
// Maximum number of items to be shown in a cycle.
|
// Maximum number of items to be shown in a cycle.
|
||||||
limit: {
|
limit: {
|
||||||
type: Number,
|
type: Number,
|
||||||
|
@ -53,16 +45,11 @@ export default {
|
||||||
methods: {
|
methods: {
|
||||||
refresh: async function() {
|
refresh: async function() {
|
||||||
if (!this.queue.length) {
|
if (!this.queue.length) {
|
||||||
this.articles = await this.request('db.select', {
|
this.articles = await this.request('rss.get_latest_entries', {
|
||||||
engine: this.db,
|
limit: this.limit
|
||||||
query: `
|
|
||||||
select s.title as source, e.title, e.summary,
|
|
||||||
strftime('%Y-%m-%dT%H:%M:%fZ', e.published) as published
|
|
||||||
from FeedEntry e join FeedSource s
|
|
||||||
on e.source_id = s.id order by e.published desc limit ${this.limit}`,
|
|
||||||
})
|
})
|
||||||
|
|
||||||
this.queue = [...this.articles]
|
this.queue = [...this.articles].reverse()
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!this.queue.length)
|
if (!this.queue.length)
|
||||||
|
|
20
platypush/message/event/rss.py
Normal file
20
platypush/message/event/rss.py
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from platypush.message.event import Event
|
||||||
|
|
||||||
|
|
||||||
|
class NewFeedEntryEvent(Event):
    """
    Event triggered when a new (RSS/Atom) feed entry is received.
    """

    def __init__(
        self,
        *,
        feed_url: str,
        url: str,
        title: Optional[str] = None,
        id: Optional[str] = None,
        feed_title: Optional[str] = None,
        published: Optional[datetime] = None,
        summary: Optional[str] = None,
        content: Optional[str] = None,
        **kwargs
    ):
        """
        All the arguments are keyword-only and are forwarded unchanged to
        the base event constructor.

        :param feed_url: URL of the feed.
        :param url: URL of the feed entry.
        :param title: Feed entry title.
        :param id: Feed entry ID.
        :param feed_title: Feed title.
        :param published: Entry published time.
        :param summary: Feed entry summary.
        :param content: Feed entry content.
        :param kwargs: Extra arguments passed through to the parent class.
        """
        # Collect the entry attributes first, in the same order they were
        # originally handed to the parent constructor.
        entry_args = {
            'feed_url': feed_url,
            'url': url,
            'title': title,
            'id': id,
            'feed_title': feed_title,
            'published': published,
            'summary': summary,
            'content': content,
        }

        super().__init__(**entry_args, **kwargs)
|
|
@ -105,7 +105,6 @@ class DbPlugin(Plugin):
|
||||||
|
|
||||||
return table, engine
|
return table, engine
|
||||||
|
|
||||||
|
|
||||||
@action
|
@action
|
||||||
def select(self, query=None, table=None, filter=None, engine=None, *args, **kwargs):
|
def select(self, query=None, table=None, filter=None, engine=None, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
|
@ -179,10 +178,9 @@ class DbPlugin(Plugin):
|
||||||
|
|
||||||
with engine.connect() as connection:
|
with engine.connect() as connection:
|
||||||
result = connection.execute(query)
|
result = connection.execute(query)
|
||||||
|
|
||||||
columns = result.keys()
|
columns = result.keys()
|
||||||
rows = [
|
rows = [
|
||||||
{ columns[i]: row[i] for i in range(0, len(columns)) }
|
{col: row[i] for i, col in enumerate(list(columns))}
|
||||||
for row in result.fetchall()
|
for row in result.fetchall()
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
214
platypush/plugins/rss/__init__.py
Normal file
214
platypush/plugins/rss/__init__.py
Normal file
|
@ -0,0 +1,214 @@
|
||||||
|
import datetime
|
||||||
|
import queue
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from typing import Optional, Collection
|
||||||
|
|
||||||
|
import dateutil.parser
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from platypush.context import get_bus, get_plugin
|
||||||
|
from platypush.message.event.rss import NewFeedEntryEvent
|
||||||
|
from platypush.plugins import RunnablePlugin, action
|
||||||
|
from platypush.schemas.rss import RssFeedEntrySchema
|
||||||
|
|
||||||
|
|
||||||
|
class RssPlugin(RunnablePlugin):
    """
    A plugin for parsing and subscribing to RSS feeds.

    Triggers:

        - :class:`platypush.message.event.rss.NewFeedEntryEvent` when a new entry is received on a subscribed feed.

    Requires:

        * **feedparser** (``pip install feedparser``)

    """

    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) ' + \
        'Chrome/62.0.3202.94 Safari/537.36'

    def __init__(
        self, subscriptions: Optional[Collection[str]] = None, poll_seconds: int = 300,
        user_agent: str = user_agent, **kwargs
    ):
        """
        :param subscriptions: List of feeds to monitor for updates, as URLs.
        :param poll_seconds: How often we should check for updates (default: 300 seconds).
        :param user_agent: Custom user agent to use for the requests.
        """
        super().__init__(**kwargs)
        self.subscriptions = subscriptions or []
        self.poll_seconds = poll_seconds
        self.user_agent = user_agent
        self._latest_timestamps = self._get_latest_timestamps()
        # One independent queue per worker. ``[queue.Queue()] * 5`` would
        # create five references to the *same* queue object, which defeats
        # the round-robin dispatching done in ``main``.
        self._feed_worker_queues = [queue.Queue() for _ in range(5)]
        self._feed_response_queue = queue.Queue()
        self._feed_workers = []
        self._latest_entries = []

    @staticmethod
    def _get_feed_latest_timestamp_varname(url: str) -> str:
        """
        Name of the variable that stores the latest seen timestamp for ``url``.
        """
        return f'LATEST_FEED_TIMESTAMP[{url}]'

    def _get_latest_timestamps(self) -> dict:
        """
        Load the latest processed timestamp of each subscribed feed from the
        ``variable`` plugin.

        :return: ``{feed_url: datetime}``. Feeds with no stored timestamp
            are omitted from the map.
        """
        variable = get_plugin('variable')
        timestamps = {}

        for url in self.subscriptions:
            varname = self._get_feed_latest_timestamp_varname(url)
            # Presumably empty/None when the variable has never been set
            # (e.g. on the first run) - verify against the variable plugin.
            # ``isoparse`` can't handle a missing value, so skip the feed.
            stored = variable.get(varname).output.get(varname)
            if stored:
                timestamps[url] = dateutil.parser.isoparse(stored)

        return timestamps

    def _update_latest_timestamps(self) -> None:
        """
        Persist the latest processed timestamp of each feed through the
        ``variable`` plugin. Feeds with no known timestamp are skipped.
        """
        variable = get_plugin('variable')
        variable.set(**{
            self._get_feed_latest_timestamp_varname(url): latest_timestamp
            for url, latest_timestamp in self._latest_timestamps.items()
            # A feed that never returned an entry has no timestamp yet:
            # there is nothing meaningful to store for it.
            if latest_timestamp is not None
        })

    @staticmethod
    def _parse_content(entry) -> Optional[str]:
        """
        Extract the content of a parsed feed entry.

        :return: ``None`` if the entry has no ``content`` attribute (or it is
            empty), the ``value`` of the first item if ``content`` is a list,
            otherwise ``content`` itself.
        """
        content = getattr(entry, 'content', None)
        if not content:
            return

        if isinstance(content, list):
            return content[0]['value']

        return content

    @action
    def parse_feed(self, url: str):
        """
        Parse a feed URL.

        Entries without a parsed publication date are skipped, and the
        result is sorted by ascending publication date.

        :param url: Feed URL.
        :return: .. schema:: rss.RssFeedEntrySchema(many=True)
        """
        import feedparser

        feed = feedparser.parse(requests.get(url, headers={'User-Agent': self.user_agent}).text)
        return RssFeedEntrySchema().dump(
            sorted([
                {
                    'feed_url': url,
                    'feed_title': getattr(feed.feed, 'title', None),
                    'id': entry.id,
                    'url': entry.link,
                    # NOTE(review): time.mktime interprets the struct as
                    # local time - confirm that this matches the timezone
                    # of feedparser's ``published_parsed``.
                    'published': datetime.datetime.fromtimestamp(time.mktime(entry.published_parsed)),
                    'title': entry.title,
                    'summary': getattr(entry, 'summary', None),
                    'content': self._parse_content(entry),
                }
                for entry in feed.entries
                if getattr(entry, 'published_parsed', None)
            ], key=lambda e: e['published']),
            many=True
        )

    @action
    def get_latest_entries(self, limit: int = 20):
        """
        Get the latest entries from the subscribed feeds, sorted by descending published date.

        :param limit: Maximum number of entries to return (default: 20).
        :return: .. schema:: rss.RssFeedEntrySchema(many=True)
        """
        return sorted(self._latest_entries, key=lambda e: e['published'], reverse=True)[:limit]

    def _feed_worker(self, q: queue.Queue):
        """
        Worker loop: pull feed URLs from ``q``, parse them and push either
        ``{'url', 'content'}`` or ``{'url', 'error'}`` on the response queue.
        A ``None`` marker is pushed when the worker terminates.
        """
        while not self.should_stop():
            try:
                # Short timeout, so the stop condition is checked regularly.
                url = q.get(block=True, timeout=1)
            except queue.Empty:
                continue

            try:
                self._feed_response_queue.put({
                    'url': url,
                    'content': self.parse_feed(url).output,
                })
            except Exception as e:
                # Report the failure to the main loop instead of killing
                # the worker.
                self._feed_response_queue.put({
                    'url': url,
                    'error': e,
                })

        self._feed_response_queue.put(None)

    def _wait_next_poll(self) -> None:
        """
        Sleep for ``poll_seconds``, waking up at least once a second so that
        a stop request isn't delayed by a whole poll interval.
        """
        deadline = time.time() + self.poll_seconds
        while not self.should_stop():
            remaining = deadline - time.time()
            if remaining <= 0:
                break

            time.sleep(min(1, remaining))

    def main(self):
        """
        Start the workers, then periodically dispatch the subscribed feeds to
        them, post a :class:`NewFeedEntryEvent` for each entry newer than the
        latest stored timestamp of its feed, and store the new timestamps.
        """
        self._feed_workers = [
            threading.Thread(target=self._feed_worker, args=(q,))
            for q in self._feed_worker_queues
        ]

        for worker in self._feed_workers:
            worker.start()

        self.logger.info(f'Initialized RSS plugin with {len(self.subscriptions)} subscriptions')

        while not self.should_stop():
            responses = {}
            # Round-robin distribution of the feeds over the worker queues.
            for i, url in enumerate(self.subscriptions):
                worker_queue = self._feed_worker_queues[i % len(self._feed_worker_queues)]
                worker_queue.put(url)

            time_start = time.time()
            timeout = 60
            max_time = time_start + timeout
            new_entries = []

            while (
                not self.should_stop() and
                len(responses) < len(self.subscriptions) and
                time.time() - time_start <= timeout
            ):
                try:
                    # Wait only for the time left before the overall deadline
                    # (``max_time - time_start`` would always be the full
                    # timeout). Clamped to zero because Queue.get() rejects
                    # negative timeouts.
                    response = self._feed_response_queue.get(
                        block=True, timeout=max(0, max_time - time.time())
                    )
                except queue.Empty:
                    self.logger.warning('RSS parse timeout')
                    break

                if not response:
                    # Termination marker posted by a worker.
                    continue

                url = response['url']
                error = response.get('error')
                if error:
                    self.logger.error(f'Could not parse feed {url}: {error}')
                    responses[url] = error
                else:
                    responses[url] = response['content']

            # Failed feeds only count towards the number of received
            # responses: drop them before processing the entries.
            responses = {k: v for k, v in responses.items() if not isinstance(v, Exception)}

            for url, response in responses.items():
                latest_timestamp = self._latest_timestamps.get(url)
                new_entries += response

                for entry in response:
                    published = datetime.datetime.fromisoformat(entry['published'])
                    if not latest_timestamp or published > latest_timestamp:
                        latest_timestamp = published
                        get_bus().post(NewFeedEntryEvent(**entry))

                self._latest_timestamps[url] = latest_timestamp

            self._update_latest_timestamps()
            self._latest_entries = new_entries
            self._wait_next_poll()

    def stop(self):
        """
        Stop the plugin and wait (up to 60 seconds each) for the workers.
        """
        super().stop()
        for worker in self._feed_workers:
            worker.join(timeout=60)

        self.logger.info('RSS integration stopped')
|
||||||
|
|
||||||
|
|
||||||
|
# vim:sw=4:ts=4:et:
|
8
platypush/plugins/rss/manifest.yaml
Normal file
8
platypush/plugins/rss/manifest.yaml
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
manifest:
|
||||||
|
events:
|
||||||
|
platypush.message.event.rss.NewFeedEntryEvent: when a new entry is received on a subscribed feed.
|
||||||
|
install:
|
||||||
|
pip:
|
||||||
|
- feedparser
|
||||||
|
package: platypush.plugins.rss
|
||||||
|
type: plugin
|
40
platypush/schemas/rss.py
Normal file
40
platypush/schemas/rss.py
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
from marshmallow import fields
|
||||||
|
from marshmallow.schema import Schema
|
||||||
|
|
||||||
|
from platypush.schemas import DateTime
|
||||||
|
|
||||||
|
|
||||||
|
class RssFeedEntrySchema(Schema):
    """
    Schema of a single entry parsed from an RSS feed.
    """

    # Information about the feed the entry belongs to
    feed_title = fields.String(metadata={'description': 'Feed title'})
    feed_url = fields.URL(
        required=True,
        metadata={
            'description': 'URL of the feed',
            'example': 'https://some-website/rss',
        },
    )

    # Information about the entry itself
    id = fields.String(
        required=True,
        metadata={
            'description': 'Feed entry ID',
            'example': '1234',
        },
    )

    url = fields.URL(
        required=True,
        metadata={
            'description': 'URL of the feed entry',
            'example': 'https://some-website/articles/1234',
        },
    )

    published = DateTime(
        required=True,
        metadata={'description': 'Entry published time'},
    )

    title = fields.String(metadata={'description': 'Feed entry title'})
    summary = fields.String(metadata={'description': 'Feed entry summary'})
    content = fields.String(metadata={'description': 'Feed entry content'})
|
Loading…
Reference in a new issue