From 4bab9d26071c87066d2a1ba98b06f2d4c05f2357 Mon Sep 17 00:00:00 2001
From: Fabio Manganiello <info@fabiomanganiello.com>
Date: Thu, 29 Sep 2022 10:51:16 +0200
Subject: [PATCH] [#224] Implemented Wallabag integration

---
 CHANGELOG.md                               |   6 +
 docs/source/platypush/plugins/wallabag.rst |   5 +
 docs/source/plugins.rst                    |   1 +
 platypush/plugins/wallabag/__init__.py     | 405 +++++++++++++++++++++
 platypush/plugins/wallabag/manifest.yaml   |   3 +
 platypush/schemas/wallabag.py              | 147 ++++++++
 6 files changed, 567 insertions(+)
 create mode 100644 docs/source/platypush/plugins/wallabag.rst
 create mode 100644 platypush/plugins/wallabag/__init__.py
 create mode 100644 platypush/plugins/wallabag/manifest.yaml
 create mode 100644 platypush/schemas/wallabag.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2ee36b106..3804ba2b7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
 Given the high speed of development in the first phase, changes are being
 reported only starting from v0.20.2.
 
+## [Unreleased]
+
+### Added
+
+- Added [Wallabag integration](https://git.platypush.tech/platypush/platypush/issues/224).
+
 ## [0.23.6] - 2022-09-19
 
 ### Fixed
diff --git a/docs/source/platypush/plugins/wallabag.rst b/docs/source/platypush/plugins/wallabag.rst
new file mode 100644
index 000000000..2a6a54aca
--- /dev/null
+++ b/docs/source/platypush/plugins/wallabag.rst
@@ -0,0 +1,5 @@
+``wallabag``
+============
+
+.. automodule:: platypush.plugins.wallabag
+    :members:
diff --git a/docs/source/plugins.rst b/docs/source/plugins.rst
index 436232a29..ba4c0ace9 100644
--- a/docs/source/plugins.rst
+++ b/docs/source/plugins.rst
@@ -138,6 +138,7 @@ Plugins
     platypush/plugins/user.rst
     platypush/plugins/utils.rst
     platypush/plugins/variable.rst
+    platypush/plugins/wallabag.rst
     platypush/plugins/weather.buienradar.rst
     platypush/plugins/weather.darksky.rst
     platypush/plugins/weather.openweathermap.rst
diff --git a/platypush/plugins/wallabag/__init__.py b/platypush/plugins/wallabag/__init__.py
new file mode 100644
index 000000000..776e3afe6
--- /dev/null
+++ b/platypush/plugins/wallabag/__init__.py
@@ -0,0 +1,405 @@
+import json
+import os
+import pathlib
+import requests
+import time
+
+from datetime import datetime, timedelta
+from typing import Iterable, List, Optional
+from urllib.parse import urljoin
+
+from platypush.config import Config
+from platypush.plugins import Plugin, action
+from platypush.schemas.wallabag import WallabagEntrySchema
+
+
+class WallabagPlugin(Plugin):
+    """
+    Plugin to interact with Wallabag (https://wallabag.it),
+    an open-source alternative to Instapaper and Pocket.
+    """
+
+    _default_credentials_file = os.path.join(
+        str(Config.get('workdir')), 'wallabag', 'credentials.json'
+    )
+
+    def __init__(
+        self,
+        client_id: str,
+        client_secret: str,
+        server_url: str = 'https://wallabag.it',
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+        credentials_file: str = _default_credentials_file,
+        **kwargs,
+    ):
+        """
+        :param client_id: Client ID for your application - you can create one
+            at ``<server_url>/developer``.
+        :param client_secret: Client secret for your application - you can
+            create one at ``<server_url>/developer``.
+        :param server_url: Base URL of the Wallabag server (default: ``https://wallabag.it``).
+        :param username: Wallabag username. Only needed for the first login,
+            you can remove it afterwards. Alternatively, you can provide it
+            on the :meth:`.login` method.
+        :param password: Wallabag password. Only needed for the first login,
+            you can remove it afterwards. Alternatively, you can provide it
+            on the :meth:`.login` method.
+        :param credentials_file: Path to the file where the OAuth session
+            parameters will be stored (default:
+            ``<WORKDIR>/wallabag/credentials.json``).
+        """
+        super().__init__(**kwargs)
+        self._client_id = client_id
+        self._client_secret = client_secret
+        self._server_url = server_url
+        self._username = username
+        self._password = password
+        self._credentials_file = os.path.expanduser(credentials_file)
+        self._session = {}
+
+    def _oauth_open_saved_session(self):
+        try:
+            with open(self._credentials_file, 'r') as f:
+                data = json.load(f)
+        except Exception as e:
+            self.logger.warning('Could not load %s: %s', self._credentials_file, e)
+            return
+
+        self._session = {
+            'username': data['username'],
+            'client_id': data.get('client_id', self._client_id),
+            'client_secret': data.get('client_secret', self._client_secret),
+            'access_token': data['access_token'],
+            'refresh_token': data['refresh_token'],
+        }
+
+        if data.get('expires_at') and time.time() > data['expires_at']:
+            self.logger.info('OAuth token expired, refreshing it')
+            self._oauth_refresh_token()
+
+    def _oauth_refresh_token(self):
+        url = urljoin(self._server_url, '/oauth/v2/token')
+        rs = requests.post(
+            url,
+            json={
+                'grant_type': 'refresh_token',
+                'client_id': self._client_id,
+                'client_secret': self._client_secret,
+                'access_token': self._session['access_token'],
+                'refresh_token': self._session['refresh_token'],
+            },
+        )
+
+        rs.raise_for_status()
+        rs = rs.json()
+        self._session.update(
+            {
+                'access_token': rs['access_token'],
+                'refresh_token': rs['refresh_token'],
+                'expires_at': (
+                    int(
+                        (
+                            datetime.now() + timedelta(seconds=rs['expires_in'])
+                        ).timestamp()
+                    )
+                    if rs.get('expires_in')
+                    else None
+                ),
+            }
+        )
+
+        self._oauth_flush_session()
+
+    def _oauth_create_new_session(self, username: str, password: str):
+        url = urljoin(self._server_url, '/oauth/v2/token')
+        rs = requests.post(
+            url,
+            json={
+                'grant_type': 'password',
+                'client_id': self._client_id,
+                'client_secret': self._client_secret,
+                'username': username,
+                'password': password,
+            },
+        )
+
+        rs.raise_for_status()
+        rs = rs.json()
+        self._session = {
+            'client_id': self._client_id,
+            'client_secret': self._client_secret,
+            'username': username,
+            'access_token': rs['access_token'],
+            'refresh_token': rs['refresh_token'],
+            'expires_at': (
+                int((datetime.now() + timedelta(seconds=rs['expires_in'])).timestamp())
+                if rs.get('expires_in')
+                else None
+            ),
+        }
+
+        self._oauth_flush_session()
+
+    def _oauth_flush_session(self):
+        pathlib.Path(self._credentials_file).parent.mkdir(parents=True, exist_ok=True)
+
+        pathlib.Path(self._credentials_file).touch(mode=0o600, exist_ok=True)
+        with open(self._credentials_file, 'w') as f:
+            f.write(json.dumps(self._session))
+
+    @action
+    def login(self, username: Optional[str] = None, password: Optional[str] = None):
+        """
+        Create a new user session if not logged in.
+
+        :param username: Default ``username`` override.
+        :param password: Default ``password`` override.
+        """
+        self._oauth_open_saved_session()
+        if self._session:
+            return
+
+        username = username or self._username
+        password = password or self._password
+        assert (
+            username and password
+        ), 'No stored user session and no username/password provided'
+
+        self._oauth_create_new_session(username, password)
+
+    def _request(self, url: str, method: str, *args, as_json=True, **kwargs):
+        url = urljoin(self._server_url, f'api/{url}')
+        func = getattr(requests, method.lower())
+        self.login()
+        kwargs['headers'] = {
+            **kwargs.get('headers', {}),
+            'Authorization': f'Bearer {self._session["access_token"]}',
+        }
+
+        rs = func(url, *args, **kwargs)
+        rs.raise_for_status()
+        return rs.json() if as_json else rs.text
+
+    @action
+    def list(
+        self,
+        archived: bool = True,
+        starred: bool = False,
+        sort: str = 'created',
+        descending: bool = False,
+        page: int = 1,
+        limit: int = 30,
+        tags: Optional[Iterable[str]] = None,
+        since: Optional[int] = None,
+        full: bool = True,
+    ) -> List[dict]:
+        """
+        List saved links.
+
+        :param archived: Include archived items (default: ``True``).
+        :param starred: Include only starred items (default: ``False``).
+        :param sort: Timestamp sort criteria. Supported: ``created``,
+            ``updated``, ``archived`` (default: ``created``).
+        :param descending: Sort in descending order (default: ``False``).
+        :param page: Results page to be retrieved (default: ``1``).
+        :param limit: Maximum number of entries per page (default: ``30``).
+        :param tags: Filter by a list of tags.
+        :param since: Return entries created after this timestamp (as a UNIX
+            timestamp).
+        :param full: Include the full parsed body of the saved entry.
+        :return: .. schema:: wallabag.WallabagEntrySchema(many=True)
+        """
+        rs = self._request(
+            '/entries.json',
+            method='get',
+            params={
+                'archived': int(archived),
+                'starred': int(starred),
+                'sort': sort,
+                'order': 'desc' if descending else 'asc',
+                'page': page,
+                'perPage': limit,
+                'tags': ','.join(tags or []),
+                'since': since or 0,
+                'detail': 'full' if full else 'metadata',
+            },
+        )
+
+        return WallabagEntrySchema().dump(
+            rs.get('_embedded', {}).get('items', []), many=True
+        )
+
+    @action
+    def search(
+        self,
+        term: str,
+        page: int = 1,
+        limit: int = 30,
+    ) -> List[dict]:
+        """
+        Search links by some text.
+
+        :param term: Term to be searched.
+        :param page: Results page to be retrieved (default: ``1``).
+        :param limit: Maximum number of entries per page (default: ``30``).
+        :return: .. schema:: wallabag.WallabagEntrySchema(many=True)
+        """
+        rs = self._request(
+            '/search.json',
+            method='get',
+            params={
+                'term': term,
+                'page': page,
+                'perPage': limit,
+            },
+        )
+
+        return WallabagEntrySchema().dump(
+            rs.get('_embedded', {}).get('items', []), many=True
+        )
+
+    @action
+    def get(self, id: int) -> Optional[dict]:
+        """
+        Get the content and metadata of a link by ID.
+
+        :param id: Entry ID.
+        :return: .. schema:: wallabag.WallabagEntrySchema
+        """
+        rs = self._request(f'/entries/{id}.json', method='get')
+        return WallabagEntrySchema().dump(rs)  # type: ignore
+
+    @action
+    def export(self, id: int, file: str, format: str = 'txt'):
+        """
+        Export a saved entry to a file in the specified format.
+
+        :param id: Entry ID.
+        :param file: Output filename.
+        :param format: Output format. Supported: ``txt``, ``xml``, ``csv``,
+            ``pdf``, ``epub`` and ``mobi`` (default: ``txt``).
+        """
+        rs = self._request(
+            f'/entries/{id}/export.{format}', method='get', as_json=False
+        )
+
+        if isinstance(rs, str):
+            rs = rs.encode()
+        with open(os.path.expanduser(file), 'wb') as f:
+            f.write(rs)
+
+    @action
+    def save(
+        self,
+        url: str,
+        title: Optional[str] = None,
+        content: Optional[str] = None,
+        tags: Optional[Iterable[str]] = None,
+        authors: Optional[Iterable[str]] = None,
+        archived: bool = False,
+        starred: bool = False,
+        public: bool = False,
+        language: Optional[str] = None,
+        preview_picture: Optional[str] = None,
+    ) -> Optional[dict]:
+        """
+        Save a link to Wallabag.
+
+        :param url: URL to be saved.
+        :param title: Entry title (default: parsed from the page content).
+        :param content: Entry content (default: parsed from the entry itself).
+        :param tags: List of tags to attach to the entry.
+        :param authors: List of authors of the entry (default: parsed from the
+            page content).
+        :param archived: Whether the entry should be created in the archive
+            (default: ``False``).
+        :param starred: Whether the entry should be starred (default:
+            ``False``).
+        :param public: Whether the entry should be publicly available. If so, a
+            public URL will be generated (default: ``False``).
+        :param language: Language of the entry.
+        :param preview_picture: URL of a picture to be used for the preview
+            (default: parsed from the page itself).
+        :return: .. schema:: wallabag.WallabagEntrySchema
+        """
+        rs = self._request(
+            '/entries.json',
+            method='post',
+            json={
+                'url': url,
+                'title': title,
+                'content': content,
+                'tags': ','.join(tags or []),
+                'authors': ','.join(authors or []),
+                'archive': int(archived),
+                'starred': int(starred),
+                'public': int(public),
+                'language': language,
+                'preview_picture': preview_picture,
+            },
+        )
+
+        return WallabagEntrySchema().dump(rs)  # type: ignore
+
+    @action
+    def update(
+        self,
+        id: int,
+        title: Optional[str] = None,
+        content: Optional[str] = None,
+        tags: Optional[Iterable[str]] = None,
+        authors: Optional[Iterable[str]] = None,
+        archived: bool = False,
+        starred: bool = False,
+        public: bool = False,
+        language: Optional[str] = None,
+        preview_picture: Optional[str] = None,
+    ) -> Optional[dict]:
+        """
+        Update a link entry saved to Wallabag.
+
+        :param id: Entry ID.
+        :param title: New entry title.
+        :param content: New entry content.
+        :param tags: List of tags to attach to the entry.
+        :param authors: List of authors of the entry.
+        :param archived: Archive/unarchive the entry.
+        :param starred: Star/unstar the entry.
+        :param public: Mark the entry as public/private.
+        :param language: Change the language of the entry.
+        :param preview_picture: Change the preview picture URL.
+        :return: .. schema:: wallabag.WallabagEntrySchema
+        """
+        rs = self._request(
+            f'/entries/{id}.json',
+            method='patch',
+            json={
+                'title': title,
+                'content': content,
+                'tags': ','.join(tags or []),
+                'authors': ','.join(authors or []),
+                'archive': int(archived),
+                'starred': int(starred),
+                'public': int(public),
+                'language': language,
+                'preview_picture': preview_picture,
+            },
+        )
+
+        return WallabagEntrySchema().dump(rs)  # type: ignore
+
+    @action
+    def delete(self, id: int) -> Optional[dict]:
+        """
+        Delete an entry by ID.
+
+        :param id: Entry ID.
+        :return: .. schema:: wallabag.WallabagEntrySchema
+        """
+        rs = self._request(
+            f'/entries/{id}.json',
+            method='delete',
+        )
+
+        return WallabagEntrySchema().dump(rs)  # type: ignore
diff --git a/platypush/plugins/wallabag/manifest.yaml b/platypush/plugins/wallabag/manifest.yaml
new file mode 100644
index 000000000..cfa48a90e
--- /dev/null
+++ b/platypush/plugins/wallabag/manifest.yaml
@@ -0,0 +1,3 @@
+manifest:
+  package: platypush.plugins.wallabag
+  type: plugin
diff --git a/platypush/schemas/wallabag.py b/platypush/schemas/wallabag.py
new file mode 100644
index 000000000..337e13c52
--- /dev/null
+++ b/platypush/schemas/wallabag.py
@@ -0,0 +1,147 @@
+from marshmallow import Schema, fields
+
+from platypush.schemas import DateTime
+
+
+class WallabagSchema(Schema):
+    pass
+
+
+class WallabagAnnotationSchema(WallabagSchema):
+    id = fields.Integer(
+        required=True,
+        dump_only=True,
+        metadata={'example': 2345},
+    )
+
+    text = fields.String(
+        attribute='quote',
+        metadata={
+            'example': 'Some memorable quote',
+        },
+    )
+
+    comment = fields.String(
+        attribute='text',
+        metadata={
+            'example': 'My comment on this memorable quote',
+        },
+    )
+
+    ranges = fields.Function(
+        lambda data: [
+            [int(r['startOffset']), int(r['endOffset'])] for r in data.get('ranges', [])
+        ],
+        metadata={
+            'example': [[100, 180]],
+        },
+    )
+
+    created_at = DateTime(
+        metadata={
+            'description': 'When the annotation was created',
+        },
+    )
+
+    updated_at = DateTime(
+        metadata={
+            'description': 'When the annotation was last updated',
+        },
+    )
+
+
+class WallabagEntrySchema(WallabagSchema):
+    id = fields.Integer(
+        required=True,
+        dump_only=True,
+        metadata={'example': 1234},
+    )
+
+    url = fields.URL(
+        required=True,
+        metadata={
+            'description': 'Original URL',
+            'example': 'https://example.com/article/some-title',
+        },
+    )
+
+    preview_picture = fields.URL(
+        metadata={
+            'description': 'Preview picture URL',
+            'example': 'https://example.com/article/some-title.jpg',
+        },
+    )
+
+    is_archived = fields.Boolean()
+    is_starred = fields.Boolean()
+    is_public = fields.Boolean()
+    mimetype = fields.String(
+        metadata={
+            'example': 'text/html',
+        },
+    )
+
+    title = fields.String(
+        metadata={
+            'description': 'Title of the saved page',
+        },
+    )
+
+    content = fields.String(
+        metadata={
+            'description': 'Parsed content',
+        }
+    )
+
+    language = fields.String(
+        metadata={
+            'example': 'en',
+        }
+    )
+
+    annotations = fields.List(fields.Nested(WallabagAnnotationSchema))
+
+    published_by = fields.List(
+        fields.String,
+        metadata={
+            'example': ['Author 1', 'Author 2'],
+        },
+    )
+
+    tags = fields.Function(
+        lambda data: [tag['label'] for tag in data.get('tags', [])],
+        metadata={
+            'example': ['tech', 'programming'],
+        },
+    )
+
+    reading_time = fields.Integer(
+        metadata={
+            'description': 'Estimated reading time, in minutes',
+            'example': 10,
+        }
+    )
+
+    created_at = DateTime(
+        metadata={
+            'description': 'When the entry was created',
+        },
+    )
+
+    updated_at = DateTime(
+        metadata={
+            'description': 'When the entry was last updated',
+        },
+    )
+
+    starred_at = DateTime(
+        metadata={
+            'description': 'If the entry is starred, when was it last marked',
+        },
+    )
+
+    published_at = DateTime(
+        metadata={
+            'description': 'When the entry was initially published',
+        },
+    )