@staticmethod
def _fix_relative_links(markdown: str, url: str) -> str:
    """
    Rewrite root-relative Markdown links and images as absolute URLs.

    Targets of the form ``[text](/path)`` and ``![alt](/path)`` are prefixed
    with the scheme and network location of ``url``. Absolute targets,
    protocol-relative targets (``//host/path``), anchors and document-relative
    targets (``path/to/page``) are left untouched.

    :param markdown: Markdown text to process.
    :param url: URL of the page the Markdown was generated from. Only its
        scheme and network location are used.
    :return: A new string with the links rewritten. The ``markdown`` argument
        is not modified, so the caller must use the returned value, e.g.
        ``response['content'] = self._fix_relative_links(response['content'], url)``.
    """
    parsed_url = urlparse(url)
    base_url = f'{parsed_url.scheme}://{parsed_url.netloc}'

    def _absolutize(match) -> str:
        # group(1) is the "[text]" / "![alt]" part, group(2) the "/path" target.
        return f'{match.group(1)}({base_url}{match.group(2)})'

    # A callable replacement is used on purpose: in a plain (non-raw) string
    # '\1' and '\2' are octal escapes for control characters rather than regex
    # back-references, and a callable also keeps ``base_url`` from being
    # scanned for backslash escapes by ``re.sub``.
    # ``(?!/)`` skips protocol-relative targets such as ``//cdn.example.org/x``;
    # ``.*?`` (instead of ``.+?``) lets a bare ``(/)`` root link match as well.
    return re.sub(r'(!?\[.+?])\((/(?!/).*?)\)', _absolutize, markdown)