commit a91b564305c1780be2aca1a98d2d401de6021a72 Author: Fabio Manganiello Date: Tue Jan 11 20:16:27 2022 +0100 First commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..feab7a2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +__pycache__ +build/ +dist/ +*.egg-info diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..abf87e7 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2021, 2022 Fabio Manganiello + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..ee1fb5e --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +recursive-include madblog/static/css * +recursive-include madblog/static/fonts * +recursive-include madblog/static/img * +recursive-include madblog/templates * diff --git a/README.md b/README.md new file mode 100644 index 0000000..d8853f2 --- /dev/null +++ b/README.md @@ -0,0 +1,103 @@ +# madblog + +This project provides a minimal blogging platform based on Markdown files. + +## Installation + +```shell +$ python setup.py install +``` + +## Usage + +```shell +# The application will listen on port 8000 and it will +# serve the current folder +$ madblog +``` + +``` +usage: madblog [-h] [--host HOST] [--port PORT] [--debug] [path] + +Serve a Markdown folder as a web blog. + +The folder should have the following structure: + +. + -> markdown + -> article-1.md + -> article-2.md + -> ... + -> img [recommended] + -> favicon.ico + -> icon.png + -> image-1.png + -> image-2.png + -> ... + -> css [optional] + -> custom-1.css + -> custom-2.css + -> ... + -> fonts [optional] + -> custom-1.ttf + -> custom-1.css + -> ... + -> templates [optional] + -> index.html [for a custom index template] + -> article.html [for a custom article template] + +positional arguments: + path Base path for the blog + +options: + -h, --help show this help message and exit + --host HOST Bind host/address + --port PORT Bind port (default: 8000) + --debug Enable debug mode (default: False) +``` + +## Markdown files + +Articles are Markdown files stored under `markdown`. For an article to be correctly rendered, +you need to start the Markdown file with the following metadata header: + +```markdown +[//]: # (title: Title of the article) +[//]: # (description: Short description of the content) +[//]: # (image: /img/some-header-image.png) +[//]: # (author: Author Name <email@example.com>) +[//]: # (published: 2022-01-01) +``` + +## Images + +Images are stored under `img`. 
You can reference them in your articles through the following syntax: + +```markdown +![image description](/img/image.png) +``` + +You can also drop your `favicon.ico` under this folder. + +## LaTeX support + +LaTeX support is built-in as long as you have the `latex` executable installed on your server. + +Syntax for inline LaTeX: + +```markdown +And we can therefore prove that \( c^2 = a^2 + b^2 \) +``` + +Syntax for LaTeX expression on a new line: + +```markdown +$$ +c^2 = a^2 + b^2 +$$ +``` + +## RSS syndication + +RSS feeds for the blog are provided under the `/rss` URL. + diff --git a/madblog/__init__.py b/madblog/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/madblog/__main__.py b/madblog/__main__.py new file mode 100644 index 0000000..5567e98 --- /dev/null +++ b/madblog/__main__.py @@ -0,0 +1,4 @@ +from .cli import run + +if __name__ == '__main__': + run() diff --git a/madblog/app.py b/madblog/app.py new file mode 100644 index 0000000..b1469e0 --- /dev/null +++ b/madblog/app.py @@ -0,0 +1,103 @@ +import datetime +import os +import re +from glob import glob +from typing import Optional + +from flask import Flask, abort, render_template +from markdown import markdown + +from .config import config +from .latex import MarkdownLatex + + +class BlogApp(Flask): + def __init__(self, *args, **kwargs): + super().__init__(*args, template_folder=config.templates_dir, **kwargs) + self.pages_dir = os.path.join(config.content_dir, 'markdown') + self.img_dir = config.default_img_dir + self.css_dir = config.default_css_dir + self.fonts_dir = config.default_fonts_dir + + if not os.path.isdir(self.pages_dir): + raise FileNotFoundError(self.pages_dir) + + img_dir = os.path.join(config.content_dir, 'img') + if os.path.isdir(img_dir): + self.img_dir = img_dir + + css_dir = os.path.join(config.content_dir, 'css') + if os.path.isdir(css_dir): + self.css_dir = css_dir + + fonts_dir = os.path.join(config.content_dir, 'fonts') + if os.path.isdir(fonts_dir): + 
self.fonts_dir = fonts_dir + + templates_dir = os.path.join(config.content_dir, 'templates') + if os.path.isdir(templates_dir): + self.template_folder = templates_dir + + def get_page_metadata(self, page: str) -> dict: + if not page.endswith('.md'): + page = page + '.md' + + if not os.path.isfile(os.path.join(self.pages_dir, page)): + abort(404) + + metadata = {} + with open(os.path.join(self.pages_dir, page), 'r') as f: + metadata['uri'] = '/article/' + page[:-3] + + for line in f.readlines(): + if not line: + continue + + if not (m := re.match(r'^\[//]: # \(([^:]+):\s*([^)]+)\)\s*$', line)): + break + + if m.group(1) == 'published': + metadata[m.group(1)] = datetime.date.fromisoformat(m.group(2)) + else: + metadata[m.group(1)] = m.group(2) + + return metadata + + def get_page(self, page: str, title: Optional[str] = None, skip_header: bool = False): + if not page.endswith('.md'): + page = page + '.md' + + metadata = self.get_page_metadata(page) + with open(os.path.join(self.pages_dir, page), 'r') as f: + return render_template( + 'article.html', + config=config, + title=title if title else metadata.get('title', config.title), + image=metadata.get('image'), + description=metadata.get('description'), + author=re.match(r'(.+?)\s+<([^>]+>)', metadata['author'])[1] if 'author' in metadata else None, + author_email=re.match(r'(.+?)\s+<([^>]+)>', metadata['author'])[2] if 'author' in metadata else None, + published=(metadata['published'].strftime('%b %d, %Y') + if metadata.get('published') else None), + content=markdown(f.read(), extensions=['fenced_code', 'codehilite', MarkdownLatex()]), + skip_header=skip_header + ) + + def get_pages(self, with_content: bool = False, skip_header: bool = False) -> list: + return sorted([ + { + 'path': path, + 'content': self.get_page(path, skip_header=skip_header) if with_content else '', + **self.get_page_metadata(os.path.basename(path)), + } + for path in glob(os.path.join(app.pages_dir, '*.md')) + ], key=lambda page: 
page.get('published'), reverse=True) + + +app = BlogApp(__name__) + + +from .routes import * + + +# vim:sw=4:ts=4:et: diff --git a/madblog/cli.py b/madblog/cli.py new file mode 100644 index 0000000..79e7fdc --- /dev/null +++ b/madblog/cli.py @@ -0,0 +1,55 @@ +import argparse +import os +import sys + +def get_args(): + parser = argparse.ArgumentParser(description='''Serve a Markdown folder as a web blog. + +The folder should have the following structure: + +. + -> config.yaml [recommended] + -> markdown + -> article-1.md + -> article-2.md + -> ... + -> img [recommended] + -> favicon.ico + -> icon.png + -> image-1.png + -> image-2.png + -> ... + -> css [optional] + -> custom-1.css + -> custom-2.css + -> ... + -> fonts [optional] + -> custom-1.ttf + -> custom-1.css + -> ... + -> templates [optional] + -> index.html [for a custom index template] + -> article.html [for a custom article template] + +''', formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument('dir', nargs='?', default='.', help='Base path for the blog (default: current directory)') + parser.add_argument('--config', dest='config', default='config.yaml', required=False, help='Path to a configuration file (default: config.yaml in the blog root directory)') + parser.add_argument('--host', dest='host', required=False, default='0.0.0.0', help='Bind host/address') + parser.add_argument('--port', dest='port', required=False, type=int, default=8000, help='Bind port (default: 8000)') + parser.add_argument('--debug', dest='debug', required=False, action='store_true', default=False, + help='Enable debug mode (default: False)') + + return parser.parse_known_args(sys.argv[1:]) + + +def run(): + from .config import init_config + opts, _ = get_args() + config_file = os.path.join(opts.dir, 'config.yaml') + init_config(config_file=config_file, content_dir=opts.dir) + + from .app import app + app.run(host=opts.host, port=opts.port, debug=opts.debug) + + +# vim:sw=4:ts=4:et: diff --git a/madblog/config.py 
b/madblog/config.py new file mode 100644 index 0000000..bd066a7 --- /dev/null +++ b/madblog/config.py @@ -0,0 +1,54 @@ +import os +import yaml +from typing import Optional + +from dataclasses import dataclass + + +@dataclass +class Config: + title = 'Blog' + description = '' + link = '/' + home_link = '/' + language = 'en-US' + logo = '/img/icon.png' + content_dir = None + categories = None + + basedir = os.path.abspath(os.path.dirname(__file__)) + templates_dir = os.path.join(basedir, 'templates') + static_dir = os.path.join(basedir, 'static') + default_css_dir = os.path.join(static_dir, 'css') + default_fonts_dir = os.path.join(static_dir, 'fonts') + default_img_dir = os.path.join(static_dir, 'img') + + +config = Config() + + +def init_config(content_dir='.', config_file='config.yaml'): + cfg = {} + config.content_dir = content_dir + + if os.path.isfile(config_file): + with open(config_file, 'r') as f: + cfg = yaml.safe_load(f) + + if cfg.get('title'): + config.title = cfg['title'] + if cfg.get('description'): + config.description = cfg['description'] + if cfg.get('link'): + config.link = cfg['link'] + if cfg.get('home_link'): + config.home_link = cfg['home_link'] + if cfg.get('logo'): + config.logo = cfg['logo'] + if cfg.get('language'): + config.language = cfg['language'] + + config.categories = cfg.get('categories', []) + + +# vim:sw=4:ts=4:et: diff --git a/madblog/latex.py b/madblog/latex.py new file mode 100644 index 0000000..f43b895 --- /dev/null +++ b/madblog/latex.py @@ -0,0 +1,248 @@ +""" +Licensed under Public Domain Mark 1.0. +See https://creativecommons.org/publicdomain/mark/1.0/ +Author: Justin Bruce Van Horne + +Python-Markdown LaTeX Extension +Adds support for $math mode$ and %text mode%. This plugin supports +multiline equations/text. +The actual image generation is done via LaTeX/DVI output. +It encodes data as base64 so there is no need for images directly. +All the work is done in the preprocessor. 
+""" + +import base64 +import hashlib +import json +import os +import re +import tempfile +from subprocess import call as rawcall, PIPE + +import markdown + + +def call(*args, **kwargs): + """ + Proxy to subprocess.call(), removes timeout argument in case of + Python2 because that was only implemented in Python3. + """ + return rawcall(*args, **kwargs) + + +# Defines our basic inline image +img_expr = '%s' + +# Defines multiline expression image +multiline_img_expr = '''
+%s
''' + +# Base CSS template +img_css = """""" + +# Cache and temp file paths +tmpdir = tempfile.gettempdir() + '/markdown-latex' +cache_file = tmpdir + '/latex.cache' + + +class LaTeXPreprocessor(markdown.preprocessors.Preprocessor): + # These are our cached expressions that are stored in latex.cache + cached = {} + + # Basic LaTex Setup as well as our list of expressions to parse + tex_preamble = r"""\documentclass[14pt]{article} +\usepackage{amsmath} +\usepackage{amsthm} +\usepackage{amssymb} +\usepackage{bm} +\usepackage{graphicx} +\usepackage[usenames,dvipsnames]{color} +\pagestyle{empty} +""" + + # Math TeX extraction regex + math_extract_regex = re.compile(r'(.+?)((\\\(.+?\\\))|(\$\$\n.+?\n\$\$\n))(.*)', re.MULTILINE | re.DOTALL) + + # Math TeX matching regex + math_match_regex = re.compile(r'\s*(\\\(.+?\\\))|(\$\$\n.+?\n\$\$\n)\s*', re.MULTILINE | re.DOTALL) + + def __init__(self, *_, **__): + if not os.path.isdir(tmpdir): + os.makedirs(tmpdir) + try: + with open(cache_file, 'r') as f: + self.cached = json.load(f) + except (IOError, json.JSONDecodeError): + self.cached = {} + + self.config = { + ("general", "preamble"): "", + ("dvipng", "args"): "-q -T tight -bg Transparent -z 9 -D 200", + ("delimiters", "text"): "%", + ("delimiters", "math"): "$", + ("delimiters", "preamble"): "%%"} + + def _latex_to_base64(self, tex): + """Generates a base64 representation of TeX string""" + + # Generate the temporary file + tmp_file_fd, path = tempfile.mkstemp(dir=tmpdir) + with os.fdopen(tmp_file_fd, "w") as tmp_file: + tmp_file.write(self.tex_preamble) + tmp_file.write(tex) + tmp_file.write('\n\\end{document}') + + # compile LaTeX document. A DVI file is created + status = call(('latex -halt-on-error -output-directory={:s} {:s}' + .format(tmpdir, path)).split(), + stdout=PIPE, timeout=10) + + # clean up if the above failed + if status: + self._cleanup(path, err=True) + raise Exception("Couldn't compile LaTeX document." + + "Please read '%s.log' for more detail." 
% path) + + # Run dvipng on the generated DVI file. Use tight bounding box. + # Magnification is set to 1200 + dvi = "%s.dvi" % path + png = "%s.png" % path + + # Extract the image + cmd = "dvipng %s %s -o %s" % (self.config[("dvipng", "args")], dvi, png) + status = call(cmd.split(), stdout=PIPE) + + # clean up if we couldn't make the above work + if status: + self._cleanup(path, err=True) + raise Exception("Couldn't convert LaTeX to image." + + "Please read '%s.log' for more detail." % path) + + # Read the png and encode the data + try: + with open(png, "rb") as png: + data = png.read() + return base64.b64encode(data) + finally: + self._cleanup(path) + + @staticmethod + def _cleanup(path, err=False): + # don't clean up the log if there's an error + extensions = ["", ".aux", ".dvi", ".png", ".log"] + if err: + extensions.pop() + + # now do the actual cleanup, passing on non-existent files + for extension in extensions: + try: + os.remove("%s%s" % (path, extension)) + except (IOError, OSError): + pass + + def run(self, lines): + """Parses the actual page""" + # Checks for the LaTeX header + use_latex = any(line == '[//]: # (latex: 1)' for line in lines) + if not use_latex: + return lines + + # Re-creates the entire page so we can parse in a multiline env. 
+ page = "\n".join(lines) + + # Adds a preamble mode + self.tex_preamble += self.config[("general", "preamble")] + "\n\\begin{document}\n" + + # Figure out our text strings and math-mode strings + tex_expr = self.math_extract_regex.findall(page) + + # No sense in doing the extra work + if not len(tex_expr): + return page.split("\n") + + # Parse the expressions + new_cache = {} + new_page = '' + n_multiline_expressions = 0 + + while page: + m = self.math_extract_regex.match(page) + if not m: + new_page += page + break + + new_page += m.group(1) + math_match = self.math_match_regex.match(m.group(2)) + if not math_match: + new_page += m.group(2) + else: + expr = m.group(2) + is_multiline = math_match.group(2) is not None + tex_hash = self.hash(expr) + if tex_hash in self.cached: + data = self.cached[tex_hash] + else: + data = self._latex_to_base64(expr).decode() + new_cache[tex_hash] = data + + if is_multiline and n_multiline_expressions > 0: + new_page += '

' + new_page += (multiline_img_expr if is_multiline else img_expr) % ('true', expr, tex_hash, data) + + if is_multiline: + new_page += '

' + n_multiline_expressions += 1 + + page = m.group(5) + + if n_multiline_expressions > 0: + new_page += '

' + + # Cache our data + self.cached.update(new_cache) + with open(cache_file, 'w') as f: + json.dump(self.cached, f) + + # Make sure to re-split the lines + return new_page.split("\n") + + @staticmethod + def hash(tex: str) -> str: + return hashlib.sha1(tex.encode()).hexdigest() + + +class LaTeXPostprocessor(markdown.postprocessors.Postprocessor): + """This post processor extension just allows us to further + refine, if necessary, the document after it has been parsed.""" + + # noinspection PyMethodMayBeStatic + def run(self, text): + # Inline a style for default behavior + text = img_css + text + return text + + +class MarkdownLatex(markdown.Extension): + """Wrapper for LaTeXPreprocessor""" + + def extendMarkdown(self, md): + # Our base LaTeX extension + md.preprocessors.add('latex', + LaTeXPreprocessor(self), ">html_block") + # Our cleanup postprocessing extension + md.postprocessors.add('latex', + LaTeXPostprocessor(self), ">amp_substitute") diff --git a/madblog/routes.py b/madblog/routes.py new file mode 100644 index 0000000..57f7b16 --- /dev/null +++ b/madblog/routes.py @@ -0,0 +1,96 @@ +import os +from typing import Optional + +from flask import Response, send_from_directory as send_from_directory_, render_template + +from .app import app +from .config import config + + +def send_from_directory(path: str, file: str, alternative_path: Optional[str] = None, *args, **kwargs): + if not os.path.exists(os.path.join(path, file)) and alternative_path: + path = alternative_path + return send_from_directory_(path, file, *args, **kwargs) + + +@app.route('/', methods=['GET']) +def home_route(): + return render_template('index.html', pages=app.get_pages(), config=config) + + +@app.route('/img/', methods=['GET']) +def img_route(img: str): + return send_from_directory(app.img_dir, img, config.default_img_dir) + + +@app.route('/favicon.ico', methods=['GET']) +def favicon_route(): + return img_route('favicon.ico') + + +@app.route('/css/