Compare commits

...

17 Commits

Author SHA1 Message Date
Fabio Manganiello 38b079d42d Fixed base_link 2024-04-11 02:21:09 +02:00
Fabio Manganiello 43897cc961 Don't prepend images with the blog base URL if they are already full URLs 2024-04-11 02:18:56 +02:00
Fabio Manganiello bf714a30bc Bump version: 0.2.22 → 0.2.23 2024-04-11 01:24:52 +02:00
Fabio Manganiello 89ea18a805 Allow parentheses in post headers values 2024-04-11 01:24:38 +02:00
Fabio Manganiello e3b30e6a98 Bump version: 0.2.21 → 0.2.22 2024-04-11 01:10:38 +02:00
Fabio Manganiello 817b23ac69 Always cast post datetimes to dates on render 2024-04-11 01:10:27 +02:00
Fabio Manganiello d517416077 Bump version: 0.2.20 → 0.2.21 2024-04-11 01:05:13 +02:00
Fabio Manganiello 390ca758b9 More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00
Fabio Manganiello 609dd14d90 Bump version: 0.2.19 → 0.2.20 2023-05-08 12:11:16 +02:00
Fabio Manganiello 958f4106cc Fixed broken format strings in RSS feed route 2023-05-08 12:10:15 +02:00
Fabio Manganiello 34b187d0d4 Bump version: 0.2.18 → 0.2.19 2023-05-01 23:28:05 +02:00
Fabio Manganiello 7530ecf7ba Use the value of `config.short_feed` in the `/rss` route. 2023-05-01 23:27:53 +02:00
Fabio Manganiello 0eb29e8c5a Bump version: 0.2.17 → 0.2.18 2023-05-01 23:21:00 +02:00
Fabio Manganiello 150af7f868 Added `short_feed` configuration flag. 2023-05-01 23:20:49 +02:00
Fabio Manganiello afc4e09784 Bump version: 0.2.16 → 0.2.17 2023-04-24 20:01:00 +02:00
Fabio Manganiello e06a2145c2 Updated CHANGELOG 2023-04-24 20:00:50 +02:00
Fabio Manganiello 261e5bfc0e Removed `alt` attribute from LaTeX rendered images.
The `alt` contains the LaTeX formula rendered as Unicode characters, and
that may include non-standard characters that break the RSS feed encoding.
2023-04-24 19:58:36 +02:00
9 changed files with 192 additions and 128 deletions

View File

@ -1,5 +1,15 @@
# Changelog
## 0.2.19
- Added `short_feed` configuration flag to permanently disable returning the
full content of the articles in the RSS feed.
## 0.2.16
- Removed `alt` attribute from LaTeX rendered `<img>` tags. It may generate
non-standard Unicode characters that break the RSS feed.
## 0.2.14
- Better support for PWA tags and added a default config-generated `/manifest.json`.

View File

@ -72,6 +72,8 @@ logo: /path/or/url/here
language: en-US
# Show/hide the header (default: true)
header: true
# Enable/disable the short RSS feed (default: false)
short_feed: false
categories:
- category1
@ -133,4 +135,9 @@ $$
RSS feeds for the blog are provided under the `/rss` URL.
By default, the whole HTML-rendered content of an article is returned under `rss.channel.item.description`.
If you only want to include the short description of an article in the feed, use `/rss?short` instead.
If you want the short feed (i.e. without the fully rendered article as a
description) to always be returned, you can set `short_feed: true` in
your configuration file.

View File

@ -1 +1 @@
__version__ = '0.2.16'
__version__ = '0.2.23'

View File

@ -12,11 +12,11 @@ from ._sorters import PagesSorter, PagesSortByTime
class BlogApp(Flask):
_title_header_regex = re.compile(r'^#\s*((\[(.*)\])|(.*))')
_title_header_regex = re.compile(r"^#\s*((\[(.*)\])|(.*))")
def __init__(self, *args, **kwargs):
super().__init__(*args, template_folder=config.templates_dir, **kwargs)
self.pages_dir = os.path.join(config.content_dir, 'markdown')
self.pages_dir = os.path.join(config.content_dir, "markdown")
self.img_dir = config.default_img_dir
self.css_dir = config.default_css_dir
self.js_dir = config.default_js_dir
@ -27,73 +27,77 @@ class BlogApp(Flask):
# `config.content_dir` is treated as the root for markdown files.
self.pages_dir = config.content_dir
img_dir = os.path.join(config.content_dir, 'img')
img_dir = os.path.join(config.content_dir, "img")
if os.path.isdir(img_dir):
self.img_dir = os.path.abspath(img_dir)
else:
self.img_dir = config.content_dir
css_dir = os.path.join(config.content_dir, 'css')
css_dir = os.path.join(config.content_dir, "css")
if os.path.isdir(css_dir):
self.css_dir = os.path.abspath(css_dir)
js_dir = os.path.join(config.content_dir, 'js')
js_dir = os.path.join(config.content_dir, "js")
if os.path.isdir(js_dir):
self.js_dir = os.path.abspath(js_dir)
fonts_dir = os.path.join(config.content_dir, 'fonts')
fonts_dir = os.path.join(config.content_dir, "fonts")
if os.path.isdir(fonts_dir):
self.fonts_dir = os.path.abspath(fonts_dir)
templates_dir = os.path.join(config.content_dir, 'templates')
templates_dir = os.path.join(config.content_dir, "templates")
if os.path.isdir(templates_dir):
self.template_folder = os.path.abspath(templates_dir)
def get_page_metadata(self, page: str) -> dict:
if not page.endswith('.md'):
page = page + '.md'
if not page.endswith(".md"):
page = page + ".md"
md_file = os.path.join(self.pages_dir, page)
if not os.path.isfile(md_file):
abort(404)
metadata = {}
with open(md_file, 'r') as f:
metadata['uri'] = '/article/' + page[:-3]
with open(md_file, "r") as f:
metadata["uri"] = "/article/" + page[:-3]
for line in f.readlines():
for line in f:
if not line:
continue
if not (m := re.match(r'^\[//]: # \(([^:]+):\s*([^)]+)\)\s*$', line)):
if not (m := re.match(r"^\[//]: # \(([^:]+):\s*(.*)\)\s*$", line)):
break
if m.group(1) == 'published':
metadata[m.group(1)] = datetime.date.fromisoformat(m.group(2))
if m.group(1) == "published":
metadata[m.group(1)] = datetime.datetime.fromisoformat(
m.group(2)
).date()
else:
metadata[m.group(1)] = m.group(2)
if not metadata.get('title'):
if not metadata.get("title"):
# If the `title` header isn't available in the file,
# infer it from the first line of the file
with open(md_file, 'r') as f:
header = ''
with open(md_file, "r") as f:
header = ""
for line in f.readlines():
header = line
break
metadata['title_inferred'] = True
metadata["title_inferred"] = True
m = self._title_header_regex.search(header)
if m:
metadata['title'] = m.group(3) or m.group(1)
metadata["title"] = m.group(3) or m.group(1)
else:
metadata['title'] = os.path.basename(md_file)
metadata["title"] = os.path.basename(md_file)
if not metadata.get('published'):
if not metadata.get("published"):
# If the `published` header isn't available in the file,
# infer it from the file's creation date
metadata['published'] = datetime.date.fromtimestamp(os.stat(md_file).st_ctime)
metadata['published_inferred'] = True
metadata["published"] = datetime.date.fromtimestamp(
os.stat(md_file).st_ctime
)
metadata["published_inferred"] = True
return metadata
@ -102,37 +106,42 @@ class BlogApp(Flask):
page: str,
title: Optional[str] = None,
skip_header: bool = False,
skip_html_head: bool = False
skip_html_head: bool = False,
):
if not page.endswith('.md'):
page = page + '.md'
if not page.endswith(".md"):
page = page + ".md"
metadata = self.get_page_metadata(page)
# Don't duplicate the page title if it's been inferred
if not (title or metadata.get('title_inferred')):
title = metadata.get('title', config.title)
if not (title or metadata.get("title_inferred")):
title = metadata.get("title", config.title)
with open(os.path.join(self.pages_dir, page), 'r') as f:
with open(os.path.join(self.pages_dir, page), "r") as f:
return render_template(
'article.html',
"article.html",
config=config,
title=title,
image=metadata.get('image'),
description=metadata.get('description'),
image=metadata.get("image"),
description=metadata.get("description"),
author=(
re.match(r'(.+?)\s+<([^>]+>)', metadata['author'])[1]
if 'author' in metadata else None
),
author_email=(
re.match(r'(.+?)\s+<([^>]+)>', metadata['author'])[2]
if 'author' in metadata else None
),
published=(
metadata['published'].strftime('%b %d, %Y')
if metadata.get('published') and not metadata.get('published_inferred')
re.match(r"(.+?)\s+<([^>]+>)", metadata["author"])[1]
if "author" in metadata
else None
),
content=markdown(f.read(), extensions=['fenced_code', 'codehilite', MarkdownLatex()]),
author_email=(
re.match(r"(.+?)\s+<([^>]+)>", metadata["author"])[2]
if "author" in metadata
else None
),
published=(
metadata["published"].strftime("%b %d, %Y")
if metadata.get("published")
and not metadata.get("published_inferred")
else None
),
content=markdown(
f.read(), extensions=["fenced_code", "codehilite", MarkdownLatex()]
),
skip_header=skip_header,
skip_html_head=skip_html_head,
)
@ -145,26 +154,25 @@ class BlogApp(Flask):
sorter: Type[PagesSorter] = PagesSortByTime,
reverse: bool = True,
) -> List[Tuple[int, dict]]:
pages_dir = app.pages_dir.rstrip('/')
pages_dir = app.pages_dir.rstrip("/")
pages = [
{
'path': os.path.join(root[len(pages_dir)+1:], f),
'folder': root[len(pages_dir)+1:],
'content': (
"path": os.path.join(root[len(pages_dir) + 1 :], f),
"folder": root[len(pages_dir) + 1 :],
"content": (
self.get_page(
os.path.join(root, f),
skip_header=skip_header,
skip_html_head=skip_html_head,
)
if with_content else ''
),
**self.get_page_metadata(
os.path.join(root[len(pages_dir)+1:], f)
if with_content
else ""
),
**self.get_page_metadata(os.path.join(root[len(pages_dir) + 1 :], f)),
}
for root, _, files in os.walk(pages_dir, followlinks=True)
for f in files
if f.endswith('.md')
if f.endswith(".md")
]
sorter_func = sorter(pages)

View File

@ -1,57 +1,61 @@
import os
from typing import List
import yaml
from dataclasses import dataclass
from dataclasses import dataclass, field
@dataclass
class Config:
title = 'Blog'
description = ''
link = '/'
home_link = '/'
language = 'en-US'
logo = '/img/icon.png'
title = "Blog"
description = ""
link = "/"
home_link = "/"
language = "en-US"
logo = "/img/icon.png"
header = True
content_dir = '.'
categories = None
content_dir = "."
categories: List[str] = field(default_factory=list)
short_feed = False
basedir = os.path.abspath(os.path.dirname(__file__))
templates_dir = os.path.join(basedir, 'templates')
static_dir = os.path.join(basedir, 'static')
default_css_dir = os.path.join(static_dir, 'css')
default_js_dir = os.path.join(static_dir, 'js')
default_fonts_dir = os.path.join(static_dir, 'fonts')
default_img_dir = os.path.join(static_dir, 'img')
templates_dir = os.path.join(basedir, "templates")
static_dir = os.path.join(basedir, "static")
default_css_dir = os.path.join(static_dir, "css")
default_js_dir = os.path.join(static_dir, "js")
default_fonts_dir = os.path.join(static_dir, "fonts")
default_img_dir = os.path.join(static_dir, "img")
config = Config()
def init_config(content_dir='.', config_file='config.yaml'):
def init_config(content_dir=".", config_file="config.yaml"):
cfg = {}
config.content_dir = content_dir
if os.path.isfile(config_file):
with open(config_file, 'r') as f:
with open(config_file, "r") as f:
cfg = yaml.safe_load(f)
if cfg.get('title'):
config.title = cfg['title']
if cfg.get('description'):
config.description = cfg['description']
if cfg.get('link'):
config.link = cfg['link']
if cfg.get('home_link'):
config.home_link = cfg['home_link']
if cfg.get('logo') is not None:
config.logo = cfg['logo']
if cfg.get('language'):
config.language = cfg['language']
if cfg.get('header') is False:
if cfg.get("title"):
config.title = cfg["title"]
if cfg.get("description"):
config.description = cfg["description"]
if cfg.get("link"):
config.link = cfg["link"]
if cfg.get("home_link"):
config.home_link = cfg["home_link"]
if cfg.get("logo") is not None:
config.logo = cfg["logo"]
if cfg.get("language"):
config.language = cfg["language"]
if cfg.get("header") is False:
config.header = False
if cfg.get("short_feed"):
config.short_feed = True
config.categories = cfg.get('categories', [])
config.categories = cfg.get("categories", [])
# vim:sw=4:ts=4:et:

View File

@ -31,11 +31,11 @@ def call(*args, **kwargs):
# Defines our basic inline image
img_expr = '<img class="latex inline math-%s" alt="%s" id="%s" src="data:image/png;base64,%s">'
img_expr = '<img class="latex inline math-%s" id="%s" src="data:image/png;base64,%s">'
# Defines multiline expression image
multiline_img_expr = '''<div class="multiline-wrapper">
<img class="latex multiline math-%s" alt="%s" id="%s" src="data:image/png;base64,%s"></div>'''
multiline_img_expr = """<div class="multiline-wrapper">
<img class="latex multiline math-%s" id="%s" src="data:image/png;base64,%s"></div>"""
# Base CSS template
img_css = """<style scoped>
@ -55,8 +55,8 @@ img.latex.inline {
</style>"""
# Cache and temp file paths
tmpdir = tempfile.gettempdir() + '/markdown-latex'
cache_file = tmpdir + '/latex.cache'
tmpdir = tempfile.gettempdir() + "/markdown-latex"
cache_file = tmpdir + "/latex.cache"
class LaTeXPreprocessor(markdown.preprocessors.Preprocessor):
@ -75,16 +75,20 @@ class LaTeXPreprocessor(markdown.preprocessors.Preprocessor):
"""
# Math TeX extraction regex
math_extract_regex = re.compile(r'(.+?)((\\\(.+?\\\))|(\$\$\n.+?\n\$\$\n))(.*)', re.MULTILINE | re.DOTALL)
math_extract_regex = re.compile(
r"(.+?)((\\\(.+?\\\))|(\$\$\n.+?\n\$\$\n))(.*)", re.MULTILINE | re.DOTALL
)
# Math TeX matching regex
math_match_regex = re.compile(r'\s*(\\\(.+?\\\))|(\$\$\n.+?\n\$\$\n)\s*', re.MULTILINE | re.DOTALL)
math_match_regex = re.compile(
r"\s*(\\\(.+?\\\))|(\$\$\n.+?\n\$\$\n)\s*", re.MULTILINE | re.DOTALL
)
def __init__(self, *_, **__):
if not os.path.isdir(tmpdir):
os.makedirs(tmpdir)
try:
with open(cache_file, 'r') as f:
with open(cache_file, "r") as f:
self.cached = json.load(f)
except (IOError, json.JSONDecodeError):
self.cached = {}
@ -94,7 +98,8 @@ class LaTeXPreprocessor(markdown.preprocessors.Preprocessor):
("dvipng", "args"): "-q -T tight -bg Transparent -z 9 -D 200",
("delimiters", "text"): "%",
("delimiters", "math"): "$",
("delimiters", "preamble"): "%%"}
("delimiters", "preamble"): "%%",
}
def _latex_to_base64(self, tex):
"""Generates a base64 representation of TeX string"""
@ -104,18 +109,24 @@ class LaTeXPreprocessor(markdown.preprocessors.Preprocessor):
with os.fdopen(tmp_file_fd, "w") as tmp_file:
tmp_file.write(self.tex_preamble)
tmp_file.write(tex)
tmp_file.write('\n\\end{document}')
tmp_file.write("\n\\end{document}")
# compile LaTeX document. A DVI file is created
status = call(('latex -halt-on-error -output-directory={:s} {:s}'
.format(tmpdir, path)).split(),
stdout=PIPE, timeout=10)
status = call(
(
"latex -halt-on-error -output-directory={:s} {:s}".format(tmpdir, path)
).split(),
stdout=PIPE,
timeout=10,
)
# clean up if the above failed
if status:
self._cleanup(path, err=True)
raise Exception("Couldn't compile LaTeX document." +
"Please read '%s.log' for more detail." % path)
raise Exception(
"Couldn't compile LaTeX document."
+ "Please read '%s.log' for more detail." % path
)
# Run dvipng on the generated DVI file. Use tight bounding box.
# Magnification is set to 1200
@ -129,8 +140,10 @@ class LaTeXPreprocessor(markdown.preprocessors.Preprocessor):
# clean up if we couldn't make the above work
if status:
self._cleanup(path, err=True)
raise Exception("Couldn't convert LaTeX to image." +
"Please read '%s.log' for more detail." % path)
raise Exception(
"Couldn't convert LaTeX to image."
+ "Please read '%s.log' for more detail." % path
)
# Read the png and encode the data
try:
@ -157,7 +170,7 @@ class LaTeXPreprocessor(markdown.preprocessors.Preprocessor):
def run(self, lines):
"""Parses the actual page"""
# Checks for the LaTeX header
use_latex = any(line == '[//]: # (latex: 1)' for line in lines)
use_latex = any(line == "[//]: # (latex: 1)" for line in lines)
if not use_latex:
return lines
@ -165,7 +178,9 @@ class LaTeXPreprocessor(markdown.preprocessors.Preprocessor):
page = "\n".join(lines)
# Adds a preamble mode
self.tex_preamble += self.config[("general", "preamble")] + "\n\\begin{document}\n"
self.tex_preamble += (
self.config[("general", "preamble")] + "\n\\begin{document}\n"
)
# Figure out our text strings and math-mode strings
tex_expr = self.math_extract_regex.findall(page)
@ -176,7 +191,7 @@ class LaTeXPreprocessor(markdown.preprocessors.Preprocessor):
# Parse the expressions
new_cache = {}
new_page = ''
new_page = ""
n_multiline_expressions = 0
while page:
@ -200,21 +215,25 @@ class LaTeXPreprocessor(markdown.preprocessors.Preprocessor):
new_cache[tex_hash] = data
if is_multiline and n_multiline_expressions > 0:
new_page += '</p>'
new_page += (multiline_img_expr if is_multiline else img_expr) % ('true', expr, tex_hash, data)
new_page += "</p>"
new_page += (multiline_img_expr if is_multiline else img_expr) % (
"true",
tex_hash,
data,
)
if is_multiline:
new_page += '<p>'
new_page += "<p>"
n_multiline_expressions += 1
page = m.group(5)
if n_multiline_expressions > 0:
new_page += '</p>'
new_page += "</p>"
# Cache our data
self.cached.update(new_cache)
with open(cache_file, 'w') as f:
with open(cache_file, "w") as f:
json.dump(self.cached, f)
# Make sure to re-split the lines
@ -230,7 +249,7 @@ class MarkdownLatex(markdown.Extension):
def extendMarkdown(self, md):
md.preprocessors.register(
LaTeXPreprocessor(self),
'latex',
LaTeXPreprocessor(self),
"latex",
1,
)

View File

@ -1,5 +1,7 @@
import os
import re
from typing import Optional
from urllib.parse import urljoin
from flask import (
jsonify,
@ -115,8 +117,12 @@ def article_route(article: str):
@app.route("/rss", methods=["GET"])
def rss_route():
pages = app.get_pages(with_content=True, skip_header=True, skip_html_head=True)
short_description = "short" in request.args
short_description = "short" in request.args or config.short_feed
pages = app.get_pages(
with_content=not short_description,
skip_header=True,
skip_html_head=True,
)
return Response(
"""<?xml version="1.0" encoding="UTF-8" ?>
@ -149,27 +155,36 @@ def rss_route():
),
items="\n\n".join(
[
"""
(
"""
<item>
<title>{title}</title>
<link>{base_link}{link}</link>
<pubDate>{published}</pubDate>
<description><![CDATA[{content}]]></description>
<media:content medium="image" url="{base_link}{image}" width="200" height="150" />
<media:content medium="image" url="{image}" width="200" height="150" />
</item>
""".format(
"""
).format(
base_link=config.link,
title=page.get("title", "[No Title]"),
link=page.get("uri", ""),
published=page["published"].strftime(
"%a, %d %b %Y %H:%M:%S GMT"
)
if "published" in page
else "",
content=page.get("description", "")
if short_description
else page.get("content", ""),
image=page.get("image", ""),
published=(
page["published"].strftime("%a, %d %b %Y %H:%M:%S GMT")
if "published" in page
else ""
),
content=(
page.get("description", "")
if short_description
else page.get("content", "")
),
image=(
urljoin(config.link, page["image"])
if page.get("image")
and not re.search(r"^https?://", page["image"])
else page.get("image", "")
),
)
for _, page in pages
]

View File

@ -1,7 +1,8 @@
[bumpversion]
current_version = 0.2.16
current_version = 0.2.23
commit = True
tag = True
[metadata]
description-file = README.md

View File

@ -11,7 +11,7 @@ def readfile(file):
setup(
name='madblog',
version='0.2.16',
version='0.2.23',
author='Fabio Manganiello',
author_email='info@fabiomanganiello.com',
description='A minimal platform for Markdown-based blogs',