# madblog/madblog/app.py

import datetime
import os
import re
from typing import Optional, List, Tuple, Type

from flask import Flask, abort, render_template
from markdown import markdown

from .config import config
from .latex import MarkdownLatex
from ._sorters import PagesSorter, PagesSortByTime


class BlogApp(Flask):
    """
    Flask application that renders Markdown files as blog articles.

    The content is looked up under ``config.content_dir``. The optional
    ``markdown``, ``img``, ``css``, ``js``, ``fonts`` and ``templates``
    sub-folders override the corresponding defaults when they exist.
    """

    # Matches a leading Markdown heading, e.g. `# Title` or `# [Title](link)`.
    # Group 3 holds the text inside the square brackets (if any), group 1 the
    # whole heading text.
    _title_header_regex = re.compile(r"^#\s*((\[(.*)\])|(.*))")

    # Matches a metadata header line, e.g. `[//]: # (title: My article)`.
    # Group 1 is the key, group 2 the value.
    _metadata_header_regex = re.compile(r"^\[//]: # \(([^:]+):\s*(.*)\)\s*$")

    # Matches an author header in the form `Name <email>`.
    _author_regex = re.compile(r"(.+?)\s+<([^>]+)>")

    def __init__(self, *args, **kwargs):
        """
        Initialize the application and resolve the content directories.

        All positional and keyword arguments are forwarded to
        :class:`flask.Flask`, with ``template_folder`` set to
        ``config.templates_dir``.
        """
        super().__init__(*args, template_folder=config.templates_dir, **kwargs)
        self.pages_dir = os.path.join(config.content_dir, "markdown")
        self.img_dir = config.default_img_dir
        self.css_dir = config.default_css_dir
        self.js_dir = config.default_js_dir
        self.fonts_dir = config.default_fonts_dir

        if not os.path.isdir(self.pages_dir):
            # If the `markdown` subfolder does not exist, then the whole
            # `config.content_dir` is treated as the root for markdown files.
            self.pages_dir = config.content_dir

        img_dir = os.path.join(config.content_dir, "img")
        if os.path.isdir(img_dir):
            self.img_dir = os.path.abspath(img_dir)
        else:
            # No `img` subfolder: images are looked up in the content folder
            # itself. Made absolute like every other custom content folder,
            # so that it is never resolved against a different base directory.
            self.img_dir = os.path.abspath(config.content_dir)

        css_dir = os.path.join(config.content_dir, "css")
        if os.path.isdir(css_dir):
            self.css_dir = os.path.abspath(css_dir)

        js_dir = os.path.join(config.content_dir, "js")
        if os.path.isdir(js_dir):
            self.js_dir = os.path.abspath(js_dir)

        fonts_dir = os.path.join(config.content_dir, "fonts")
        if os.path.isdir(fonts_dir):
            self.fonts_dir = os.path.abspath(fonts_dir)

        templates_dir = os.path.join(config.content_dir, "templates")
        if os.path.isdir(templates_dir):
            self.template_folder = os.path.abspath(templates_dir)

    @classmethod
    def _parse_author(
        cls, author: Optional[str]
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Split an ``author`` header value into ``(name, email)``.

        A value in the form ``Name <email>`` yields both parts. Any other
        non-empty value is returned as the name, with ``None`` as the email.
        A missing or empty value yields ``(None, None)``.
        """
        if not author:
            return None, None

        m = cls._author_regex.match(author)
        if m:
            return m.group(1), m.group(2)

        return author.strip() or None, None

    def get_page_metadata(self, page: str) -> dict:
        """
        Parse the metadata of a Markdown page.

        The metadata is read from the ``[//]: # (key: value)`` lines at the
        top of the file. Parsing stops at the first non-blank line that is
        not a metadata header.

        :param page: Path of the page, relative to ``self.pages_dir``. The
            ``.md`` extension is appended if missing.
        :return: A dictionary with the parsed headers, plus:

            - ``uri``: ``/article/<page without extension>``.
            - ``title``: taken from the headers, otherwise inferred from the
              first line of the file if it is a heading, otherwise the file
              name (``title_inferred`` is set to ``True`` in the last two
              cases).
            - ``published``: a :class:`datetime.date`, taken from the headers
              or inferred from the file's ``st_ctime`` (in which case
              ``published_inferred`` is set to ``True``).

        Aborts the request with a 404 if the file does not exist. A
        ``published`` header that is not a valid ISO timestamp raises
        :class:`ValueError`.
        """
        if not page.endswith(".md"):
            page = page + ".md"

        # NOTE(review): `page` is joined to `pages_dir` without any
        # normalization - confirm that callers sanitize user-supplied paths.
        md_file = os.path.join(self.pages_dir, page)
        if not os.path.isfile(md_file):
            abort(404)

        metadata = {"uri": "/article/" + page[:-3]}
        first_line = None

        with open(md_file, "r") as f:
            for line in f:
                if first_line is None:
                    # Remembered for the title inference below, so the file
                    # doesn't have to be opened a second time.
                    first_line = line

                # Lines read from a file keep their trailing newline, so a
                # blank line has to be detected after stripping.
                if not line.strip():
                    continue

                if not (m := self._metadata_header_regex.match(line)):
                    break

                key, value = m.group(1), m.group(2)
                if key == "published":
                    # Both date and datetime timestamps are accepted, and
                    # they are always reduced to a date.
                    metadata[key] = datetime.datetime.fromisoformat(value).date()
                else:
                    metadata[key] = value

        if not metadata.get("title"):
            # If the `title` header isn't available in the file,
            # infer it from the first line of the file
            m = self._title_header_regex.search(first_line or "")
            inferred_title = (m.group(3) or m.group(1)) if m else None
            metadata["title_inferred"] = True
            # Fall back to the file name if the first line isn't a heading,
            # or if the heading is empty.
            metadata["title"] = inferred_title or os.path.basename(md_file)

        if not metadata.get("published"):
            # If the `published` header isn't available in the file,
            # infer it from the file's creation date.
            # NOTE: `st_ctime` is the creation time on Windows, but the time
            # of the last metadata change on Unix.
            metadata["published"] = datetime.date.fromtimestamp(
                os.stat(md_file).st_ctime
            )
            metadata["published_inferred"] = True

        return metadata

    def get_page(
        self,
        page: str,
        title: Optional[str] = None,
        skip_header: bool = False,
        skip_html_head: bool = False,
    ):
        """
        Render a Markdown page through the ``article.html`` template.

        :param page: Path of the page, relative to ``self.pages_dir``. The
            ``.md`` extension is appended if missing.
        :param title: Title passed to the template. If not specified, the
            ``title`` header of the page is used, unless it was inferred
            from the page content.
        :param skip_header: Passed as it is to the template.
        :param skip_html_head: Passed as it is to the template.
        :return: The output of ``render_template``.
        """
        if not page.endswith(".md"):
            page = page + ".md"

        metadata = self.get_page_metadata(page)
        # Don't duplicate the page title if it's been inferred
        if not (title or metadata.get("title_inferred")):
            title = metadata.get("title", config.title)

        # An author that isn't in the `Name <email>` format is still
        # rendered, just without an email address.
        author, author_email = self._parse_author(metadata.get("author"))

        # An inferred publication date is not shown on the page
        published = None
        if metadata.get("published") and not metadata.get("published_inferred"):
            published = metadata["published"].strftime("%b %d, %Y")

        with open(os.path.join(self.pages_dir, page), "r") as f:
            content = markdown(
                f.read(), extensions=["fenced_code", "codehilite", MarkdownLatex()]
            )

        return render_template(
            "article.html",
            config=config,
            title=title,
            image=metadata.get("image"),
            description=metadata.get("description"),
            author=author,
            author_email=author_email,
            published=published,
            content=content,
            skip_header=skip_header,
            skip_html_head=skip_html_head,
        )

    def get_pages(
        self,
        with_content: bool = False,
        skip_header: bool = False,
        skip_html_head: bool = False,
        sorter: Type[PagesSorter] = PagesSortByTime,
        reverse: bool = True,
    ) -> List[Tuple[int, dict]]:
        """
        List all the ``.md`` pages under ``self.pages_dir``, recursively.

        :param with_content: If true, the rendered page is included under
            the ``content`` key (otherwise ``content`` is an empty string).
        :param skip_header: Forwarded to :meth:`get_page`.
        :param skip_html_head: Forwarded to :meth:`get_page`.
        :param sorter: Class called with the list of pages; the returned
            object is used as the sort key.
        :param reverse: Whether the pages should be sorted in reverse order.
        :return: A list of ``(index, page)`` tuples, where ``page`` contains
            ``path``, ``folder`` (both relative to the pages directory),
            ``content`` and the page metadata.
        """
        # Use this instance rather than the module-level `app`
        pages_dir = self.pages_dir.rstrip("/")
        pages = []

        for root, _, files in os.walk(pages_dir, followlinks=True):
            # Path of the current folder relative to the pages directory
            # (empty for the pages directory itself)
            folder = root[len(pages_dir) + 1 :]

            for f in files:
                if not f.endswith(".md"):
                    continue

                # Both `get_page` and `get_page_metadata` join the path to
                # `self.pages_dir`, so they both get the relative path.
                path = os.path.join(folder, f)
                pages.append(
                    {
                        "path": path,
                        "folder": folder,
                        "content": (
                            self.get_page(
                                path,
                                skip_header=skip_header,
                                skip_html_head=skip_html_head,
                            )
                            if with_content
                            else ""
                        ),
                        **self.get_page_metadata(path),
                    }
                )

        sorter_func = sorter(pages)
        pages.sort(key=sorter_func, reverse=reverse)
        return list(enumerate(pages))


# Module-level application instance, created when this module is imported.
app = BlogApp(__name__)


# NOTE(review): imported at the bottom of the file, after `app` has been
# created - presumably because `.routes` needs `app` from this module in order
# to register its views (circular import). Confirm before moving it to the top.
from .routes import *


# vim:sw=4:ts=4:et:
First commit 2022-01-11 20:16:27 +01:00			`import datetime`
Do os.path.abspath on custom content folders in the app constructor before initialization If relative paths are used for the content folder then send_from_directory may mistakenly interpret the paths as relative to the main application, instead of the content folder 2022-01-12 00:30:25 +01:00			`import os`
First commit 2022-01-11 20:16:27 +01:00			`import re`
Support for folders for pages/articles 2022-06-14 00:32:35 +02:00			`from typing import Optional, List, Tuple, Type`
First commit 2022-01-11 20:16:27 +01:00
docs refactor and minor fixes 2022-01-11 23:38:28 +01:00			`from flask import Flask, abort`
First commit 2022-01-11 20:16:27 +01:00			`from markdown import markdown`

fixed removed import 2022-01-12 00:18:30 +01:00			`from .config import config`
First commit 2022-01-11 20:16:27 +01:00			`from .latex import MarkdownLatex`
Support for folders for pages/articles 2022-06-14 00:32:35 +02:00			`from ._sorters import PagesSorter, PagesSortByTime`
First commit 2022-01-11 20:16:27 +01:00

			`class BlogApp(Flask):`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`_title_header_regex = re.compile(r"^#\s((\[(.)\])\|(.*))")`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00
First commit 2022-01-11 20:16:27 +01:00			`def __init__(self, args, *kwargs):`
			`super().__init__(args, template_folder=config.templates_dir, *kwargs)`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`self.pages_dir = os.path.join(config.content_dir, "markdown")`
First commit 2022-01-11 20:16:27 +01:00			`self.img_dir = config.default_img_dir`
			`self.css_dir = config.default_css_dir`
Added support for PWA logic 2022-06-14 10:25:57 +02:00			`self.js_dir = config.default_js_dir`
First commit 2022-01-11 20:16:27 +01:00			`self.fonts_dir = config.default_fonts_dir`

			`if not os.path.isdir(self.pages_dir):`
More flexible structure for the content directory. If the `markdown` subfolder does not exist, then the whole `config.content_dir` is treated as the root for markdown files. 2022-06-12 00:01:51 +02:00			# If the `markdown` subfolder does not exist, then the whole
			# `config.content_dir` is treated as the root for markdown files.
If img_dir is not found under content_dir then treat content_dir itself as img_dir 2022-06-14 00:44:01 +02:00			`self.pages_dir = config.content_dir`
First commit 2022-01-11 20:16:27 +01:00
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`img_dir = os.path.join(config.content_dir, "img")`
First commit 2022-01-11 20:16:27 +01:00			`if os.path.isdir(img_dir):`
Do os.path.abspath on custom content folders in the app constructor before initialization If relative paths are used for the content folder then send_from_directory may mistakenly interpret the paths as relative to the main application, instead of the content folder 2022-01-12 00:30:25 +01:00			`self.img_dir = os.path.abspath(img_dir)`
If img_dir is not found under content_dir then treat content_dir itself as img_dir 2022-06-14 00:44:01 +02:00			`else:`
			`self.img_dir = config.content_dir`
First commit 2022-01-11 20:16:27 +01:00
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`css_dir = os.path.join(config.content_dir, "css")`
First commit 2022-01-11 20:16:27 +01:00			`if os.path.isdir(css_dir):`
Do os.path.abspath on custom content folders in the app constructor before initialization If relative paths are used for the content folder then send_from_directory may mistakenly interpret the paths as relative to the main application, instead of the content folder 2022-01-12 00:30:25 +01:00			`self.css_dir = os.path.abspath(css_dir)`
First commit 2022-01-11 20:16:27 +01:00
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`js_dir = os.path.join(config.content_dir, "js")`
Added support for PWA logic 2022-06-14 10:25:57 +02:00			`if os.path.isdir(js_dir):`
			`self.js_dir = os.path.abspath(js_dir)`

More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`fonts_dir = os.path.join(config.content_dir, "fonts")`
First commit 2022-01-11 20:16:27 +01:00			`if os.path.isdir(fonts_dir):`
Do os.path.abspath on custom content folders in the app constructor before initialization If relative paths are used for the content folder then send_from_directory may mistakenly interpret the paths as relative to the main application, instead of the content folder 2022-01-12 00:30:25 +01:00			`self.fonts_dir = os.path.abspath(fonts_dir)`
First commit 2022-01-11 20:16:27 +01:00
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`templates_dir = os.path.join(config.content_dir, "templates")`
First commit 2022-01-11 20:16:27 +01:00			`if os.path.isdir(templates_dir):`
Do os.path.abspath on custom content folders in the app constructor before initialization If relative paths are used for the content folder then send_from_directory may mistakenly interpret the paths as relative to the main application, instead of the content folder 2022-01-12 00:30:25 +01:00			`self.template_folder = os.path.abspath(templates_dir)`
First commit 2022-01-11 20:16:27 +01:00
			`def get_page_metadata(self, page: str) -> dict:`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`if not page.endswith(".md"):`
			`page = page + ".md"`
First commit 2022-01-11 20:16:27 +01:00
Support for folders for pages/articles 2022-06-14 00:32:35 +02:00			`md_file = os.path.join(self.pages_dir, page)`
			`if not os.path.isfile(md_file):`
First commit 2022-01-11 20:16:27 +01:00			`abort(404)`

			`metadata = {}`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`with open(md_file, "r") as f:`
			`metadata["uri"] = "/article/" + page[:-3]`
First commit 2022-01-11 20:16:27 +01:00
Allow parentheses in post headers values 2024-04-11 01:24:34 +02:00			`for line in f:`
First commit 2022-01-11 20:16:27 +01:00			`if not line:`
			`continue`

Allow parentheses in post headers values 2024-04-11 01:24:34 +02:00			`if not (m := re.match(r"^\[//]: # \(([^:]+):\s(.)\)\s*$", line)):`
First commit 2022-01-11 20:16:27 +01:00			`break`

More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`if m.group(1) == "published":`
Always cast post datetimes to dates on render 2024-04-11 01:10:25 +02:00			`metadata[m.group(1)] = datetime.datetime.fromisoformat(`
			`m.group(2)`
			`).date()`
First commit 2022-01-11 20:16:27 +01:00			`else:`
			`metadata[m.group(1)] = m.group(2)`

More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`if not metadata.get("title"):`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			# If the `title` header isn't available in the file,
			`# infer it from the first line of the file`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`with open(md_file, "r") as f:`
			`header = ""`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`for line in f.readlines():`
			`header = line`
			`break`

More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`metadata["title_inferred"] = True`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`m = self._title_header_regex.search(header)`
			`if m:`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`metadata["title"] = m.group(3) or m.group(1)`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`else:`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`metadata["title"] = os.path.basename(md_file)`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`if not metadata.get("published"):`
If no published date is available on the headers, infer it from the file creation date 2022-06-11 23:02:07 +02:00			# If the `published` header isn't available in the file,
			`# infer it from the file's creation date`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`metadata["published"] = datetime.date.fromtimestamp(`
			`os.stat(md_file).st_ctime`
			`)`
			`metadata["published_inferred"] = True`
If no published date is available on the headers, infer it from the file creation date 2022-06-11 23:02:07 +02:00
First commit 2022-01-11 20:16:27 +01:00			`return metadata`

Pages rendered in the RSS feed route shouldn't include the HTML head boilerplate 2022-12-04 15:14:37 +01:00			`def get_page(`
			`self,`
			`page: str,`
			`title: Optional[str] = None,`
			`skip_header: bool = False,`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`skip_html_head: bool = False,`
Pages rendered in the RSS feed route shouldn't include the HTML head boilerplate 2022-12-04 15:14:37 +01:00			`):`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`if not page.endswith(".md"):`
			`page = page + ".md"`
First commit 2022-01-11 20:16:27 +01:00
			`metadata = self.get_page_metadata(page)`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`# Don't duplicate the page title if it's been inferred`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`if not (title or metadata.get("title_inferred")):`
			`title = metadata.get("title", config.title)`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`with open(os.path.join(self.pages_dir, page), "r") as f:`
First commit 2022-01-11 20:16:27 +01:00			`return render_template(`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`"article.html",`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`config=config,`
			`title=title,`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`image=metadata.get("image"),`
			`description=metadata.get("description"),`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`author=(`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`re.match(r"(.+?)\s+<([^>]+>)", metadata["author"])[1]`
			`if "author" in metadata`
			`else None`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`),`
			`author_email=(`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`re.match(r"(.+?)\s+<([^>]+)>", metadata["author"])[2]`
			`if "author" in metadata`
			`else None`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`),`
			`published=(`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`metadata["published"].strftime("%b %d, %Y")`
			`if metadata.get("published")`
			`and not metadata.get("published_inferred")`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`else None`
			`),`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`content=markdown(`
			`f.read(), extensions=["fenced_code", "codehilite", MarkdownLatex()]`
			`),`
Pages rendered in the RSS feed route shouldn't include the HTML head boilerplate 2022-12-04 15:14:37 +01:00			`skip_header=skip_header,`
			`skip_html_head=skip_html_head,`
First commit 2022-01-11 20:16:27 +01:00			`)`

Support for folders for pages/articles 2022-06-14 00:32:35 +02:00			`def get_pages(`
			`self,`
			`with_content: bool = False,`
			`skip_header: bool = False,`
Pages rendered in the RSS feed route shouldn't include the HTML head boilerplate 2022-12-04 15:14:37 +01:00			`skip_html_head: bool = False,`
Support for folders for pages/articles 2022-06-14 00:32:35 +02:00			`sorter: Type[PagesSorter] = PagesSortByTime,`
			`reverse: bool = True,`
			`) -> List[Tuple[int, dict]]:`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`pages_dir = app.pages_dir.rstrip("/")`
Support for folders for pages/articles 2022-06-14 00:32:35 +02:00			`pages = [`
			`{`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`"path": os.path.join(root[len(pages_dir) + 1 :], f),`
			`"folder": root[len(pages_dir) + 1 :],`
			`"content": (`
Support for folders for pages/articles 2022-06-14 00:32:35 +02:00			`self.get_page(`
			`os.path.join(root, f),`
Pages rendered in the RSS feed route shouldn't include the HTML head boilerplate 2022-12-04 15:14:37 +01:00			`skip_header=skip_header,`
			`skip_html_head=skip_html_head,`
Support for folders for pages/articles 2022-06-14 00:32:35 +02:00			`)`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`if with_content`
			`else ""`
Support for folders for pages/articles 2022-06-14 00:32:35 +02:00			`),`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`**self.get_page_metadata(os.path.join(root[len(pages_dir) + 1 :], f)),`
Support for folders for pages/articles 2022-06-14 00:32:35 +02:00			`}`
			`for root, _, files in os.walk(pages_dir, followlinks=True)`
			`for f in files`
More resilient logic to handle both date and datetime timestamps in a post metadata 2024-04-11 01:04:13 +02:00			`if f.endswith(".md")`
Support for folders for pages/articles 2022-06-14 00:32:35 +02:00			`]`

			`sorter_func = sorter(pages)`
			`pages.sort(key=sorter_func, reverse=reverse)`
			`return [(i, page) for i, page in enumerate(pages)]`
First commit 2022-01-11 20:16:27 +01:00

			`app = BlogApp(__name__)`


			`from .routes import *`


			`# vim:sw=4:ts=4:et:`