import datetime
import os
import re
from typing import Optional, List, Tuple, Type

from flask import Flask, abort, render_template
from markdown import markdown

from .config import config
from .latex import MarkdownLatex
from ._sorters import PagesSorter, PagesSortByTime
|
2022-01-11 20:16:27 +01:00
|
|
|
|
|
|
|
|
|
|
|
class BlogApp(Flask):
    """Flask application serving a blog rendered from markdown files.

    Pages live under ``config.content_dir`` (in a ``markdown`` subfolder
    when one exists). Static assets (img, css, js, fonts) and templates
    fall back to the packaged defaults unless a same-named subfolder
    exists under the content directory.
    """

    # Markdown H1 title, optionally wrapped in `[]` (link syntax):
    # group 3 holds the bracketed text, group 1 the raw remainder.
    _title_header_regex = re.compile(r'^#\s*((\[(.*)\])|(.*))')

    # Metadata header lines embedded as markdown comments at the top of a
    # page: `[//]: # (key: value)`.
    _metadata_regex = re.compile(r'^\[//]: # \(([^:]+):\s*([^)]+)\)\s*$')

    # Author header of the form `Full Name <email@host>`.
    # NOTE(review): the original used `<([^>]+>)` in one place (the closing
    # `>` leaked inside the group) — unified here to the correct pattern.
    _author_regex = re.compile(r'(.+?)\s+<([^>]+)>')

    def __init__(self, *args, **kwargs):
        """Initialize the app and resolve all content directories."""
        super().__init__(*args, template_folder=config.templates_dir, **kwargs)

        self.pages_dir = os.path.join(config.content_dir, 'markdown')
        if not os.path.isdir(self.pages_dir):
            # If the `markdown` subfolder does not exist, then the whole
            # `config.content_dir` is treated as the root for markdown files.
            self.pages_dir = config.content_dir

        self.img_dir = self._resolve_dir('img', config.content_dir)
        self.css_dir = self._resolve_dir('css', config.default_css_dir)
        self.js_dir = self._resolve_dir('js', config.default_js_dir)
        self.fonts_dir = self._resolve_dir('fonts', config.default_fonts_dir)
        # Keep Flask's default (config.templates_dir) unless the content
        # directory provides its own templates.
        self.template_folder = self._resolve_dir('templates', self.template_folder)

    @staticmethod
    def _resolve_dir(name: str, default: str) -> str:
        """Return the absolute path of ``<content_dir>/<name>`` if that
        directory exists, otherwise ``default``."""
        path = os.path.join(config.content_dir, name)
        return os.path.abspath(path) if os.path.isdir(path) else default

    def get_page_metadata(self, page: str) -> dict:
        """Parse the metadata headers of a markdown page.

        :param page: Page path relative to ``pages_dir``; the ``.md``
            extension may be omitted.
        :return: A dict with at least ``uri``, ``title`` and ``published``.
            ``title_inferred`` / ``published_inferred`` are set to True
            when the corresponding header was missing and had to be
            derived from the file itself.

        Aborts with HTTP 404 if the page file does not exist.
        """
        if not page.endswith('.md'):
            page += '.md'

        md_file = os.path.join(self.pages_dir, page)
        if not os.path.isfile(md_file):
            abort(404)

        metadata = {'uri': '/article/' + page[:-3]}

        with open(md_file, 'r') as f:
            # Headers are only read from the top of the file: stop at the
            # first line that isn't a `[//]: # (key: value)` comment.
            for line in f:
                if not line:
                    continue

                m = self._metadata_regex.match(line)
                if not m:
                    break

                key, value = m.group(1), m.group(2)
                metadata[key] = (
                    datetime.date.fromisoformat(value)
                    if key == 'published' else value
                )

        if not metadata.get('title'):
            # If the `title` header isn't available in the file,
            # infer it from the first line of the file.
            with open(md_file, 'r') as f:
                header = f.readline()

            metadata['title_inferred'] = True
            m = self._title_header_regex.search(header)
            metadata['title'] = (
                (m.group(3) or m.group(1)) if m else os.path.basename(md_file)
            )

        if not metadata.get('published'):
            # If the `published` header isn't available in the file,
            # infer it from the file's creation date.
            metadata['published'] = datetime.date.fromtimestamp(
                os.stat(md_file).st_ctime
            )
            metadata['published_inferred'] = True

        return metadata

    def get_page(self, page: str, title: Optional[str] = None, skip_header: bool = False):
        """Render a markdown page to HTML through the `article.html` template.

        :param page: Page path relative to ``pages_dir`` (``.md`` optional).
        :param title: Optional title override; when omitted, the page's own
            title is used unless it was merely inferred (to avoid
            duplicating it in the rendered output).
        :param skip_header: Passed to the template to suppress the header.
        """
        if not page.endswith('.md'):
            page += '.md'

        metadata = self.get_page_metadata(page)
        # Don't duplicate the page title if it's been inferred
        if not (title or metadata.get('title_inferred')):
            title = metadata.get('title', config.title)

        # Split an `author` header of the form `Name <email>`. A missing
        # or malformed header yields None/None instead of raising.
        author_match = (
            self._author_regex.match(metadata['author'])
            if 'author' in metadata else None
        )

        with open(os.path.join(self.pages_dir, page), 'r') as f:
            return render_template(
                'article.html',
                config=config,
                title=title,
                image=metadata.get('image'),
                description=metadata.get('description'),
                author=author_match[1] if author_match else None,
                author_email=author_match[2] if author_match else None,
                published=(
                    metadata['published'].strftime('%b %d, %Y')
                    if metadata.get('published')
                    and not metadata.get('published_inferred')
                    else None
                ),
                content=markdown(
                    f.read(),
                    extensions=['fenced_code', 'codehilite', MarkdownLatex()],
                ),
                skip_header=skip_header,
            )

    def get_pages(
            self,
            with_content: bool = False,
            skip_header: bool = False,
            sorter: Type[PagesSorter] = PagesSortByTime,
            reverse: bool = True,
    ) -> List[Tuple[int, dict]]:
        """Return all markdown pages found under ``pages_dir``.

        :param with_content: If True, each entry also carries the rendered
            HTML content (slower).
        :param skip_header: Passed through to :meth:`get_page`.
        :param sorter: ``PagesSorter`` subclass used to order the pages.
        :param reverse: If True (default), sort in descending order.
        :return: A list of ``(index, page_dict)`` tuples in sorted order.
        """
        # Fixed: read `self.pages_dir`, not the module-level `app` global,
        # so subclasses and additional instances behave correctly.
        pages_dir = self.pages_dir.rstrip('/')
        pages = []

        for root, _, files in os.walk(pages_dir, followlinks=True):
            # Path of the current folder relative to `pages_dir`.
            rel_root = root[len(pages_dir) + 1:]
            for f in files:
                if not f.endswith('.md'):
                    continue

                pages.append({
                    'path': os.path.join(rel_root, f),
                    'folder': rel_root,
                    'content': (
                        self.get_page(os.path.join(root, f), skip_header=skip_header)
                        if with_content else ''
                    ),
                    **self.get_page_metadata(os.path.join(rel_root, f)),
                })

        pages.sort(key=sorter(pages), reverse=reverse)
        return list(enumerate(pages))
|
2022-01-11 20:16:27 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Module-level application instance; also imported by the route handlers.
app = BlogApp(__name__)

# Imported for its side effects: registers the URL routes on `app`.
# The import must come after `app` is created, hence its position at the
# bottom of the module.
from .routes import *
|
|
|
|
|
|
|
|
|
|
|
|
# vim:sw=4:ts=4:et:
|