# madblog/madblog/app.py

import datetime
import os
import re
from glob import glob
from typing import Optional

from flask import Flask, abort, render_template
from markdown import markdown

from .config import config
from .latex import MarkdownLatex


class BlogApp(Flask):
    """
    Flask application that renders Markdown files as blog articles.

    Articles are read from ``<config.content_dir>/markdown``. If the content
    directory contains ``img``, ``css``, ``fonts`` or ``templates``
    sub-directories, they replace the corresponding defaults from ``config``.

    Article metadata is read from Markdown comment lines at the top of each
    file, in the form ``[//]: # (key: value)``.
    """

    # Matches a top-level Markdown heading. Group 3 is the text inside a
    # leading `[...]` (heading written as a link), group 1 is everything that
    # follows the `#` marker.
    _title_header_regex = re.compile(r'^#\s*((\[(.*)\])|(.*))')
    # Matches a metadata comment line: `[//]: # (key: value)`.
    _metadata_regex = re.compile(r'^\[//]: # \(([^:]+):\s*([^)]+)\)\s*$')
    # Matches an author header in the form `Name <email>`.
    _author_regex = re.compile(r'(.+?)\s+<([^>]+)>')

    def __init__(self, *args, **kwargs):
        """
        Initialize the application and resolve the content directories.

        :raises FileNotFoundError: If ``<config.content_dir>/markdown`` is not
            a directory.
        """
        super().__init__(*args, template_folder=config.templates_dir, **kwargs)
        self.pages_dir = os.path.join(config.content_dir, 'markdown')
        self.img_dir = config.default_img_dir
        self.css_dir = config.default_css_dir
        self.fonts_dir = config.default_fonts_dir

        if not os.path.isdir(self.pages_dir):
            raise FileNotFoundError(self.pages_dir)

        # Custom folders inside the content directory take precedence over
        # the defaults. They are stored as absolute paths.
        overrides = (
            ('img_dir', 'img'),
            ('css_dir', 'css'),
            ('fonts_dir', 'fonts'),
            ('template_folder', 'templates'),
        )

        for attr, subdir in overrides:
            custom_dir = os.path.join(config.content_dir, subdir)
            if os.path.isdir(custom_dir):
                setattr(self, attr, os.path.abspath(custom_dir))

    @classmethod
    def _parse_author(cls, author: Optional[str]) -> tuple:
        """
        Split an ``author`` metadata value into ``(name, email)``.

        :param author: Raw header value, expected as ``Name <email>``.
        :return: ``(name, email)``. Both are ``None`` if no author is given;
            ``email`` is ``None`` if the value carries no ``<email>`` part.
        """
        if not author:
            return None, None

        m = cls._author_regex.match(author)
        if m:
            return m.group(1), m.group(2)

        # No `<email>` part: keep the name instead of failing the request.
        return author.strip() or None, None

    def get_page_metadata(self, page: str) -> dict:
        """
        Read the metadata of an article.

        Metadata lines are parsed from the top of the file until the first
        non-blank line that is not a metadata comment.

        :param page: Article file name, relative to the pages directory. The
            ``.md`` extension is appended if missing.
        :return: A dictionary with the parsed headers plus:

            - ``uri``: ``/article/<page without extension>``.
            - ``title``: taken from the headers or, if missing, inferred
              (``title_inferred`` is set to ``True``).
            - ``published``: a :class:`datetime.date` taken from the headers
              or, if missing, inferred from the file status
              (``published_inferred`` is set to ``True``).

        :raises ValueError: If the ``published`` header is not an ISO date.

        Aborts the request with a 404 if the file does not exist.
        """
        if not page.endswith('.md'):
            page = page + '.md'

        md_file = os.path.join(self.pages_dir, page)
        if not os.path.isfile(md_file):
            abort(404)

        metadata = {'uri': '/article/' + page[:-3]}
        # First non-blank line that is not a metadata comment. It is used to
        # infer the title when no explicit `title` header is available.
        first_content_line = ''

        with open(md_file, 'r') as f:
            for line in f:
                # Lines read from a file keep their trailing newline, so
                # blank lines must be detected after stripping.
                if not line.strip():
                    continue

                if not (m := self._metadata_regex.match(line)):
                    first_content_line = line
                    break

                key, value = m.group(1), m.group(2)
                if key == 'published':
                    metadata[key] = datetime.date.fromisoformat(value)
                else:
                    metadata[key] = value

        if not metadata.get('title'):
            # If the `title` header isn't available in the file, infer it
            # from the first content line if it's a heading, otherwise fall
            # back to the file name.
            metadata['title_inferred'] = True
            m = self._title_header_regex.search(first_content_line)
            if m:
                metadata['title'] = m.group(3) or m.group(1)
            else:
                metadata['title'] = os.path.basename(md_file)

        if not metadata.get('published'):
            # If the `published` header isn't available in the file, infer it
            # from the file's `st_ctime`. NOTE: this is the creation time on
            # Windows, but the time of the last metadata change on Unix.
            metadata['published'] = datetime.date.fromtimestamp(os.stat(md_file).st_ctime)
            metadata['published_inferred'] = True

        return metadata

    def get_page(self, page: str, title: Optional[str] = None, skip_header: bool = False):
        """
        Render an article through the ``article.html`` template.

        :param page: Article file name, relative to the pages directory. The
            ``.md`` extension is appended if missing.
        :param title: Title passed to the template. If not set, the title
            from the article headers is used, unless it has been inferred.
        :param skip_header: Passed as-is to the template.
        :return: The output of ``render_template``.
        """
        if not page.endswith('.md'):
            page = page + '.md'

        metadata = self.get_page_metadata(page)
        # Don't duplicate the page title if it's been inferred
        if not (title or metadata.get('title_inferred')):
            title = metadata.get('title', config.title)

        author, author_email = self._parse_author(metadata.get('author'))

        # An inferred publication date is not shown on the article
        published = None
        if metadata.get('published') and not metadata.get('published_inferred'):
            published = metadata['published'].strftime('%b %d, %Y')

        with open(os.path.join(self.pages_dir, page), 'r') as f:
            content = markdown(f.read(), extensions=['fenced_code', 'codehilite', MarkdownLatex()])

        return render_template(
            'article.html',
            config=config,
            title=title,
            image=metadata.get('image'),
            description=metadata.get('description'),
            author=author,
            author_email=author_email,
            published=published,
            content=content,
            skip_header=skip_header
        )

    def get_pages(self, with_content: bool = False, skip_header: bool = False) -> list:
        """
        List the articles in the pages directory, most recently published
        first.

        :param with_content: If true, include the rendered article under the
            ``content`` key (an empty string otherwise).
        :param skip_header: Passed to :meth:`get_page` when rendering.
        :return: A list of dictionaries with ``path``, ``content`` and the
            keys returned by :meth:`get_page_metadata`.
        """
        pages = []
        # Use the instance's own pages directory rather than the one of the
        # module-level `app` object.
        for path in glob(os.path.join(self.pages_dir, '*.md')):
            name = os.path.basename(path)
            pages.append(
                {
                    'path': name,
                    'content': self.get_page(name, skip_header=skip_header) if with_content else '',
                    **self.get_page_metadata(name),
                }
            )

        return sorted(
            pages,
            key=lambda page: page.get('published', datetime.date.fromtimestamp(0)),
            reverse=True
        )


# Module-level application instance, created when this module is imported.
app = BlogApp(__name__)


# NOTE(review): this import deliberately comes after `app` is defined,
# presumably because the routes module needs `app` to register its views
# (circular import). Confirm against routes.py before moving it.
from .routes import *


# vim:sw=4:ts=4:et:
First commit 2022-01-11 20:16:27 +01:00			`import datetime`
Do os.path.abspath on custom content folders in the app constructor before initialization If relative paths are used for the content folder then send_from_directory may mistakenly interpret the paths as relative to the main application, instead of the content folder 2022-01-12 00:30:25 +01:00			`import os`
First commit 2022-01-11 20:16:27 +01:00			`import re`
			`from glob import glob`
			`from typing import Optional`

docs refactor and minor fixes 2022-01-11 23:38:28 +01:00			`from flask import Flask, abort`
First commit 2022-01-11 20:16:27 +01:00			`from markdown import markdown`

fixed removed import 2022-01-12 00:18:30 +01:00			`from .config import config`
First commit 2022-01-11 20:16:27 +01:00			`from .latex import MarkdownLatex`


			`class BlogApp(Flask):`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`_title_header_regex = re.compile(r'^#\s((\[(.)\])\|(.*))')`

First commit 2022-01-11 20:16:27 +01:00			`def __init__(self, args, *kwargs):`
			`super().__init__(args, template_folder=config.templates_dir, *kwargs)`
			`self.pages_dir = os.path.join(config.content_dir, 'markdown')`
			`self.img_dir = config.default_img_dir`
			`self.css_dir = config.default_css_dir`
			`self.fonts_dir = config.default_fonts_dir`

			`if not os.path.isdir(self.pages_dir):`
			`raise FileNotFoundError(self.pages_dir)`

			`img_dir = os.path.join(config.content_dir, 'img')`
			`if os.path.isdir(img_dir):`
Do os.path.abspath on custom content folders in the app constructor before initialization If relative paths are used for the content folder then send_from_directory may mistakenly interpret the paths as relative to the main application, instead of the content folder 2022-01-12 00:30:25 +01:00			`self.img_dir = os.path.abspath(img_dir)`
First commit 2022-01-11 20:16:27 +01:00
			`css_dir = os.path.join(config.content_dir, 'css')`
			`if os.path.isdir(css_dir):`
Do os.path.abspath on custom content folders in the app constructor before initialization If relative paths are used for the content folder then send_from_directory may mistakenly interpret the paths as relative to the main application, instead of the content folder 2022-01-12 00:30:25 +01:00			`self.css_dir = os.path.abspath(css_dir)`
First commit 2022-01-11 20:16:27 +01:00
			`fonts_dir = os.path.join(config.content_dir, 'fonts')`
			`if os.path.isdir(fonts_dir):`
Do os.path.abspath on custom content folders in the app constructor before initialization If relative paths are used for the content folder then send_from_directory may mistakenly interpret the paths as relative to the main application, instead of the content folder 2022-01-12 00:30:25 +01:00			`self.fonts_dir = os.path.abspath(fonts_dir)`
First commit 2022-01-11 20:16:27 +01:00
			`templates_dir = os.path.join(config.content_dir, 'templates')`
			`if os.path.isdir(templates_dir):`
Do os.path.abspath on custom content folders in the app constructor before initialization If relative paths are used for the content folder then send_from_directory may mistakenly interpret the paths as relative to the main application, instead of the content folder 2022-01-12 00:30:25 +01:00			`self.template_folder = os.path.abspath(templates_dir)`
First commit 2022-01-11 20:16:27 +01:00
			`def get_page_metadata(self, page: str) -> dict:`
			`if not page.endswith('.md'):`
			`page = page + '.md'`

			`if not os.path.isfile(os.path.join(self.pages_dir, page)):`
			`abort(404)`

			`metadata = {}`
If no published date is available on the headers, infer it from the file creation date 2022-06-11 23:02:07 +02:00			`md_file = os.path.join(self.pages_dir, page)`
			`with open(md_file, 'r') as f:`
First commit 2022-01-11 20:16:27 +01:00			`metadata['uri'] = '/article/' + page[:-3]`

			`for line in f.readlines():`
			`if not line:`
			`continue`

			`if not (m := re.match(r'^\[//]: # \(([^:]+):\s([^)]+)\)\s$', line)):`
			`break`

			`if m.group(1) == 'published':`
			`metadata[m.group(1)] = datetime.date.fromisoformat(m.group(2))`
			`else:`
			`metadata[m.group(1)] = m.group(2)`

Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`if not metadata.get('title'):`
			# If the `title` header isn't available in the file,
			`# infer it from the first line of the file`
			`with open(md_file, 'r') as f:`
			`header = ''`
			`for line in f.readlines():`
			`header = line`
			`break`

			`metadata['title_inferred'] = True`
			`m = self._title_header_regex.search(header)`
			`if m:`
			`metadata['title'] = m.group(3) or m.group(1)`
			`else:`
			`metadata['title'] = os.path.basename(md_file)`

If no published date is available on the headers, infer it from the file creation date 2022-06-11 23:02:07 +02:00			`if not metadata.get('published'):`
			# If the `published` header isn't available in the file,
			`# infer it from the file's creation date`
			`metadata['published'] = datetime.date.fromtimestamp(os.stat(md_file).st_ctime)`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`metadata['published_inferred'] = True`
If no published date is available on the headers, infer it from the file creation date 2022-06-11 23:02:07 +02:00
First commit 2022-01-11 20:16:27 +01:00			`return metadata`

			`def get_page(self, page: str, title: Optional[str] = None, skip_header: bool = False):`
			`if not page.endswith('.md'):`
			`page = page + '.md'`

			`metadata = self.get_page_metadata(page)`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`# Don't duplicate the page title if it's been inferred`
			`if not (title or metadata.get('title_inferred')):`
			`title = metadata.get('title', config.title)`

First commit 2022-01-11 20:16:27 +01:00			`with open(os.path.join(self.pages_dir, page), 'r') as f:`
			`return render_template(`
Infer the title of an article/file when not specified in the header metadata Infer the title from the first line of the file if it's a header, otherwise use the filename. 2022-06-11 23:54:18 +02:00			`'article.html',`
			`config=config,`
			`title=title,`
			`image=metadata.get('image'),`
			`description=metadata.get('description'),`
			`author=(`
			`re.match(r'(.+?)\s+<([^>]+>)', metadata['author'])[1]`
			`if 'author' in metadata else None`
			`),`
			`author_email=(`
			`re.match(r'(.+?)\s+<([^>]+)>', metadata['author'])[2]`
			`if 'author' in metadata else None`
			`),`
			`published=(`
			`metadata['published'].strftime('%b %d, %Y')`
			`if metadata.get('published') and not metadata.get('published_inferred')`
			`else None`
			`),`
			`content=markdown(f.read(), extensions=['fenced_code', 'codehilite', MarkdownLatex()]),`
			`skip_header=skip_header`
First commit 2022-01-11 20:16:27 +01:00			`)`

			`def get_pages(self, with_content: bool = False, skip_header: bool = False) -> list:`
If no published date is available on the headers, infer it from the file creation date 2022-06-11 23:02:07 +02:00			`return sorted(`
			`[`
			`{`
			`'path': path[len(app.pages_dir)+1:],`
			`'content': self.get_page(path[len(app.pages_dir)+1:], skip_header=skip_header) if with_content else '',`
			`**self.get_page_metadata(os.path.basename(path)),`
			`}`
			`for path in glob(os.path.join(app.pages_dir, '*.md'))`
			`],`
			`key=lambda page: page.get('published', datetime.date.fromtimestamp(0)),`
			`reverse=True`
			`)`
First commit 2022-01-11 20:16:27 +01:00

			`app = BlogApp(__name__)`


			`from .routes import *`


			`# vim:sw=4:ts=4:et:`