blog/app/latex.py

243 lines
7.5 KiB
Python

"""
Licensed under Public Domain Mark 1.0.
See https://creativecommons.org/publicdomain/mark/1.0/
Author: Justin Bruce Van Horne <justinvh@gmail.com>
Python-Markdown LaTeX Extension
Adds support for $math mode$ and %text mode%. This plugin supports
multiline equations/text.
The actual image generation is done via LaTeX/DVI output.
It encodes data as base64 so there is no need for images directly.
All the work is done in the preprocessor.
"""
import base64
import hashlib
import json
import os
import re
import tempfile
from subprocess import call as rawcall, PIPE
import markdown
def call(*args, **kwargs):
"""
Proxy to subprocess.call(), removes timeout argument in case of
Python2 because that was only implemented in Python3.
"""
return rawcall(*args, **kwargs)
# Defines our basic inline image
img_expr = '<img class="latex inline math-%s" alt="%s" id="%s" src="data:image/png;base64,%s">'
# Defines multiline expression image
multiline_img_expr = '''<div class="multiline-wrapper">
<img class="latex multiline math-%s" alt="%s" id="%s" src="data:image/png;base64,%s"></div>'''
# Base CSS template
img_css = """<style scoped>
.multiline-wrapper {
width: 100%;
text-align: center;
}
img.latex.multiline {
height: 65%;
}
img.latex.inline {
height: .9em;
vertical-align: middle;
}
</style>"""
# Cache and temp file paths
tmpdir = tempfile.gettempdir() + '/markdown-latex'
cache_file = tmpdir + '/latex.cache'
class LaTeXPreprocessor(markdown.preprocessors.Preprocessor):
# These are our cached expressions that are stored in latex.cache
cached = {}
# Basic LaTex Setup as well as our list of expressions to parse
tex_preamble = r"""\documentclass[14pt]{article}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{amssymb}
\usepackage{bm}
\usepackage{graphicx}
\usepackage[usenames,dvipsnames]{color}
\pagestyle{empty}
"""
# Math TeX extraction regex
math_extract_regex = re.compile(r'(.+?)((\\\(.+?\\\))|(\$\$\n.+?\n\$\$\n))(.*)', re.MULTILINE | re.DOTALL)
# Math TeX matching regex
math_match_regex = re.compile(r'\s*(\\\(.+?\\\))|(\$\$\n.+?\n\$\$\n)\s*', re.MULTILINE | re.DOTALL)
def __init__(self, *_, **__):
if not os.path.isdir(tmpdir):
os.makedirs(tmpdir)
try:
with open(cache_file, 'r') as f:
self.cached = json.load(f)
except (IOError, json.JSONDecodeError):
self.cached = {}
self.config = {
("general", "preamble"): "",
("dvipng", "args"): "-q -T tight -bg Transparent -z 9 -D 200",
("delimiters", "text"): "%",
("delimiters", "math"): "$",
("delimiters", "preamble"): "%%"}
def _latex_to_base64(self, tex):
"""Generates a base64 representation of TeX string"""
# Generate the temporary file
tmp_file_fd, path = tempfile.mkstemp(dir=tmpdir)
with os.fdopen(tmp_file_fd, "w") as tmp_file:
tmp_file.write(self.tex_preamble)
tmp_file.write(tex)
tmp_file.write('\n\\end{document}')
# compile LaTeX document. A DVI file is created
status = call(('latex -halt-on-error -output-directory={:s} {:s}'
.format(tmpdir, path)).split(),
stdout=PIPE, timeout=10)
# clean up if the above failed
if status:
self._cleanup(path, err=True)
raise Exception("Couldn't compile LaTeX document." +
"Please read '%s.log' for more detail." % path)
# Run dvipng on the generated DVI file. Use tight bounding box.
# Magnification is set to 1200
dvi = "%s.dvi" % path
png = "%s.png" % path
# Extract the image
cmd = "dvipng %s %s -o %s" % (self.config[("dvipng", "args")], dvi, png)
status = call(cmd.split(), stdout=PIPE)
# clean up if we couldn't make the above work
if status:
self._cleanup(path, err=True)
raise Exception("Couldn't convert LaTeX to image." +
"Please read '%s.log' for more detail." % path)
# Read the png and encode the data
try:
with open(png, "rb") as png:
data = png.read()
return base64.b64encode(data)
finally:
self._cleanup(path)
@staticmethod
def _cleanup(path, err=False):
# don't clean up the log if there's an error
extensions = ["", ".aux", ".dvi", ".png", ".log"]
if err:
extensions.pop()
# now do the actual cleanup, passing on non-existent files
for extension in extensions:
try:
os.remove("%s%s" % (path, extension))
except (IOError, OSError):
pass
def run(self, lines):
"""Parses the actual page"""
# Re-creates the entire page so we can parse in a multine env.
page = "\n".join(lines)
# Adds a preamble mode
self.tex_preamble += self.config[("general", "preamble")] + "\n\\begin{document}\n"
# Figure out our text strings and math-mode strings
tex_expr = self.math_extract_regex.findall(page)
# No sense in doing the extra work
if not len(tex_expr):
return page.split("\n")
# Parse the expressions
new_cache = {}
new_page = ''
n_multiline_expressions = 0
while page:
m = self.math_extract_regex.match(page)
if not m:
new_page += page
break
new_page += m.group(1)
math_match = self.math_match_regex.match(m.group(2))
if not math_match:
new_page += m.group(2)
else:
expr = m.group(2)
is_multiline = math_match.group(2) is not None
tex_hash = self.hash(expr)
if tex_hash in self.cached:
data = self.cached[tex_hash]
else:
data = self._latex_to_base64(expr).decode()
new_cache[tex_hash] = data
if is_multiline and n_multiline_expressions > 0:
new_page += '</p>'
new_page += (multiline_img_expr if is_multiline else img_expr) % ('true', expr, tex_hash, data)
if is_multiline:
new_page += '<p>'
n_multiline_expressions += 1
page = m.group(5)
if n_multiline_expressions > 0:
new_page += '</p>'
# Cache our data
self.cached.update(new_cache)
with open(cache_file, 'w') as f:
json.dump(self.cached, f)
# Make sure to re-split the lines
return new_page.split("\n")
@staticmethod
def hash(tex: str) -> str:
return hashlib.sha1(tex.encode()).hexdigest()
class LaTeXPostprocessor(markdown.postprocessors.Postprocessor):
"""This post processor extension just allows us to further
refine, if necessary, the document after it has been parsed."""
# noinspection PyMethodMayBeStatic
def run(self, text):
# Inline a style for default behavior
text = img_css + text
return text
class MarkdownLatex(markdown.Extension):
"""Wrapper for LaTeXPreprocessor"""
def extendMarkdown(self, md):
# Our base LaTeX extension
md.preprocessors.add('latex',
LaTeXPreprocessor(self), ">html_block")
# Our cleanup postprocessing extension
md.postprocessors.add('latex',
LaTeXPostprocessor(self), ">amp_substitute")