# website/generator/process.py

from collections.abc import Generator
import datetime
import enum
import itertools
import json
import os
import re
import shutil
import subprocess
from pathlib import Path
from typing import Any

import jinja2
from rich import print as rprint

import feeds
import markdown
from typedef import *
from util import *


class _FileType(enum.IntEnum):
    HTML_TEMPLATE = enum.auto()
    STATIC = enum.auto()
    MARKDOWN_TEMPLATE = enum.auto()


def _walk_content(
    start_dir: str | Path,
) -> Generator[tuple[Path, _FileType], None, None]:
    """Recursively yield (path, file type) pairs for everything under
    start_dir, skipping files and directories whose names start with "_"."""
    if not isinstance(start_dir, Path):
        start_dir = Path(start_dir)
    for item in os.listdir(start_dir):
        if item.startswith("_"):
            continue
        p = start_dir / item
        if os.path.isdir(p):
            yield from _walk_content(p)
            continue
        # classify by the full (possibly compound) extension, e.g. "md", "tar.gz"
        match "" if len((sp := item.split("."))) <= 1 else ".".join(sp[1:]).lower():
            case "html":
                ftype = _FileType.HTML_TEMPLATE
            case "md":
                ftype = _FileType.MARKDOWN_TEMPLATE
            case _:
                ftype = _FileType.STATIC
        yield p, ftype
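

# A minimal usage sketch (hypothetical tree):
#   for path, ftype in _walk_content("content"):
#       ...
# would yield e.g. (Path("content/about.md"), _FileType.MARKDOWN_TEMPLATE),
# while an underscore-prefixed entry such as content/_drafts/ is skipped
# entirely (neither listed nor recursed into).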


def _make_canonical_url(site_config: Any, path: Path | str) -> str:
    path_str = str(path).strip("/")
    if path_str == ".":
        path_str = ""
    return (
        site_config["baseURL"].rstrip("/")
        + "/"
        + path_str
        + ("/" if path_str != "" else "")
    )
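

# Worked example (hypothetical config): with site_config["baseURL"] set to
# "https://example.com/", _make_canonical_url(site_config, "blog/post") returns
# "https://example.com/blog/post/"; a path of "." or "/" collapses to the bare
# "https://example.com/".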


def content(
    base_dir: Path, output_dir: Path, jinja_env: jinja2.Environment, site_config: Any
):
    markdown_to_html = markdown.create(escape=False)
    walk_dir = base_dir / "content"
    for fpath, filetype in _walk_content(walk_dir):
        # the path of the file *inside* a site directory structure
        # (e.g. inside of `_dist` or inside of `content`)
        site_inner_path = fpath.relative_to(walk_dir)
        tpl_frontmatter, raw_tpl = None, None
        with open(fpath) as f:
            try:
                tpl_frontmatter, raw_tpl = extract_frontmatter(f.read())
            except ValueError:
                # could not parse the template, so this will have to be copied
                # as a static file
                pass
        if (
            tpl_frontmatter and raw_tpl
        ):  # do we have a template we can make context for and render?
            render_as_directory = (
                bool(t)
                if (t := tpl_frontmatter.get("asDirectory")) is not None
                else not (
                    site_inner_path.stem.lower() == "index"
                    and site_inner_path.suffix.lower() in [".md", ".html"]
                )
            )
            target_path = output_dir / (
                site_inner_path.with_suffix(".html")
                if not render_as_directory
                else site_inner_path.parent
                / site_inner_path.name.split(".")[0]
                / "index.html"
            )
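            # Hypothetical examples: "about.md" becomes about/index.html
            # (directory-style), while "index.md" stays put and becomes
            # index.html next to its siblings.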
            os.makedirs(target_path.parent, exist_ok=True)
            ctx = {"site": site_config, "data": DataLoader(base_dir / "data")}
            if "canonicalURL" not in tpl_frontmatter and not tpl_frontmatter.get(
                "preserveCanonicalURL", False
            ):
                internal_site_path = target_path.relative_to(output_dir)
                tpl_frontmatter["canonicalURL"] = _make_canonical_url(
                    site_config,
                    (
                        internal_site_path.parent
                        if internal_site_path.name.lower() == "index.html"
                        else internal_site_path
                    ),
                )
            ctx["page"] = tpl_frontmatter
            ctx["file"] = {
                "mod_time": datetime.datetime.fromtimestamp(os.stat(fpath).st_mtime)
            }
            match filetype:
                case _FileType.HTML_TEMPLATE:
                    if not tpl_frontmatter.get("isTemplate", True):
                        # wrap the raw content so we still get a jinja template
                        # object for the shared render step below
                        ctx["x"] = raw_tpl
                        tpl = jinja_env.from_string("{{ x | safe }}")
                    else:
                        tpl = jinja_env.from_string(raw_tpl)
                case _FileType.MARKDOWN_TEMPLATE:
                    if tpl_frontmatter.get("isTemplate", False):
                        raw_tpl = jinja_env.from_string(raw_tpl).render(ctx)
                    ctx["rendered"], render_state = markdown_to_html.parse(raw_tpl)
                    tpl_str = '{% extends "_layouts/base.html" %}{% block main %}{{ rendered | safe }}{% endblock %}'
                    if tpl_frontmatter.get("showToc", False):
                        ctx["toc"] = markdown.render_toc_from_state(
                            render_state, min_level=2
                        )
                        tpl_str += '{% import "_imports/toc.html" as tc %}{% block aside %}{{ tc.render(toc) }}{% endblock %}'
                    tpl = jinja_env.from_string(tpl_str)
                case _:
                    assert False, "impossible state"
            res = tpl.render(ctx)
            with open(target_path, "w") as f:
                f.write(res)
        else:
            if (
                fpath.suffix.lower() != ".scss"
            ):  # the sass step runs after this and handles these files for us
                if filetype != _FileType.STATIC:
                    rprint(
                        WARN_LEADER
                        + f"Treating [bold]{fpath}[/bold] (type {filetype.name}) as a static file"
                    )
                target_path = output_dir / site_inner_path
                os.makedirs(target_path.parent, exist_ok=True)
                shutil.copy(fpath, target_path)


def sass(base_dir: Path, output_dir: Path):
    # Dart Sass many-to-many mode: a "src:dest" pair compiles every stylesheet
    # in the source directory into the destination directory.
    subprocess.run(
        [
            "sass",
            "--style=compressed",
            str(base_dir / "content/assets/css")
            + ":"
            + str(output_dir / "html/assets/css"),
        ]
    ).check_returncode()


BLOG_DATE_FORMAT = "%Y-%m-%d"
BLOG_TAG_RE = re.compile(r"[a-zA-Z\d-]+")
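

# A sketch of the frontmatter blog() expects per post (keys taken from the
# lookups below; values are hypothetical):
#
#   title: My first post               # required
#   publishedDate: 2024-01-15          # required
#   updatedDate: [2024-02-01]          # optional, sorted newest-first
#   tags: [python, static-sites]       # optional, each must match BLOG_TAG_RE,
#                                      #   so "static-sites" is fine, "c++" is not
#   visible: {list: true, feed: true}  # optional, or `false` to hide the post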
def blog(
    base_dir: Path, output_dir: Path, jinja_env: jinja2.Environment, site_config: Any
):
    markdown_to_html = markdown.create(escape=False, header_level_delta=1)
    walk_dir = base_dir / "blog"
    posts = {}
    for fpath, filetype in _walk_content(walk_dir):
        inner_path = fpath.relative_to(walk_dir)
        match filetype:
            case _FileType.MARKDOWN_TEMPLATE:
                with open(fpath) as f:
                    post_frontmatter, raw_post_md = extract_frontmatter(f.read())
                post_slug = (
                    fpath.name[:-3] if fpath.name != "content.md" else fpath.parent.name
                )
                if post_slug in posts:
                    rprint(
                        ERROR_LEADER + f"Duplicate post slug [bold]{post_slug}[/bold]"
                    )
                    raise SystemExit(1)
                # check required keys
                missing_keys = [
                    key
                    for key in ["title", "publishedDate"]
                    if key not in post_frontmatter
                ]
                if len(missing_keys) > 0:
                    rprint(
                        ERROR_LEADER
                        + f"Post [bold]{post_slug}[/bold] is missing the following frontmatter keys: "
                        + ", ".join(missing_keys)
                    )
                    raise SystemExit(1)
                # check tags are valid
                if "tags" in post_frontmatter:
                    invalid_tags = [
                        tag
                        for tag in post_frontmatter["tags"]
                        if not BLOG_TAG_RE.fullmatch(tag)
                    ]
                    if len(invalid_tags) > 0:
                        rprint(
                            ERROR_LEADER
                            + f"Post [bold]{post_slug}[/bold] has the following invalid tags: "
                            + ", ".join(map(repr, invalid_tags))
                        )
                        raise SystemExit(1)
                target_path = output_dir / "blog" / post_slug / "index.html"
                os.makedirs(target_path.parent, exist_ok=True)
                if "updatedDate" in post_frontmatter:
                    post_frontmatter["updatedDate"] = sorted(
                        post_frontmatter["updatedDate"], reverse=True
                    )
                posts[post_slug] = post_frontmatter
                rendered_html, render_state = markdown_to_html.parse(raw_post_md)
                # build jinja context
                ctx = {
                    "site": site_config,
                    "post": post_frontmatter,
                    "content": rendered_html,
                    "toc": markdown.render_toc_from_state(render_state),
                    "page": {
                        k: post_frontmatter[k]
                        for k in ["title", "description", "imageURL"]
                        if k in post_frontmatter
                    },
                }
                ctx["page"]["canonicalURL"] = _make_canonical_url(
                    site_config, f"/blog/{post_slug}"
                )
                # execute jinja template
                tpl = jinja_env.get_template("_layouts/blog/post.html")
                res = tpl.render(ctx)
                # dump to file
                with open(target_path, "w") as f:
                    f.write(res)
            case _:
                if filetype != _FileType.STATIC:
                    rprint(
                        WARN_LEADER
                        + f"Treating [bold]{fpath}[/bold] (type {filetype.name}) as a static file"
                    )
                target_path = output_dir / "blog" / inner_path
                os.makedirs(target_path.parent, exist_ok=True)
                shutil.copy(fpath, target_path)
    # generate listing
    post_list = []
    tags = {}
    for slug, post in posts.items():
        # posts hidden from the listing (visible: false, or visible.list: false)
        # are skipped entirely
        if (pv := post.get("visible", {})) is False or not pv.get("list", True):
            continue
        post_list.append(
            AbbreviatedPost(
                slug,
                post["title"],
                post.get("description", ""),
                post["publishedDate"],
                (
                    None
                    if "updatedDate" not in post or len(post["updatedDate"]) == 0
                    else post["updatedDate"][0]
                ),
                "favourite" in (post_tags := post.get("tags", [])),
                post_tags,
            )
        )
        for tag in post_tags:
            tags[tag] = tags.get(tag, 0) + 1
    try:
        with open(base_dir / "blogTags.yml") as f:
            tag_descriptions = load_yaml(f.read())
    except FileNotFoundError:
        tag_descriptions = {}
    tag_list = [AbbreviatedTag(x, tag_descriptions.get(x), tags[x]) for x in tags]
    # Here, we sort twice - once to get the tag list sorted alphabetically in
    # ascending order, and a second time to get it sorted by presence of a
    # description and by article count in descending order. Since sorted() is
    # stable, alphabetical order survives as the tie-breaker.
    tag_list = sorted(tag_list, key=lambda x: x.slug)
    tag_list = sorted(
        tag_list,
        key=lambda x: (x.description is not None, x.articleCount),
        reverse=True,
    )
    post_list = sorted(
        post_list,
        key=lambda x: x.publishedDate,
        reverse=True,
    )
    with open(output_dir / "blog" / "index.html", "w") as f:
        tpl = jinja_env.get_template("_layouts/blog/index.html")
        r = tpl.render(
            {
                "site": site_config,
                "page": {
                    "title": "Blog",
                    "canonicalURL": _make_canonical_url(site_config, "/blog/"),
                    "showAside": True,
                },
                "posts": post_list,
            }
        )
        f.write(r)
    # generate tag list
    tags_output_dir = output_dir / "blog" / "tags"
    os.makedirs(tags_output_dir, exist_ok=True)
    with open(tags_output_dir / "index.html", "w") as f:
        tpl = jinja_env.get_template("_layouts/blog/tags.html")
        r = tpl.render(
            {
                "site": site_config,
                "page": {
                    "title": "Blog tags",
                    "canonicalURL": _make_canonical_url(site_config, "/blog/tags/"),
                    "showAside": True,
                },
                "tags": tag_list,
            }
        )
        f.write(r)
    # generate tag-specific index pages
    tpl = jinja_env.get_template("_layouts/blog/postsFilteredByTag.html")
    for tag in tag_list:
        d = tags_output_dir / tag.slug
        os.makedirs(d, exist_ok=True)
        with open(d / "index.html", "w") as f:
            f.write(
                tpl.render(
                    {
                        "site": site_config,
                        "page": {
                            "title": f"{tag.slug} :: Blog Tags",
                            "canonicalURL": _make_canonical_url(
                                site_config, f"/blog/tags/{tag.slug}/"
                            ),
                            "showAside": True,
                        },
                        "tag": tag,
                        "posts": [p for p in post_list if tag.slug in p.tags],
                    }
                )
            )
    # generate feeds, limited to the ten newest posts that are feed-visible
    feed_post_list = [
        p
        for p in post_list
        if (pv := posts[p.slug].get("visible", {})) is not False
        and pv.get("feed", True)
    ][:10]
    with open(output_dir / "blog" / "feed.atom", "w") as f:
        f.write(feeds.atom(site_config, feed_post_list))
    with open(output_dir / "blog" / "feed.json", "w") as f:
        f.write(feeds.json(site_config, feed_post_list))


def caddy_config(
    base_dir: Path, output_dir: Path, jinja_env: jinja2.Environment, site_config: Any
):
    try:
        with open(base_dir / "redirects.yml") as f:
            raw_redirect_rules = load_yaml(f.read())
    except FileNotFoundError:
        raw_redirect_rules = []
    redirects = []
    for i, rule in enumerate(raw_redirect_rules):
        if "from" not in rule or "to" not in rule:
            rprint(ERROR_LEADER + f"Redirect rule {i} is missing a 'from' or 'to' field")
            raise SystemExit(1)
        status = rule.get("code", 302)
        if not ((300 <= status <= 399) or status == 401):
            rprint(
                ERROR_LEADER
                + f"Redirect rule {i} has an invalid status code (not 3xx or 401)"
            )
            raise SystemExit(1)
        redirects.append(
            {
                "match": [{"path": [rule["from"]]}],
                "handle": [
                    {
                        "handler": "static_response",
                        "headers": {
                            "Location": [rule["to"]],
                        },
                        "status_code": status,
                    }
                ],
            }
        )
        # also match the trailing-slash variant of the source path
        if not rule["from"].endswith("/") and rule.get("transform", True):
            redirects.append(
                {
                    "match": [{"path": [rule["from"] + "/"]}],
                    "handle": [
                        {
                            "handler": "static_response",
                            "headers": {
                                "Location": [rule["to"]],
                            },
                            "status_code": status,
                        }
                    ],
                }
            )
    conf = {
        "admin": {"disabled": True},
        "apps": {
            "http": {
                "servers": {
                    "srv0": {
                        "automatic_https": {"disable": True},
                        "listen": [":8080"],
                        "routes": [
                            {
                                # strip the Server header from every response
                                "handle": [
                                    {
                                        "handler": "headers",
                                        "response": {
                                            "deferred": True,
                                            "delete": ["Server"],
                                        },
                                    }
                                ]
                            },
                            {
                                "handle": [
                                    {
                                        "handler": "subroute",
                                        "routes": [
                                            *redirects,
                                            {
                                                "handle": [
                                                    {
                                                        "handler": "file_server",
                                                        "root": "./html",
                                                    }
                                                ],
                                            },
                                        ],
                                        # serve a custom 404 page on errors
                                        "errors": {
                                            "routes": [
                                                {
                                                    "handle": [
                                                        {
                                                            "handler": "rewrite",
                                                            "uri": "/404.html",
                                                        },
                                                        {
                                                            "handler": "file_server",
                                                            "root": "./html",
                                                        },
                                                    ],
                                                    "terminal": True,
                                                },
                                            ],
                                        },
                                    },
                                ],
                            },
                        ],
                    },
                },
            },
        },
    }
    with open(output_dir / "caddy_config.json", "w") as f:
        json.dump(conf, f, indent="\t")
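

# A hypothetical redirects.yml consumed by caddy_config (keys match the
# lookups above; values are illustrative):
#
#   - from: /old-page
#     to: /new-page/
#     code: 301        # optional, defaults to 302; must be 3xx or 401
#   - from: /legacy
#     to: https://example.com/elsewhere
#     transform: false # optional: skip the extra "/legacy/" match variant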


def compress_png(output_dir: Path):
    pngcrush_exe: str | None = shutil.which("pngcrush")
    if pngcrush_exe is None:
        rprint(
            WARN_LEADER + "cannot find pngcrush executable, skipping PNG compression"
        )
        return
    for image in itertools.chain(
        output_dir.rglob("*.png"),
        output_dir.rglob("*.PNG"),
    ):
        # -ow makes pngcrush overwrite the input file in place
        proc: subprocess.CompletedProcess = subprocess.run([pngcrush_exe, "-ow", image])
        proc.check_returncode()


def strip_exif(output_dir: Path):
    mogrify_exe: str | None = shutil.which("mogrify")
    if mogrify_exe is None:
        rprint(
            WARN_LEADER
            + "cannot find mogrify (of ImageMagick) executable, skipping EXIF data removal"
        )
        return
    for image in itertools.chain(
        output_dir.rglob("*.jpg"),
        output_dir.rglob("*.JPG"),
        output_dir.rglob("*.jpeg"),
        output_dir.rglob("*.JPEG"),
        output_dir.rglob("*.png"),
        output_dir.rglob("*.PNG"),
    ):
        # mogrify -strip removes EXIF and other metadata from the image in place
        proc: subprocess.CompletedProcess = subprocess.run(
            [mogrify_exe, "-strip", image]
        )
        proc.check_returncode()