website/generator/process.py
2024-02-11 00:55:23 +00:00

436 lines
15 KiB
Python

from util import *
from pathlib import Path
from jinja2 import Environment
import shutil
from rich import print as rprint
import feeds
from typedef import *
import re
import enum
from collections.abc import Generator
import json
def _template_frontmatter(data: any, jinja_env: Environment, context: any):
    """Render every string value of ``data`` as a Jinja template, in place.

    Nested mappings are processed recursively. Values of any other type
    (numbers, lists, ``None``, ...) are left untouched.

    Args:
        data: Frontmatter mapping. It is mutated rather than copied.
        jinja_env: Environment used to compile each string via ``from_string``.
        context: Variables handed to ``render`` for every string.
    """
    for key, value in data.items():
        # isinstance rather than an exact type comparison, so that subclasses
        # of str and dict are templated as well instead of silently skipped.
        if isinstance(value, str):
            # Re-assigning an existing key does not resize the dict, so this
            # is safe while iterating over it.
            data[key] = jinja_env.from_string(value).render(context)
        elif isinstance(value, dict):
            _template_frontmatter(value, jinja_env, context)
class _FileType(enum.IntEnum):
    """How a file found while walking a source directory is handled."""

    # Values are spelled out; they match what consecutive enum.auto() calls
    # produce for an IntEnum (1, 2, 3 in declaration order).
    HTML_TEMPLATE = 1  # *.html
    STATIC = 2  # any other extension, or none
    MARKDOWN_TEMPLATE = 3  # *.md
def _walk_content(
    start_dir: str | Path,
) -> Generator[tuple[Path, _FileType], None, None]:
    """Recursively yield every file below ``start_dir`` together with its type.

    Entries whose name starts with ``_`` are skipped; for a directory this
    skips everything inside it as well. Files are classified by the
    lower-cased text after the last ``.`` in their name: ``html`` and ``md``
    are templates, anything else (including no extension) is static.

    Args:
        start_dir: Directory to walk. A plain string is converted to ``Path``.

    Yields:
        ``(path, file_type)`` pairs, where ``path`` is ``start_dir`` joined
        with the entry's location below it.
    """
    if not isinstance(start_dir, Path):
        start_dir = Path(start_dir)
    # os.listdir returns entries in arbitrary order; sorting keeps the build
    # order (and anything derived from it) reproducible across machines.
    for item in sorted(os.listdir(start_dir)):
        if item.startswith("_"):
            continue
        p = start_dir / item
        if os.path.isdir(p):
            yield from _walk_content(p)
            continue
        # rpartition yields an empty separator when the name has no dot, in
        # which case the file has no extension at all.
        _, dot, tail = item.rpartition(".")
        extension = tail.lower() if dot else ""
        if extension == "html":
            ftype = _FileType.HTML_TEMPLATE
        elif extension == "md":
            ftype = _FileType.MARKDOWN_TEMPLATE
        else:
            ftype = _FileType.STATIC
        yield p, ftype
def content(base_dir: Path, output_dir: Path, jinja_env: Environment, site_config: any):
    """Render or copy everything under ``base_dir / "content"`` into ``output_dir``.

    HTML and Markdown files have their frontmatter extracted and templated,
    and are then rendered through Jinja. Markdown is first converted with
    ``render_markdown`` and injected into the ``main`` block of
    ``_layouts/base.html``. Every other file is copied verbatim.

    Output location of a rendered page:
      * frontmatter key ``asDirectory`` set: its truthiness decides;
      * otherwise ``index.md`` / ``index.html`` render to ``index.html`` in
        place, and any other page ``foo.md`` renders to ``foo/index.html``.

    Args:
        base_dir: Site source root; must contain a ``content`` directory.
        output_dir: Root of the generated site.
        jinja_env: Environment used for frontmatter and page templates.
        site_config: Exposed to templates as ``site``.
    """
    walk_dir = base_dir / "content"
    for fpath, filetype in _walk_content(walk_dir):
        # the path of the file *inside* a site directory structure
        # (eg. inside of `_dist` or inside of `content`)
        site_inner_path = fpath.relative_to(walk_dir)

        if filetype not in (_FileType.HTML_TEMPLATE, _FileType.MARKDOWN_TEMPLATE):
            if filetype != _FileType.STATIC:
                rprint(
                    WARN_LEADER
                    + f"Treating [bold]{fpath}[/bold] (type {filetype.name}) as a static file"
                )
            target_path = output_dir / site_inner_path
            os.makedirs(target_path.parent, exist_ok=True)
            shutil.copy(fpath, target_path)
            update_counts("copied", 1)
            continue

        # Explicit UTF-8 so the result does not depend on the machine's locale.
        with open(fpath, encoding="utf-8") as f:
            tpl_frontmatter, raw_tpl = extract_frontmatter(f.read())

        as_directory = tpl_frontmatter.get("asDirectory")
        if as_directory is not None:
            render_as_directory = bool(as_directory)
        else:
            is_index_page = (
                site_inner_path.stem.lower() == "index"
                and site_inner_path.suffix.lower() in [".md", ".html"]
            )
            render_as_directory = not is_index_page

        if render_as_directory:
            relative_target = site_inner_path.parent / site_inner_path.stem / "index.html"
        else:
            relative_target = site_inner_path.with_suffix(".html")
        target_path = output_dir / relative_target
        os.makedirs(target_path.parent, exist_ok=True)

        ctx = {"site": site_config}
        # Frontmatter strings may themselves be templates that refer to `site`.
        _template_frontmatter(tpl_frontmatter, jinja_env, ctx)
        ctx["page"] = tpl_frontmatter

        if filetype == _FileType.HTML_TEMPLATE:
            tpl = jinja_env.from_string(raw_tpl)
        else:
            # Markdown pages have no template of their own: wrap the rendered
            # HTML in the base layout.
            tpl = jinja_env.from_string(
                '{% extends "_layouts/base.html" %}{% block main %}{{ rendered | safe }}{% endblock %}'
            )
            ctx["rendered"] = render_markdown(raw_tpl, escape=False)

        res = tpl.render(ctx)
        with open(target_path, "w", encoding="utf-8") as f:
            f.write(res)
        update_counts("rendered", 1)
# Year-month-day pattern for blog dates (presumably consumed by
# strftime/strptime elsewhere; it is not referenced by the code visible here).
BLOG_DATE_FORMAT = "%Y-%m-%d"
# A valid tag consists solely of letters, digits and hyphens; blog() rejects
# any tag that does not fully match this pattern.
BLOG_TAG_RE = re.compile(r"[a-zA-Z\d-]+")
def _validate_post_frontmatter(post_slug: str, post_frontmatter: dict) -> None:
    """Abort the build if a post lacks required keys or carries invalid tags."""
    # check required keys
    missing_keys = [
        key for key in ("title", "publishedDate") if key not in post_frontmatter
    ]
    if missing_keys:
        rprint(
            ERROR_LEADER
            + f"Post [bold]{post_slug}[/bold] missing the following frontmatter keys: "
            + ",".join(missing_keys)
        )
        raise SystemExit(1)

    # check tags are valid
    invalid_tags = [
        tag
        for tag in post_frontmatter.get("tags", [])
        if not BLOG_TAG_RE.fullmatch(tag)
    ]
    if invalid_tags:
        rprint(
            ERROR_LEADER
            + f"Post [bold]{post_slug}[/bold] has the following invalid tags: "
            + ",".join(map(repr, invalid_tags))
        )
        raise SystemExit(1)


def blog(base_dir: Path, output_dir: Path, jinja_env: Environment, site_config: any):
    """Build the blog: posts, listing, tag pages and feeds.

    Every Markdown file under ``base_dir / "blog"`` is a post and is rendered
    to ``blog/<slug>/index.html``. The slug is the file name without its
    three-character extension, or the parent directory's name for files called
    ``content.md``. All other files are copied unchanged. Afterwards the post
    index, the tag overview, one page per tag, and the Atom and JSON feeds
    are generated. Posts with a truthy ``hidden`` frontmatter value are
    rendered but left out of all listings and feeds.

    Args:
        base_dir: Site source root; must contain a ``blog`` directory and may
            contain ``blogTags.yml``, which is used to look up a description
            for each tag.
        output_dir: Root of the generated site.
        jinja_env: Environment providing the ``_layouts/blog/*`` templates.
        site_config: Exposed to templates as ``site`` and passed to the feeds.

    Raises:
        SystemExit: On a duplicate slug, missing ``title``/``publishedDate``,
            or a tag that does not match ``BLOG_TAG_RE``.
    """
    walk_dir = base_dir / "blog"
    blog_output_dir = output_dir / "blog"
    posts = {}

    for fpath, filetype in _walk_content(walk_dir):
        if filetype != _FileType.MARKDOWN_TEMPLATE:
            if filetype != _FileType.STATIC:
                rprint(
                    WARN_LEADER
                    + f"Treating [bold]{fpath}[/bold] (type {filetype.name}) as a static file"
                )
            target_path = blog_output_dir / fpath.relative_to(walk_dir)
            os.makedirs(target_path.parent, exist_ok=True)
            shutil.copy(fpath, target_path)
            update_counts("copied", 1)
            continue

        # Explicit UTF-8 so the result does not depend on the machine's locale.
        with open(fpath, encoding="utf-8") as f:
            post_frontmatter, raw_post_md = extract_frontmatter(f.read())

        if fpath.name == "content.md":
            post_slug = fpath.parent.name
        else:
            post_slug = fpath.name[:-3]  # strip the ".md" extension
        if post_slug in posts:
            rprint(ERROR_LEADER + f"Duplicate post slug [bold]{post_slug}[/bold]")
            raise SystemExit(1)
        _validate_post_frontmatter(post_slug, post_frontmatter)

        target_path = blog_output_dir / post_slug / "index.html"
        os.makedirs(target_path.parent, exist_ok=True)

        if "updatedDate" in post_frontmatter:
            # newest update first; the listing below shows element 0
            post_frontmatter["updatedDate"] = sorted(
                post_frontmatter["updatedDate"], reverse=True
            )
        posts[post_slug] = post_frontmatter

        rendered_html = render_markdown(raw_post_md, escape=False)
        # build jinja context
        ctx = {
            "site": site_config,
            "post": post_frontmatter,
            "content": rendered_html,
            "page": {
                k: post_frontmatter[k]
                for k in ["title", "description", "imageURL"]
                if k in post_frontmatter
            },
        }
        ctx["page"]["canonicalURL"] = f"/blog/{post_slug}/"
        # execute jinja template
        tpl = jinja_env.get_template("_layouts/blog/post.html")
        res = tpl.render(ctx)
        # dump to file
        with open(target_path, "w", encoding="utf-8") as f:
            f.write(res)
        update_counts("rendered", 1)

    # generate listing
    post_list = []
    # A dict serves as an insertion-ordered set: first-seen order is kept and
    # the membership test is O(1).
    tag_slugs = {}
    for slug, post in posts.items():
        if post.get("hidden"):
            continue
        post_tags = post.get("tags", [])
        updates = post.get("updatedDate")
        post_list.append(
            AbbreviatedPost(
                slug,
                post["title"],
                post.get("description", ""),
                post["publishedDate"],
                updates[0] if updates else None,
                "favourite" in post_tags,
                post_tags,
            )
        )
        for tag in post_tags:
            tag_slugs.setdefault(tag)

    try:
        with open(base_dir / "blogTags.yml", encoding="utf-8") as f:
            # Fall back to {} if the loader returns nothing (e.g. empty file).
            tag_descriptions = load_yaml(f.read()) or {}
    except FileNotFoundError:
        tag_descriptions = {}
    tags = [AbbreviatedTag(slug, tag_descriptions.get(slug)) for slug in tag_slugs]

    post_list = sorted(post_list, key=lambda x: x.publishedDate, reverse=True)

    # With no files under blog/ the loop above never created this directory.
    os.makedirs(blog_output_dir, exist_ok=True)
    with open(blog_output_dir / "index.html", "w", encoding="utf-8") as f:
        tpl = jinja_env.get_template("_layouts/blog/index.html")
        r = tpl.render(
            {
                "site": site_config,
                "page": {
                    "title": "Blog",
                    "canonicalURL": "/blog/",
                    "showAside": True,
                },
                "posts": post_list,
            }
        )
        f.write(r)
    update_counts("generated", 1)

    # generate tag list
    tags_output_dir = blog_output_dir / "tags"
    os.makedirs(tags_output_dir, exist_ok=True)
    with open(tags_output_dir / "index.html", "w", encoding="utf-8") as f:
        tpl = jinja_env.get_template("_layouts/blog/tags.html")
        r = tpl.render(
            {
                "site": site_config,
                "page": {
                    "title": "Blog tags",
                    "canonicalURL": "/blog/tags/",
                    "showAside": True,
                },
                "tags": sorted(tags),
            }
        )
        f.write(r)
    update_counts("generated", 1)

    # generate tag-specific index pages
    tpl = jinja_env.get_template("_layouts/blog/postsFilteredByTag.html")
    for tag in tags:
        d = tags_output_dir / tag.slug
        os.makedirs(d, exist_ok=True)
        with open(d / "index.html", "w", encoding="utf-8") as f:
            f.write(
                tpl.render(
                    {
                        "site": site_config,
                        "page": {
                            "title": f"{tag.slug} :: Blog Tags",
                            "canonicalURL": f"/blog/tags/{tag.slug}/",
                            "showAside": True,
                        },
                        "tag": tag,
                        "posts": [p for p in post_list if tag.slug in p.tags],
                    }
                )
            )
        update_counts("generated", 1)

    # generate feeds
    with open(blog_output_dir / "feed.atom", "w", encoding="utf-8") as f:
        f.write(feeds.atom(site_config, post_list))
    with open(blog_output_dir / "feed.json", "w", encoding="utf-8") as f:
        f.write(feeds.json(site_config, post_list))
    update_counts("generated", 2)
def caddy_config(
    base_dir: Path, output_dir: Path, jinja_env: Environment, site_config: any
):
    """Write ``caddy_config.json`` (a Caddy JSON configuration) to ``output_dir``.

    The generated server listens on ``:8080`` with automatic HTTPS and the
    admin endpoint disabled, strips the ``Server`` response header, applies
    the redirect rules from ``base_dir / "redirects.yml"`` (if that file
    exists) and otherwise serves files from ``./html``. Errors are rewritten
    to ``/404.html``.

    Each redirect rule is a mapping with ``from``, ``to`` and an optional
    ``code`` (default 302).

    Args:
        base_dir: Site source root, searched for ``redirects.yml``.
        output_dir: Directory that receives ``caddy_config.json``.
        jinja_env: Unused; kept so the signature matches ``content`` and
            ``blog``.
        site_config: Unused; kept for the same reason.

    Raises:
        SystemExit: If a rule lacks ``from``/``to`` or has a disallowed code.
    """
    try:
        with open(base_dir / "redirects.yml", encoding="utf-8") as f:
            # Fall back to [] if the loader returns nothing (e.g. empty file).
            raw_redirect_rules = load_yaml(f.read()) or []
    except FileNotFoundError:
        raw_redirect_rules = []

    redirects = []
    for i, rule in enumerate(raw_redirect_rules):
        if "from" not in rule or "to" not in rule:
            rprint(ERROR_LEADER + f"Redirect rule {i} missing either from or to field")
            raise SystemExit(1)
        status = rule.get("code", 302)
        # NOTE(review): 401 is accepted alongside 3xx although it is not a
        # redirect status -- confirm this is intentional.
        if not ((300 <= status <= 399) or status == 401):
            rprint(
                ERROR_LEADER
                + f"Redirect rule {i} has an invalid status code (not 3xx or 401)"
            )
            raise SystemExit(1)
        redirects.append(
            {
                "match": [{"path": [rule["from"]]}],
                "handle": [
                    {
                        "handler": "static_response",
                        "headers": {
                            "Location": [rule["to"]],
                        },
                        "status_code": status,
                    }
                ],
            }
        )

    strip_server_header_route = {
        "handle": [
            {
                "handler": "headers",
                "response": {
                    "deferred": True,
                    "delete": ["Server"],
                },
            }
        ]
    }
    site_route = {
        "handle": [
            {
                "handler": "subroute",
                "routes": [
                    # redirects come first so they win over the file server
                    *redirects,
                    {
                        "handle": [
                            {
                                "handler": "file_server",
                                "root": "./html",
                            }
                        ],
                    },
                ],
                "errors": {
                    "routes": [
                        {
                            "handle": [
                                {
                                    "handler": "rewrite",
                                    "uri": "/404.html",
                                },
                                {
                                    "handler": "file_server",
                                    "root": "./html",
                                },
                            ],
                            "terminal": True,
                        },
                    ],
                },
            },
        ],
    }
    conf = {
        "admin": {"disabled": True},
        "apps": {
            "http": {
                "servers": {
                    "srv0": {
                        "automatic_https": {"disable": True},
                        "listen": [":8080"],
                        "routes": [strip_server_header_route, site_route],
                    },
                },
            },
        },
    }
    with open(output_dir / "caddy_config.json", "w", encoding="utf-8") as f:
        json.dump(conf, f, indent="\t")
    update_counts("generated", 1)