# website/generator/process.py

from util import *
from pathlib import Path
from jinja2 import Environment
import shutil
from rich import print as rprint
import feeds
from typedef import *
import re
import enum
from collections.abc import Generator
import json


def _template_frontmatter(data: any, jinja_env: Environment, context: any):
    """Render every string value of a frontmatter mapping as a Jinja template.

    The mapping is rewritten in place and nothing is returned. Nested
    mappings are processed recursively; values of any other type (numbers,
    lists, ...) are left untouched.

    Args:
        data: Frontmatter mapping whose string values are replaced by their
            rendered form.
        jinja_env: Environment used to compile each string via
            ``from_string``.
        context: Template context passed to ``render`` for every string.
    """
    # Only values of existing keys are reassigned, so iterating over
    # ``items()`` while updating is safe (the mapping never changes size).
    for key, value in data.items():
        # isinstance rather than an exact type comparison, so that str/dict
        # subclasses (e.g. ordered mappings) are templated as well.
        if isinstance(value, str):
            data[key] = jinja_env.from_string(value).render(context)
        elif isinstance(value, dict):
            _template_frontmatter(value, jinja_env, context)


class _FileType(enum.IntEnum):
    """Processing category assigned to a file found while walking a source tree."""

    # Values are written out explicitly; they are the same numbers that
    # ``enum.auto()`` hands out for an IntEnum (1, 2, 3 in definition order).
    HTML_TEMPLATE = 1  # files with an ``html`` extension
    STATIC = 2  # everything that is not a recognised template
    MARKDOWN_TEMPLATE = 3  # files with an ``md`` extension


# Lower-cased file extension -> processing type. Extensions that are not
# listed here are treated as static files.
_TEMPLATE_EXTENSIONS = {
    "html": _FileType.HTML_TEMPLATE,
    "md": _FileType.MARKDOWN_TEMPLATE,
}


def _walk_content(
    start_dir: str | Path,
) -> Generator[tuple[Path, _FileType], None, None]:
    """Recursively yield every file below *start_dir* with its file type.

    Files and directories whose name starts with ``_`` are skipped (an
    ignored directory is not descended into). Entries are visited in sorted
    order so that repeated builds process files in the same sequence,
    independent of the order the filesystem happens to list them in.

    Args:
        start_dir: Directory to walk.

    Yields:
        ``(path, file_type)`` pairs, where ``path`` is ``start_dir`` joined
        with the relative location of the file, and ``file_type`` is derived
        from the (case-insensitive) file extension.
    """
    root = Path(start_dir)

    for entry in sorted(root.iterdir()):
        name = entry.name
        if name.startswith("_"):
            continue

        if entry.is_dir():
            yield from _walk_content(entry)
            continue

        # The extension is whatever follows the last dot. A name without any
        # dot has no extension, even if the whole name is "md" or "html".
        _, dot, extension = name.rpartition(".")
        if dot:
            ftype = _TEMPLATE_EXTENSIONS.get(extension.lower(), _FileType.STATIC)
        else:
            ftype = _FileType.STATIC

        yield entry, ftype


def content(base_dir: Path, output_dir: Path, jinja_env: Environment, site_config: any):
    """Build every file under ``<base_dir>/content`` into *output_dir*.

    HTML and Markdown files are rendered through Jinja; every other file is
    copied unchanged to the same relative location.

    A rendered page ``foo.md`` / ``foo.html`` is written to
    ``foo/index.html``. Files whose stem is ``index`` are instead written in
    place as ``index.html``. The frontmatter key ``asDirectory`` overrides
    this choice whenever it is present and not ``None``.

    Templates receive ``site`` (the site config) and ``page`` (the page's
    frontmatter, with its string values already rendered as templates).
    Markdown pages are wrapped in ``_layouts/base.html`` and additionally
    receive the converted Markdown as ``rendered``.

    Args:
        base_dir: Site source root; its ``content`` subdirectory is walked.
        output_dir: Root directory of the generated site.
        jinja_env: Environment used to compile frontmatter strings and pages.
        site_config: Exposed to templates as ``site``.
    """
    walk_dir = base_dir / "content"
    for fpath, filetype in _walk_content(walk_dir):
        # the path of the file *inside* a site directory structure
        # (eg. inside of `_dist` or inside of `content`)
        site_inner_path = fpath.relative_to(walk_dir)

        if filetype not in (_FileType.HTML_TEMPLATE, _FileType.MARKDOWN_TEMPLATE):
            if filetype != _FileType.STATIC:
                rprint(
                    WARN_LEADER
                    + f"Treating [bold]{fpath}[/bold] (type {filetype.name}) as a static file"
                )
            target_path = output_dir / site_inner_path
            target_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(fpath, target_path)
            update_counts("copied", 1)
            continue

        # Read and write as UTF-8 explicitly so the result does not depend on
        # the platform's default encoding.
        with open(fpath, encoding="utf-8") as f:
            tpl_frontmatter, raw_tpl = extract_frontmatter(f.read())

        as_directory = tpl_frontmatter.get("asDirectory")
        if as_directory is not None:
            render_as_directory = bool(as_directory)
        else:
            # index.md / index.html already are a directory index
            render_as_directory = not (
                site_inner_path.stem.lower() == "index"
                and site_inner_path.suffix.lower() in [".md", ".html"]
            )

        if render_as_directory:
            relative_target = site_inner_path.parent / site_inner_path.stem / "index.html"
        else:
            relative_target = site_inner_path.with_suffix(".html")
        target_path = output_dir / relative_target
        target_path.parent.mkdir(parents=True, exist_ok=True)

        # Frontmatter strings are rendered with only `site` in scope; `page`
        # is added to the context afterwards.
        ctx = {"site": site_config}
        _template_frontmatter(tpl_frontmatter, jinja_env, ctx)
        ctx["page"] = tpl_frontmatter

        if filetype == _FileType.HTML_TEMPLATE:
            tpl = jinja_env.from_string(raw_tpl)
        else:
            # Markdown: convert first, then inject into the base layout.
            tpl = jinja_env.from_string(
                '{% extends "_layouts/base.html" %}{% block main %}{{ rendered | safe }}{% endblock %}'
            )
            ctx["rendered"] = render_markdown(raw_tpl, escape=False)
        res = tpl.render(ctx)

        with open(target_path, "w", encoding="utf-8") as f:
            f.write(res)

        update_counts("rendered", 1)


# Date layout written with strftime/strptime directives, e.g. ``2024-01-31``.
# NOTE(review): not referenced in this part of the file; presumably consumed
# by other modules - confirm before changing.
BLOG_DATE_FORMAT: str = "%Y-%m-%d"

# A valid blog tag: one or more letters (a-z, A-Z), digits or hyphens.
# Note that ``\d`` in a str pattern also accepts non-ASCII decimal digits.
BLOG_TAG_RE: re.Pattern[str] = re.compile(r"[a-zA-Z\d-]+")


def blog(base_dir: Path, output_dir: Path, jinja_env: Environment, site_config: any):
    """Build the blog: posts, listing, tag pages and feeds.

    Every Markdown file under ``<base_dir>/blog`` is a post; every other file
    is copied unchanged to the same relative location below
    ``<output_dir>/blog``. A post's slug is its file name without the ``.md``
    extension, or the name of the containing directory when the file is
    called ``content.md``.

    Posts must define the frontmatter keys ``title`` and ``publishedDate``,
    and every entry of ``tags`` (if present) must fully match
    ``BLOG_TAG_RE``. Posts with a truthy ``hidden`` key are still rendered
    but are left out of the listing, the tag pages and the feeds.

    Generated output, all below ``<output_dir>/blog``:
        ``<slug>/index.html``       one page per post
        ``index.html``              listing, newest ``publishedDate`` first
        ``tags/index.html``         list of all tags
        ``tags/<tag>/index.html``   posts filtered by tag
        ``feed.atom``, ``feed.json``

    Args:
        base_dir: Site source root; ``blog`` and the optional
            ``blogTags.yml`` (tag slug -> description) are read from it.
        output_dir: Root directory of the generated site.
        jinja_env: Environment the ``_layouts/blog/*`` templates are loaded
            from.
        site_config: Exposed to templates as ``site`` and passed to the feed
            generators.

    Raises:
        SystemExit: With status 1 (after printing an error) on a duplicate
            slug, missing required frontmatter keys, or invalid tags.
    """
    walk_dir = base_dir / "blog"
    blog_output_dir = output_dir / "blog"

    posts = {}
    for fpath, filetype in _walk_content(walk_dir):
        if filetype != _FileType.MARKDOWN_TEMPLATE:
            if filetype != _FileType.STATIC:
                rprint(
                    WARN_LEADER
                    + f"Treating [bold]{fpath}[/bold] (type {filetype.name}) as a static file"
                )
            target_path = blog_output_dir / fpath.relative_to(walk_dir)
            target_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(fpath, target_path)
            update_counts("copied", 1)
            continue

        # Read and write as UTF-8 explicitly so the result does not depend on
        # the platform's default encoding.
        with open(fpath, encoding="utf-8") as f:
            post_frontmatter, raw_post_md = extract_frontmatter(f.read())

        # `[:-3]` strips the three-character `.md` extension.
        post_slug = fpath.name[:-3] if fpath.name != "content.md" else fpath.parent.name

        if post_slug in posts:
            rprint(ERROR_LEADER + f"Duplicate post slug [bold]{post_slug}[/bold]")
            raise SystemExit(1)

        # check required keys
        missing_keys = [
            key for key in ["title", "publishedDate"] if key not in post_frontmatter
        ]
        if missing_keys:
            rprint(
                ERROR_LEADER
                + f"Post [bold]{post_slug}[/bold] missing the following frontmatter keys: "
                + ",".join(missing_keys)
            )
            raise SystemExit(1)

        # check tags are valid
        if "tags" in post_frontmatter:
            invalid_tags = [
                tag
                for tag in post_frontmatter["tags"]
                if not BLOG_TAG_RE.fullmatch(tag)
            ]
            if invalid_tags:
                rprint(
                    ERROR_LEADER
                    + f"Post [bold]{post_slug}[/bold] has the following invalid tags: "
                    + ",".join(map(repr, invalid_tags))
                )
                raise SystemExit(1)

        target_path = blog_output_dir / post_slug / "index.html"
        target_path.parent.mkdir(parents=True, exist_ok=True)

        # Most recent update first; the listing only shows element 0.
        if "updatedDate" in post_frontmatter:
            post_frontmatter["updatedDate"] = sorted(
                post_frontmatter["updatedDate"], reverse=True
            )

        posts[post_slug] = post_frontmatter

        rendered_html = render_markdown(raw_post_md, escape=False)

        # build jinja context
        page = {
            k: post_frontmatter[k]
            for k in ["title", "description", "imageURL"]
            if k in post_frontmatter
        }
        page["canonicalURL"] = f"/blog/{post_slug}/"
        ctx = {
            "site": site_config,
            "post": post_frontmatter,
            "content": rendered_html,
            "page": page,
        }

        # execute jinja template and dump to file
        res = jinja_env.get_template("_layouts/blog/post.html").render(ctx)
        target_path.write_text(res, encoding="utf-8")

        update_counts("rendered", 1)

    # Nothing above creates the blog output directory when the source tree
    # holds no files, but the listing and feeds are written unconditionally.
    blog_output_dir.mkdir(parents=True, exist_ok=True)

    # generate listing
    post_list = []
    tag_slugs = {}  # used as an insertion-ordered set of tag slugs
    for slug, post in posts.items():
        if post.get("hidden"):
            continue

        post_tags = post.get("tags", [])
        updated_dates = post.get("updatedDate")
        post_list.append(
            AbbreviatedPost(
                slug,
                post["title"],
                post.get("description", ""),
                post["publishedDate"],
                updated_dates[0] if updated_dates else None,
                "favourite" in post_tags,
                post_tags,
            )
        )
        tag_slugs.update(dict.fromkeys(post_tags))

    try:
        raw_tag_descriptions = (base_dir / "blogTags.yml").read_text(encoding="utf-8")
    except FileNotFoundError:
        tag_descriptions = {}
    else:
        # Fall back to an empty mapping if the loader returns a falsy value
        # (presumably what happens for an empty file - depends on load_yaml).
        tag_descriptions = load_yaml(raw_tag_descriptions) or {}

    tags = [AbbreviatedTag(slug, tag_descriptions.get(slug)) for slug in tag_slugs]

    post_list.sort(key=lambda p: p.publishedDate, reverse=True)

    # Each page is rendered before its file is opened, so a template error
    # does not leave an empty file behind.
    index_html = jinja_env.get_template("_layouts/blog/index.html").render(
        {
            "site": site_config,
            "page": {
                "title": "Blog",
                "canonicalURL": "/blog/",
                "showAside": True,
            },
            "posts": post_list,
        }
    )
    (blog_output_dir / "index.html").write_text(index_html, encoding="utf-8")
    update_counts("generated", 1)

    # generate tag list
    tags_output_dir = blog_output_dir / "tags"
    tags_output_dir.mkdir(parents=True, exist_ok=True)
    tags_html = jinja_env.get_template("_layouts/blog/tags.html").render(
        {
            "site": site_config,
            "page": {
                "title": "Blog tags",
                "canonicalURL": "/blog/tags/",
                "showAside": True,
            },
            "tags": sorted(tags),
        }
    )
    (tags_output_dir / "index.html").write_text(tags_html, encoding="utf-8")
    update_counts("generated", 1)

    # generate tag-specific index pages
    tpl = jinja_env.get_template("_layouts/blog/postsFilteredByTag.html")
    for tag in tags:
        tag_dir = tags_output_dir / tag.slug
        tag_dir.mkdir(parents=True, exist_ok=True)
        tag_html = tpl.render(
            {
                "site": site_config,
                "page": {
                    "title": f"{tag.slug} :: Blog Tags",
                    "canonicalURL": f"/blog/tags/{tag.slug}/",
                    "showAside": True,
                },
                "tag": tag,
                "posts": [p for p in post_list if tag.slug in p.tags],
            }
        )
        (tag_dir / "index.html").write_text(tag_html, encoding="utf-8")
        update_counts("generated", 1)

    # generate feeds
    (blog_output_dir / "feed.atom").write_text(
        feeds.atom(site_config, post_list), encoding="utf-8"
    )
    (blog_output_dir / "feed.json").write_text(
        feeds.json(site_config, post_list), encoding="utf-8"
    )

    update_counts("generated", 2)


def caddy_config(
    base_dir: Path, output_dir: Path, jinja_env: Environment, site_config: any
):
    """Write ``<output_dir>/caddy_config.json``, a Caddy JSON configuration.

    The generated server has the admin endpoint and automatic HTTPS
    disabled, listens on ``:8080``, removes the ``Server`` response header,
    and serves files from ``./html``. Errors are answered by rewriting the
    request to ``/404.html``.

    Redirects are read from the optional ``<base_dir>/redirects.yml``, a list
    of rules with the keys ``from``, ``to`` and an optional ``code`` (default
    302). They are placed before the file server in the route list.

    Args:
        base_dir: Site source root containing the optional ``redirects.yml``.
        output_dir: Directory the configuration file is written to.
        jinja_env: Unused by this function.
        site_config: Unused by this function.

    Raises:
        SystemExit: With status 1 (after printing an error) when a rule lacks
            ``from``/``to`` or its ``code`` is not 3xx or 401.
    """
    try:
        raw_rules_text = (base_dir / "redirects.yml").read_text(encoding="utf-8")
    except FileNotFoundError:
        raw_redirect_rules = []
    else:
        # Fall back to "no rules" if the loader returns a falsy value
        # (presumably what happens for an empty file - depends on load_yaml).
        raw_redirect_rules = load_yaml(raw_rules_text) or []

    redirects = []

    for i, rule in enumerate(raw_redirect_rules):
        if "from" not in rule or "to" not in rule:
            rprint(ERROR_LEADER + f"Redirect rule {i} missing either from or to field")
            raise SystemExit(1)
        status = rule.get("code", 302)
        # A non-integer code (e.g. a quoted string in the YAML) is reported
        # as invalid instead of failing inside the range comparison.
        if not isinstance(status, int) or not (
            (300 <= status <= 399) or status == 401
        ):
            rprint(
                ERROR_LEADER
                + f"Redirect rule {i} has an invalid status code (not 3xx or 401)"
            )
            raise SystemExit(1)
        redirects.append(
            {
                "match": [{"path": [rule["from"]]}],
                "handle": [
                    {
                        "handler": "static_response",
                        "headers": {
                            "Location": [rule["to"]],
                        },
                        "status_code": status,
                    }
                ],
            }
        )

    conf = {
        "admin": {"disabled": True},
        "apps": {
            "http": {
                "servers": {
                    "srv0": {
                        "automatic_https": {"disable": True},
                        "listen": [":8080"],
                        "routes": [
                            {
                                "handle": [
                                    {
                                        "handler": "headers",
                                        "response": {
                                            "deferred": True,
                                            "delete": ["Server"],
                                        },
                                    }
                                ]
                            },
                            {
                                "handle": [
                                    {
                                        "handler": "subroute",
                                        "routes": [
                                            # redirects come first, the file
                                            # server is the last route
                                            *redirects,
                                            {
                                                "handle": [
                                                    {
                                                        "handler": "file_server",
                                                        "root": "./html",
                                                    }
                                                ],
                                            },
                                        ],
                                        "errors": {
                                            "routes": [
                                                {
                                                    "handle": [
                                                        {
                                                            "handler": "rewrite",
                                                            "uri": "/404.html",
                                                        },
                                                        {
                                                            "handler": "file_server",
                                                            "root": "./html",
                                                        },
                                                    ],
                                                    "terminal": True,
                                                },
                                            ],
                                        },
                                    },
                                ],
                            },
                        ],
                    },
                },
            },
        },
    }

    with open(output_dir / "caddy_config.json", "w", encoding="utf-8") as f:
        json.dump(conf, f, indent="\t")

    update_counts("generated", 1)