# film-dev-cost-scraper/postprocessor/__main__.py

import sys
from collections import defaultdict
from typing_extensions import cast
from yattag import Doc, AsIs
import re
from functools import reduce
import json
import time
import datetime
from pathlib import Path

# Matches every run of characters that may not appear in a slug: anything that
# is not a word character, plus the underscore.
SLUG_PATTERN = re.compile(r"[\W_]+")


def slugify(value):
    """Return an ASCII, hyphen-separated slug derived from ``value``.

    Non-ASCII characters are dropped, each run of non-word characters or
    underscores collapses to a single hyphen, and leading/trailing hyphens
    are trimmed. Letter case is left untouched.
    """
    ascii_only = value.encode("ascii", errors="ignore").decode()
    return SLUG_PATTERN.sub("-", ascii_only).strip("-")


# Command-line arguments: the path of the JSON file to read, followed by the
# path of the HTML file to write. Indexing sys.argv directly means a missing
# argument surfaces as an IndexError.
JSONFILE = sys.argv[1]
OUTPUTFILE = sys.argv[2]


# Human-readable heading for every column identifier used by this script.
col_titles = {
    # included in the CSV -- these match the keys read from each raw data row
    "lab": "Lab",
    "chemistry": "Chemistry",
    "format": "Format",
    "subformat": "Subformat",
    "includesSendShipping": "Includes outbound shipping?",
    "sendShippingType": "Outbound shipping type",
    "returnShippingCost": "Return shipping cost",
    "returnShippingType": "Return shipping provider",
    "cost": "Development cost",
    "resolution": "Scan resolution",
    "resolutionName": "Scan resolution name",
    "url": "Product URL",
}

# render only -- columns derived from the raw fields when building the tables
col_titles.update(
    {
        "outboundShipping": "Outbound shipping",
        "returnShipping": "Return shipping",
        "renderResolution": "Scan resolution",
        "pricePerPixel": "Price per pixel",
        "link": "Order page",
        "calculatedPrice": "Calculated price",
    }
)

# Raw data rows grouped by their (chemistry, format, subformat) key. Missing
# keys start out as an empty list so rows can simply be appended.
entries_by_type = defaultdict(list)
# Optional free-text note for a (chemistry, format, subformat) key.
notes_by_type = {}


def _render_line(*args, **kwargs):
    """Render one yattag ``line`` element in a fresh document.

    All arguments are forwarded unchanged to ``Doc.line``; the markup
    produced by that throwaway document is returned as a string.
    """
    fragment = Doc()
    fragment.line(*args, **kwargs)
    return fragment.getvalue()


def _format_price(price):
    return "£{:.2f}".format(price)


# Load the input file. JSON interchange text is UTF-8, so the encoding is
# given explicitly instead of relying on the platform's locale default.
with open(JSONFILE, encoding="utf-8") as f:
    raw_data_object = json.load(f)

# Group the data rows by their (chemistry, format, subformat) key; each group
# later becomes one table on the page.
for row in raw_data_object["data"]:
    entries_by_type[(row["chemistry"], row["format"], row["subformat"])].append(row)

# Notes are keyed the same way; a later note for the same key replaces an
# earlier one.
for row in raw_data_object["notes"]:
    notes_by_type[(row["chemistry"], row["format"], row["subformat"])] = row["note"]

# ---------------------------------------------------------------------------
# Page rendering: build the whole HTML document with yattag and write it to
# OUTPUTFILE. One table is emitted per (chemistry, format, subformat) group.
# ---------------------------------------------------------------------------


def _outbound_shipping(row):
    """Return the outbound shipping type, or "×" when the row says it is not included."""
    if row["includesSendShipping"].lower() == "no":
        return "×"
    return row["sendShippingType"]


def _return_shipping(row):
    """Return the return-shipping price ("Free" when zero) followed by the shipping type."""
    shipping_cost = float(row["returnShippingCost"])
    price = "Free" if shipping_cost == 0 else _format_price(shipping_cost)
    return f"{price} ({row['returnShippingType']})"


def _price_per_pixel(row):
    """Return the development cost in pence divided by the pixel count, e.g. "0.00012p".

    The pixel count is the product of the "x"-separated integers in the row's
    ``resolution`` field.
    """
    pixels = reduce(lambda a, b: a * b, map(int, row["resolution"].split("x")), 1)
    return "{:.5f}p".format(float(row["cost"]) * 100 / pixels)


# Table columns in display order: (column identifier, function turning a data
# row into the cell content). The list does not depend on the group being
# rendered, so it is built once.
cols = [
    ("lab", lambda row: row["lab"]),
    ("outboundShipping", _outbound_shipping),
    ("returnShipping", _return_shipping),
    ("cost", lambda row: _format_price(float(row["cost"]))),
    (
        "renderResolution",
        lambda row: f"{row['resolution']} ({row['resolutionName']!r})",
    ),
    ("pricePerPixel", _price_per_pixel),
    ("link", lambda row: _render_line("a", "Link", href=row["url"])),
]

# Columns whose cell function already returns rendered HTML. Every other cell
# is plain text taken from the input data and is HTML-escaped on output.
RAW_HTML_COLUMNS = {"link"}

# Timezone-aware replacement for the deprecated datetime.utcfromtimestamp();
# the formatted string is the same UTC wall-clock time.
last_updated = datetime.datetime.fromtimestamp(
    raw_data_object["time"], tz=datetime.timezone.utc
).strftime("%Y-%m-%d %H:%M:%S")

doc, tag, text, line = Doc().ttl()

doc.asis("<!DOCTYPE html>")
with tag("html"):
    with tag("head"):
        doc.stag("meta", charset="utf-8")
        doc.stag("meta", name="viewport", content="width=device-width, initial-scale=1")
        doc.stag(
            "link",
            rel="stylesheet",
            href="https://www.akpain.net/assets/css/risotto.css",
        )
        doc.stag(
            "link",
            rel="stylesheet",
            href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css",
        )
        doc.stag(
            "link",
            rel="stylesheet",
            href="https://cdn.jsdelivr.net/npm/simple-datatables@latest/dist/style.css",
        )
        with tag("script", src="https://cdn.jsdelivr.net/npm/simple-datatables@latest"):
            doc.asis("")

    with tag("body"):
        with tag("div", klass="container pt-3"):

            line("a", "[abi abi] $", klass="pe-3", href="https://www.akpain.net")
            line("a", "back to photography", href="https://www.akpain.net/photography/")

            line("h1", "Film Development Price Comparison", klass="pt-2")

            line("p", "This is my attempt to work out the best value for money film developing and scanning service that's available in the UK. Labs are compared as like-for-like as possible, but some variation (especially in scan size) is inevitable.")
            with tag("p"):
                text("If your favourite/local/whatever lab isn't listed here, ")
                line("a", "let me know", href="https://www.akpain.net#contact")
                text(" and I'll add it! Likewise, if you want to see E6, ECN2, half frame, 120 or anything else here, please do tell me.")

            with tag("p"):
                text(
                    "Development costs last updated "
                    + last_updated
                    + ". Price per pixel figures do not include estimates for outbound or return shipping. "
                )
                line("a", "Raw data available here", href="rawdata.json")
                text(".")

            # Table of contents linking to each group's heading.
            with tag("div", klass="card", style="width: 18rem;"):
                with tag("div", klass="card-body"):
                    line("div", "Contents", klass="card-title", style="font-family: var(--font-monospace)")
                    with tag("ul", klass="card-text"):
                        for chemistry, film_format, subformat in entries_by_type:
                            slug = slugify(chemistry + film_format + subformat)
                            with tag("li"):
                                line(
                                    "a",
                                    f"{chemistry} {film_format} ({subformat})",
                                    href=f"#{slug}-title",
                                )

            # Table ids, collected so they can be handed to the page script.
            slugs = []

            for group_key, entries in entries_by_type.items():
                chemistry, film_format, subformat = group_key

                slug = slugify(chemistry + film_format + subformat)
                slugs.append(slug)

                line(
                    "h2",
                    f"{chemistry} {film_format} ({subformat})",
                    klass="h3 pt-4",
                    id=slug + "-title",
                )

                if group_key in notes_by_type:
                    line("p", notes_by_type[group_key])

                # Colour scale for the price-per-pixel column: the distinct
                # values of this table are ranked from cheapest to dearest and
                # the hue steps down evenly from 120 as the rank increases.
                distinct_ppp = list(
                    dict.fromkeys(_price_per_pixel(row) for row in entries)
                )
                hue_step = 120 // (len(distinct_ppp) - 1) if len(distinct_ppp) > 1 else 0
                pppcolours = {
                    value: f"hsl({120 - (rank * hue_step)}, 71%, 73%)"
                    for rank, value in enumerate(
                        # strip the trailing "p" to sort numerically
                        sorted(distinct_ppp, key=lambda v: float(v[:-1]))
                    )
                }

                with tag("table", klass="table table-hover", id=slug):
                    with tag("thead"):
                        with tag("tr"):
                            for col_name, _ in cols:
                                line("th", col_titles[col_name], scope="col")

                    with tag("tbody"):
                        for data in sorted(entries, key=lambda row: row["lab"]):
                            with tag("tr"):
                                for i, (col_name, fn) in enumerate(cols):
                                    val = fn(data)
                                    if i == 0:
                                        # the first column is the row header
                                        line("th", val, scope="row")
                                        continue

                                    with tag("td"):
                                        if col_name in RAW_HTML_COLUMNS:
                                            doc.asis(val)
                                        else:
                                            # escape text that comes from the input data
                                            text(val)

                                        if col_name == "pricePerPixel":
                                            doc.attr(
                                                style="background-color: "
                                                + pppcolours[val]
                                            )

            # Inline script: define `slugs`, then append page.js from this
            # script's directory (presumably page.js reads `slugs` -- its
            # contents are not visible here).
            with tag("script"):
                doc.asis("const slugs = ")
                doc.asis(json.dumps(slugs))
                doc.asis(";\n")
                page_js = Path(__file__).resolve().parent / "page.js"
                doc.asis(page_js.read_text(encoding="utf-8"))

        with tag(
            "script",
            src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js",
        ):
            doc.asis()

# The page declares charset="utf-8" and contains non-ASCII characters such as
# "£" and "×", so the file must be written as UTF-8 whatever the locale is.
with open(OUTPUTFILE, "w", encoding="utf-8") as f:
    f.write(doc.getvalue())