From 3ea984c2eff81e484a1bcca72d2b7cf3e90b4855 Mon Sep 17 00:00:00 2001 From: AKP Date: Mon, 26 Aug 2024 01:08:27 +0100 Subject: [PATCH] Revert "Initial commit" This reverts commit 2dab7acce521b5fad4fcc1bb9240dd404332ebac. --- bundle.sh | 13 - poetry.lock | 367 -------------------------- postprocessor/__main__.py | 245 ----------------- postprocessor/page.js | 9 - pyproject.toml | 17 -- pyrightconfig.json | 4 - scraper/__main__.py | 51 ---- scraper/scrapers.py | 534 -------------------------------------- 8 files changed, 1240 deletions(-) delete mode 100644 bundle.sh delete mode 100644 poetry.lock delete mode 100644 postprocessor/__main__.py delete mode 100644 postprocessor/page.js delete mode 100644 pyproject.toml delete mode 100644 pyrightconfig.json delete mode 100644 scraper/__main__.py delete mode 100644 scraper/scrapers.py diff --git a/bundle.sh b/bundle.sh deleted file mode 100644 index aff5496..0000000 --- a/bundle.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -set -ex - -DATAFILE=$1 -DIR=$(mktemp -p . -d) - -poetry run python3 postprocessor/ $DATAFILE $DIR/index.html -cp $DATAFILE $DIR/rawdata.json - -zip --junk-paths bundle.zip $DIR/* - -rm -r $DIR \ No newline at end of file diff --git a/poetry.lock b/poetry.lock deleted file mode 100644 index 0fcddc3..0000000 --- a/poetry.lock +++ /dev/null @@ -1,367 +0,0 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. - -[[package]] -name = "attrs" -version = "24.2.0" -description = "Classes Without Boilerplate" -optional = false -python-versions = ">=3.7" -files = [ - {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"}, - {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"}, -] - -[package.extras] -benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] - -[[package]] -name = "certifi" -version = "2024.7.4" -description = "Python package for providing Mozilla's CA Bundle." -optional = false -python-versions = ">=3.6" -files = [ - {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, - {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, -] - -[[package]] -name = "cffi" -version = "1.17.0" -description = "Foreign Function Interface for Python calling C code." -optional = false -python-versions = ">=3.8" -files = [ - {file = "cffi-1.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f9338cc05451f1942d0d8203ec2c346c830f8e86469903d5126c1f0a13a2bcbb"}, - {file = "cffi-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a0ce71725cacc9ebf839630772b07eeec220cbb5f03be1399e0457a1464f8e1a"}, - {file = "cffi-1.17.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c815270206f983309915a6844fe994b2fa47e5d05c4c4cef267c3b30e34dbe42"}, - {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6bdcd415ba87846fd317bee0774e412e8792832e7805938987e4ede1d13046d"}, - {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8a98748ed1a1df4ee1d6f927e151ed6c1a09d5ec21684de879c7ea6aa96f58f2"}, - {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a048d4f6630113e54bb4b77e315e1ba32a5a31512c31a273807d0027a7e69ab"}, - {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24aa705a5f5bd3a8bcfa4d123f03413de5d86e497435693b638cbffb7d5d8a1b"}, - {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:856bf0924d24e7f93b8aee12a3a1095c34085600aa805693fb7f5d1962393206"}, - {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:4304d4416ff032ed50ad6bb87416d802e67139e31c0bde4628f36a47a3164bfa"}, - {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:331ad15c39c9fe9186ceaf87203a9ecf5ae0ba2538c9e898e3a6967e8ad3db6f"}, - {file = "cffi-1.17.0-cp310-cp310-win32.whl", hash = "sha256:669b29a9eca6146465cc574659058ed949748f0809a2582d1f1a324eb91054dc"}, - {file = "cffi-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:48b389b1fd5144603d61d752afd7167dfd205973a43151ae5045b35793232aa2"}, - {file = "cffi-1.17.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5d97162c196ce54af6700949ddf9409e9833ef1003b4741c2b39ef46f1d9720"}, - {file = "cffi-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ba5c243f4004c750836f81606a9fcb7841f8874ad8f3bf204ff5e56332b72b9"}, - {file = "cffi-1.17.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bb9333f58fc3a2296fb1d54576138d4cf5d496a2cc118422bd77835e6ae0b9cb"}, - {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:435a22d00ec7d7ea533db494da8581b05977f9c37338c80bc86314bec2619424"}, - {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1df34588123fcc88c872f5acb6f74ae59e9d182a2707097f9e28275ec26a12d"}, - {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df8bb0010fdd0a743b7542589223a2816bdde4d94bb5ad67884348fa2c1c67e8"}, - {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8b5b9712783415695663bd463990e2f00c6750562e6ad1d28e072a611c5f2a6"}, - {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ffef8fd58a36fb5f1196919638f73dd3ae0db1a878982b27a9a5a176ede4ba91"}, - {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e67d26532bfd8b7f7c05d5a766d6f437b362c1bf203a3a5ce3593a645e870b8"}, - {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:45f7cd36186db767d803b1473b3c659d57a23b5fa491ad83c6d40f2af58e4dbb"}, - {file = "cffi-1.17.0-cp311-cp311-win32.whl", hash = "sha256:a9015f5b8af1bb6837a3fcb0cdf3b874fe3385ff6274e8b7925d81ccaec3c5c9"}, - {file = "cffi-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:b50aaac7d05c2c26dfd50c3321199f019ba76bb650e346a6ef3616306eed67b0"}, - {file = "cffi-1.17.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aec510255ce690d240f7cb23d7114f6b351c733a74c279a84def763660a2c3bc"}, - {file = "cffi-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2770bb0d5e3cc0e31e7318db06efcbcdb7b31bcb1a70086d3177692a02256f59"}, - {file = "cffi-1.17.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db9a30ec064129d605d0f1aedc93e00894b9334ec74ba9c6bdd08147434b33eb"}, - {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a47eef975d2b8b721775a0fa286f50eab535b9d56c70a6e62842134cf7841195"}, - {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f3e0992f23bbb0be00a921eae5363329253c3b86287db27092461c887b791e5e"}, - {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6107e445faf057c118d5050560695e46d272e5301feffda3c41849641222a828"}, - {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb862356ee9391dc5a0b3cbc00f416b48c1b9a52d252d898e5b7696a5f9fe150"}, - {file = "cffi-1.17.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c1c13185b90bbd3f8b5963cd8ce7ad4ff441924c31e23c975cb150e27c2bf67a"}, - {file = "cffi-1.17.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:17c6d6d3260c7f2d94f657e6872591fe8733872a86ed1345bda872cfc8c74885"}, - {file = "cffi-1.17.0-cp312-cp312-win32.whl", hash = "sha256:c3b8bd3133cd50f6b637bb4322822c94c5ce4bf0d724ed5ae70afce62187c492"}, - {file = "cffi-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:dca802c8db0720ce1c49cce1149ff7b06e91ba15fa84b1d59144fef1a1bc7ac2"}, - {file = "cffi-1.17.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6ce01337d23884b21c03869d2f68c5523d43174d4fc405490eb0091057943118"}, - {file = "cffi-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cab2eba3830bf4f6d91e2d6718e0e1c14a2f5ad1af68a89d24ace0c6b17cced7"}, - {file = "cffi-1.17.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:14b9cbc8f7ac98a739558eb86fabc283d4d564dafed50216e7f7ee62d0d25377"}, - {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b00e7bcd71caa0282cbe3c90966f738e2db91e64092a877c3ff7f19a1628fdcb"}, - {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:41f4915e09218744d8bae14759f983e466ab69b178de38066f7579892ff2a555"}, - {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4760a68cab57bfaa628938e9c2971137e05ce48e762a9cb53b76c9b569f1204"}, - {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:011aff3524d578a9412c8b3cfaa50f2c0bd78e03eb7af7aa5e0df59b158efb2f"}, - {file = "cffi-1.17.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:a003ac9edc22d99ae1286b0875c460351f4e101f8c9d9d2576e78d7e048f64e0"}, - {file = "cffi-1.17.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ef9528915df81b8f4c7612b19b8628214c65c9b7f74db2e34a646a0a2a0da2d4"}, - {file = "cffi-1.17.0-cp313-cp313-win32.whl", hash = "sha256:70d2aa9fb00cf52034feac4b913181a6e10356019b18ef89bc7c12a283bf5f5a"}, - {file = "cffi-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:b7b6ea9e36d32582cda3465f54c4b454f62f23cb083ebc7a94e2ca6ef011c3a7"}, - {file = "cffi-1.17.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:964823b2fc77b55355999ade496c54dde161c621cb1f6eac61dc30ed1b63cd4c"}, - {file = "cffi-1.17.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:516a405f174fd3b88829eabfe4bb296ac602d6a0f68e0d64d5ac9456194a5b7e"}, - {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dec6b307ce928e8e112a6bb9921a1cb00a0e14979bf28b98e084a4b8a742bd9b"}, - {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4094c7b464cf0a858e75cd14b03509e84789abf7b79f8537e6a72152109c76e"}, - {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2404f3de742f47cb62d023f0ba7c5a916c9c653d5b368cc966382ae4e57da401"}, - {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa9d43b02a0c681f0bfbc12d476d47b2b2b6a3f9287f11ee42989a268a1833c"}, - {file = "cffi-1.17.0-cp38-cp38-win32.whl", hash = "sha256:0bb15e7acf8ab35ca8b24b90af52c8b391690ef5c4aec3d31f38f0d37d2cc499"}, - {file = "cffi-1.17.0-cp38-cp38-win_amd64.whl", hash = "sha256:93a7350f6706b31f457c1457d3a3259ff9071a66f312ae64dc024f049055f72c"}, - {file = "cffi-1.17.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1a2ddbac59dc3716bc79f27906c010406155031a1c801410f1bafff17ea304d2"}, - {file = "cffi-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6327b572f5770293fc062a7ec04160e89741e8552bf1c358d1a23eba68166759"}, - {file = "cffi-1.17.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbc183e7bef690c9abe5ea67b7b60fdbca81aa8da43468287dae7b5c046107d4"}, - {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bdc0f1f610d067c70aa3737ed06e2726fd9d6f7bfee4a351f4c40b6831f4e82"}, - {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6d872186c1617d143969defeadac5a904e6e374183e07977eedef9c07c8953bf"}, - {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d46ee4764b88b91f16661a8befc6bfb24806d885e27436fdc292ed7e6f6d058"}, - {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f76a90c345796c01d85e6332e81cab6d70de83b829cf1d9762d0a3da59c7932"}, - {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0e60821d312f99d3e1569202518dddf10ae547e799d75aef3bca3a2d9e8ee693"}, - {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:eb09b82377233b902d4c3fbeeb7ad731cdab579c6c6fda1f763cd779139e47c3"}, - {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:24658baf6224d8f280e827f0a50c46ad819ec8ba380a42448e24459daf809cf4"}, - {file = "cffi-1.17.0-cp39-cp39-win32.whl", hash = "sha256:0fdacad9e0d9fc23e519efd5ea24a70348305e8d7d85ecbb1a5fa66dc834e7fb"}, - {file = "cffi-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:7cbc78dc018596315d4e7841c8c3a7ae31cc4d638c9b627f87d52e8abaaf2d29"}, - {file = "cffi-1.17.0.tar.gz", hash = "sha256:f3157624b7558b914cb039fd1af735e5e8049a87c817cc215109ad1c8779df76"}, -] - -[package.dependencies] -pycparser = "*" - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] - -[[package]] -name = "exceptiongroup" -version = "1.2.2" -description = "Backport of PEP 654 (exception groups)" -optional = false -python-versions = ">=3.7" -files = [ - {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, - {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, -] - -[package.extras] -test = ["pytest (>=6)"] - -[[package]] -name = "h11" -version = "0.14.0" -description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -optional = false -python-versions = ">=3.7" -files = [ - {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, - {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, -] - -[[package]] -name = "idna" -version = "3.7" -description = "Internationalized Domain Names in Applications (IDNA)" -optional = false -python-versions = ">=3.5" -files = [ - {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, - {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, -] - -[[package]] -name = "outcome" -version = "1.3.0.post0" -description = "Capture the outcome of Python function calls." -optional = false -python-versions = ">=3.7" -files = [ - {file = "outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b"}, - {file = "outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8"}, -] - -[package.dependencies] -attrs = ">=19.2.0" - -[[package]] -name = "pycparser" -version = "2.22" -description = "C parser in Python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, - {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, -] - -[[package]] -name = "pysocks" -version = "1.7.1" -description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "PySocks-1.7.1-py27-none-any.whl", hash = "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299"}, - {file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"}, - {file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"}, -] - -[[package]] -name = "selenium" -version = "4.23.1" -description = "Official Python bindings for Selenium WebDriver" -optional = false -python-versions = ">=3.8" -files = [ - {file = "selenium-4.23.1-py3-none-any.whl", hash = "sha256:3a8d9f23dc636bd3840dd56f00c2739e32ec0c1e34a821dd553e15babef24477"}, - {file = "selenium-4.23.1.tar.gz", hash = "sha256:128d099e66284437e7128d2279176ec7a06e6ec7426e167f5d34987166bd8f46"}, -] - -[package.dependencies] -certifi = ">=2021.10.8" -trio = ">=0.17,<1.0" -trio-websocket = ">=0.9,<1.0" -typing_extensions = ">=4.9,<5.0" -urllib3 = {version = ">=1.26,<3", extras = ["socks"]} -websocket-client = ">=1.8,<2.0" - -[[package]] -name = "sniffio" -version = "1.3.1" -description = "Sniff out which async library your code is running under" -optional = false -python-versions = ">=3.7" -files = [ - {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, - {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, -] - -[[package]] -name = "sortedcontainers" -version = "2.4.0" -description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" -optional = false -python-versions = "*" -files = [ - {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, - {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, -] - -[[package]] -name = "tqdm" -version = "4.66.5" -description = "Fast, Extensible Progress Meter" -optional = false -python-versions = ">=3.7" -files = [ - {file = "tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd"}, - {file = "tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[package.extras] -dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] -notebook = ["ipywidgets (>=6)"] -slack = ["slack-sdk"] -telegram = ["requests"] - -[[package]] -name = "trio" -version = "0.26.2" -description = "A friendly Python library for async concurrency and I/O" -optional = false -python-versions = ">=3.8" -files = [ - {file = "trio-0.26.2-py3-none-any.whl", hash = "sha256:c5237e8133eb0a1d72f09a971a55c28ebe69e351c783fc64bc37db8db8bbe1d0"}, - {file = "trio-0.26.2.tar.gz", hash = "sha256:0346c3852c15e5c7d40ea15972c4805689ef2cb8b5206f794c9c19450119f3a4"}, -] - -[package.dependencies] -attrs = ">=23.2.0" -cffi = {version = ">=1.14", markers = "os_name == \"nt\" and implementation_name != \"pypy\""} -exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} -idna = "*" -outcome = "*" -sniffio = ">=1.3.0" -sortedcontainers = "*" - -[[package]] -name = "trio-websocket" -version = "0.11.1" -description = "WebSocket library for Trio" -optional = false -python-versions = ">=3.7" -files = [ - {file = "trio-websocket-0.11.1.tar.gz", hash = "sha256:18c11793647703c158b1f6e62de638acada927344d534e3c7628eedcb746839f"}, - {file = "trio_websocket-0.11.1-py3-none-any.whl", hash = "sha256:520d046b0d030cf970b8b2b2e00c4c2245b3807853ecd44214acd33d74581638"}, -] - -[package.dependencies] -exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} -trio = ">=0.11" -wsproto = ">=0.14" - -[[package]] -name = "typing-extensions" -version = "4.12.2" -description = "Backported and Experimental Type Hints for Python 3.8+" -optional = false -python-versions = ">=3.8" -files = [ - {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, - {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, -] - -[[package]] -name = "urllib3" -version = "2.2.2" -description = "HTTP library with thread-safe connection pooling, file post, and more." -optional = false -python-versions = ">=3.8" -files = [ - {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, - {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, -] - -[package.dependencies] -pysocks = {version = ">=1.5.6,<1.5.7 || >1.5.7,<2.0", optional = true, markers = "extra == \"socks\""} - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - -[[package]] -name = "websocket-client" -version = "1.8.0" -description = "WebSocket client for Python with low level API options" -optional = false -python-versions = ">=3.8" -files = [ - {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"}, - {file = "websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"}, -] - -[package.extras] -docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"] -optional = ["python-socks", "wsaccel"] -test = ["websockets"] - -[[package]] -name = "wsproto" -version = "1.2.0" -description = "WebSockets state-machine based protocol implementation" -optional = false -python-versions = ">=3.7.0" -files = [ - {file = "wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736"}, - {file = "wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065"}, -] - -[package.dependencies] -h11 = ">=0.9.0,<1" - -[[package]] -name = "yattag" -version = "1.16.0" -description = "Generate HTML or XML in a pythonic way. Pure python alternative to web template engines.Can fill HTML forms with default values and error messages." -optional = false -python-versions = "*" -files = [ - {file = "yattag-1.16.0.tar.gz", hash = "sha256:0978247b9754d9f44e3703c64374ab9fa872d18de95ac5772fdfdd3c3f0d0706"}, -] - -[metadata] -lock-version = "2.0" -python-versions = "^3.10" -content-hash = "1e0968d348899083a0e9546a3c20a6a43a16fc7eede114227563d98b7796ae71" diff --git a/postprocessor/__main__.py b/postprocessor/__main__.py deleted file mode 100644 index afdc65c..0000000 --- a/postprocessor/__main__.py +++ /dev/null @@ -1,245 +0,0 @@ -import sys -from collections import defaultdict -from typing_extensions import cast -from yattag import Doc, AsIs -import re -from functools import reduce -import json -import time -import datetime -from pathlib import Path - -SLUG_PATTERN = re.compile(r"[\W_]+") - - -def slugify(value): - value = value.encode("ascii", errors="ignore").decode() - value = SLUG_PATTERN.sub("-", value) - return value.strip("-") - - -JSONFILE = sys.argv[1] -OUTPUTFILE = sys.argv[2] - - -col_titles = { - # included in the CSV - "lab": "Lab", - "chemistry": "Chemistry", - "format": "Format", - "subformat": "Subformat", - "includesSendShipping": "Includes outbound shipping?", - "sendShippingType": "Outbound shipping type", - "returnShippingCost": "Return shipping cost", - "returnShippingType": "Return shipping provider", - "cost": "Development cost", - "resolution": "Scan resolution", - "resolutionName": "Scan resolution name", - "url": "Product URL", - # render only - "outboundShipping": "Outbound shipping", - "returnShipping": "Return shipping", - "renderResolution": "Scan resolution", - "pricePerPixel": "Price per pixel", - "link": "Order page", - "calculatedPrice": "Calculated price", -} - -entries_by_type = defaultdict(lambda: []) -notes_by_type = {} - - -def _render_line(*args, **kwargs): - d = Doc() - d.line(*args, **kwargs) - return d.getvalue() - - -def _format_price(price): - return "£{:.2f}".format(price) - - -raw_data_object = None - -with open(JSONFILE) as f: - raw_data_object = json.load(f) - -for row in raw_data_object["data"]: - entries_by_type[(row["chemistry"], row["format"], row["subformat"])].append(row) - -for row in raw_data_object["notes"]: - notes_by_type[(row["chemistry"], row["format"], row["subformat"])] = row["note"] - -doc, tag, text, line = Doc().ttl() - -doc.asis("") -with tag("html"): - with tag("head"): - doc.stag("meta", charset="utf-8") - doc.stag("meta", name="viewport", content="width=device-width, initial-scale=1") - doc.stag( - "link", - rel="stylesheet", - href="https://www.akpain.net/assets/css/risotto.css", - ) - doc.stag( - "link", - rel="stylesheet", - href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css", - ) - doc.stag( - "link", - rel="stylesheet", - href="https://cdn.jsdelivr.net/npm/simple-datatables@latest/dist/style.css", - ) - with tag("script", src="https://cdn.jsdelivr.net/npm/simple-datatables@latest"): - doc.asis("") - - with tag("body"): - with tag("div", klass="container pt-3"): - - line("a", "[abi abi] $", klass="pe-3", href="https://www.akpain.net") - line("a", "back to photography", href="https://www.akpain.net/photography/") - - line("h1", "Film Development Price Comparison", klass="pt-2") - - line("p", "This is my attempt to work out the best value for money film developing and service that's available in the UK. Labs are compared as like-for-like as possible, but some variation (especially in scan size) is inevitable.") - with tag("p"): - text("If your favourite/local/whatever lab isn't listed here, ") - line("a", "let me know", href="https://www.akpain.net#contact") - text(" and I'll add it! Likewise, if you want to see E6, ECN2, half frame, 120 or anything else here, please do tell me.") - - line( - "p", - "Development costs last updated " - + datetime.datetime.utcfromtimestamp(raw_data_object["time"]).strftime( - "%Y-%m-%d %H:%M:%S" - ) - + ". Price per pixel figures do not include estimates for outbound or return shipping." - ) - - with tag("div", klass="card", style="width: 18rem;"): - with tag("div", klass="card-body"): - line("div", "Contents", klass="card-title", style="font-family: var(--font-monospace)") - with tag("ul", klass="card-text"): - for key in entries_by_type: - chemistry, format, subformat = key - slug = slugify(chemistry + format + subformat) - with tag("li"): - line("a", f"{chemistry} {format} ({subformat})", href=f"#{slug}-title") - - slugs = [] - - for key in entries_by_type: - chemistry, format, subformat = key - - slug = slugify(chemistry + format + subformat) - slugs.append(slug) - - line( - "h2", - f"{chemistry} {format} ({subformat})", - klass="h3 pt-4", - id=slug + "-title", - ) - - if key in notes_by_type: - line("p", notes_by_type[key]) - - cols = [ - ("lab", lambda x: x["lab"]), - ( - "outboundShipping", - lambda x: "×" - if x["includesSendShipping"].lower() == "no" - else x["sendShippingType"], - ), - ( - "returnShipping", - lambda x: ( - "Free" - if (c := float(x["returnShippingCost"])) == 0 - else _format_price(c) - ) - + f" ({x['returnShippingType']})", - ), - ("cost", lambda x: _format_price(float(x["cost"]))), - ( - "renderResolution", - lambda x: f"{x['resolution']} ({repr(x['resolutionName'])})", - ), - ( - "pricePerPixel", - lambda x: "{:.5f}p".format( - float(x["cost"]) - * 100 - / reduce( - lambda y, z: y * z, - map(int, x["resolution"].split("x")), - 1, - ) - ), - ), - ("link", lambda x: _render_line("a", "Link", href=x["url"])), - ] - - # begin working out price per pixel colour scales - pppfn = None - for i, item in enumerate(cols): - if item[0] == "pricePerPixel": - pppfn = item[1] - break - assert pppfn is not None - pppcolours = {pppfn(data): "" for data in entries_by_type[key]} - coldiff = ( - int(120 / (len(pppcolours) - 1)) if len(pppcolours) - 1 != 0 else 0 - ) - for i, (val, rawval) in enumerate( - sorted( - map(lambda x: (float(x[:-1]), x), pppcolours.keys()), - key=lambda y: y[0], - ) - ): - pppcolours[rawval] = f"hsl({120 - (i * coldiff)}, 71%, 73%)" - # end - - with tag("table", klass="table table-hover", id=slug): - with tag("thead"): - with tag("tr"): - for t, _ in cols: - line("th", col_titles[t], scope="col") - - with tag("tbody"): - for data in sorted( - entries_by_type[key], key=lambda x: x["lab"] - ): - with tag("tr"): - for i, (key, fn) in enumerate(cols): - if i == 0: - line("th", fn(data), scope="row") - else: - with tag("td"): - val = fn(data) - doc.asis(val) - - if key == "pricePerPixel": - doc.attr( - style="background-color: " - + pppcolours[val] - ) - - with tag("script"): - doc.asis("const slugs = ") - doc.asis(json.dumps(slugs)) - doc.asis(";\n") - with open(Path(__file__).resolve().parent / "page.js") as f: - doc.asis(f.read()) - - with tag( - "script", - src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js", - ): - doc.asis() - -with open(OUTPUTFILE, "w") as f: - f.write(doc.getvalue()) diff --git a/postprocessor/page.js b/postprocessor/page.js deleted file mode 100644 index 003e563..0000000 --- a/postprocessor/page.js +++ /dev/null @@ -1,9 +0,0 @@ -// populated by ssg: -// const slugs = []; - -for (const slug of slugs) { - new simpleDatatables.DataTable("#" + slug, { - paging: false, - searchable: false, - }) -} diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 4185e6a..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,17 +0,0 @@ -[tool.poetry] -name = "filmdev-scraper" -version = "0.1.0" -description = "" -authors = ["AKP "] -readme = "README.md" - -[tool.poetry.dependencies] -python = "^3.10" -selenium = "^4.23.1" -yattag = "^1.16.0" -tqdm = "^4.66.5" - - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" diff --git a/pyrightconfig.json b/pyrightconfig.json deleted file mode 100644 index 92b1c26..0000000 --- a/pyrightconfig.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "venvPath": "/home/akp/.cache/pypoetry/virtualenvs", - "venv": "filmdev-scraper-ijnHeEG_-py3.10" -} diff --git a/scraper/__main__.py b/scraper/__main__.py deleted file mode 100644 index 34bd70b..0000000 --- a/scraper/__main__.py +++ /dev/null @@ -1,51 +0,0 @@ -import scrapers -from selenium import webdriver -from tqdm import tqdm -import json -import time -import sys - -OUTPUTFILE = sys.argv[0] - -driver = webdriver.Firefox() - -datapoints = [] - -try: - for cls in tqdm( - [ - scrapers.TheFilmSafe, - scrapers.HarmanLab, - scrapers.AGPhotoLab, - scrapers.FilmProcessingCoUk, - scrapers.PPPCamera, - scrapers.AnalogueWonderland, - scrapers.Minilab, - ] - ): - datapoints += cls(driver).scrape() -finally: - driver.quit() - -with open(OUTPUTFILE, "w") as f: - json.dump( - { - "time": int(time.time()), - "data": datapoints, - "notes": [ - { - "chemistry": "C41", - "format": "35mm", - "subformat": "full frame", - "note": "Assuming one 36-shot roll of film", - }, - { - "chemistry": "B&W", - "format": "35mm", - "subformat": "full frame", - "note": "Assuming one 36-shot roll of film", - }, - ], - }, - f, - ) diff --git a/scraper/scrapers.py b/scraper/scrapers.py deleted file mode 100644 index 65c6e42..0000000 --- a/scraper/scrapers.py +++ /dev/null @@ -1,534 +0,0 @@ -import re -import time -from selenium import webdriver -from selenium.webdriver.common.by import By -from selenium.webdriver.support.ui import Select -from selenium.webdriver.support import expected_conditions -from selenium.webdriver.support.wait import WebDriverWait -import selenium.common.exceptions - - -class _BaseScraper: - def __init__(self, driver): - self.driver = driver - - -class PPPCamera(_BaseScraper): - def scrape(self) -> list[dict]: - self.driver.get("https://pppcameras.co.uk/lab/p/35mm-film") - - values_to_select = [ - # aria-label value, option value to select - ("Select Services", "Dev + Mid Res"), - ("Select Full Frame scans", "No"), - ("Select Prints", "No Prints"), - ] - - for (aria_label, option_value) in values_to_select: - elem = self.driver.find_element( - By.CSS_SELECTOR, f"select[aria-label={repr(aria_label)}]" - ) - Select(elem).select_by_value(option_value) - - elem = self.driver.find_element(By.CSS_SELECTOR, "div.product-price") - return [ - { - "lab": "PPP Cameras", - "chemistry": "C41", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "no", - "sendShippingType": "", - "returnShippingCost": "0", - "returnShippingType": "Unspecified", - "cost": elem.text.replace("£", ""), - "resolution": "3637x2433", - "resolutionName": "Mid", - "url": "https://pppcameras.co.uk/lab/p/35mm-film", - } - ] - - -class AnalogueWonderland(_BaseScraper): - def scrape(self) -> list[dict]: - return [ - { - "lab": "Analogue Wonderland", - "chemistry": "C41", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "yes", - "sendShippingType": "Royal Mail Tracked 48", - "returnShippingCost": "3", - "returnShippingType": "Unspecified", - "cost": self._scrape_35mm_with_options( - [ - # title of thing to click - "Colour (C-41)", - "Standard Scans", - "Correct and Rotate", - ] - ), - "resolution": "3024x2005", - "resolutionName": "Standard", - "url": "https://analoguewonderland.co.uk/products/35mm-film-development", - }, - { - "lab": "Analogue Wonderland", - "chemistry": "B&W", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "yes", - "sendShippingType": "Royal Mail Tracked 48", - "returnShippingCost": "3", - "returnShippingType": "Unspecified", - "cost": self._scrape_35mm_with_options( - [ - # title of thing to click - "Black and White", - "Standard Scans", - "Correct and Rotate", - ] - ), - "resolution": "3024x2005", - "resolutionName": "Standard", - "url": "https://analoguewonderland.co.uk/products/35mm-film-development", - }, - ] - - def _scrape_35mm_with_options(self, opts: list[str]) -> str: - # opts is a list of titles of buttons to click - - self.driver.get( - "https://analoguewonderland.co.uk/products/35mm-film-development" - ) - - try: - # wait for "free film!!1" popup - elem = WebDriverWait(self.driver, 5).until( - expected_conditions.presence_of_element_located( - (By.CSS_SELECTOR, '[aria-label="Close dialog"]') - ) - ) - elem.click() - time.sleep(1) # wait for animation to play - except selenium.common.exceptions.TimeoutException: - pass - - # make sure it's possible to see the buttons (will raise an exception if we try to click something that's off of the page) by scrolling to the review widget - self.driver.execute_script( - "arguments[0].scrollIntoView(true)", - self.driver.find_element(By.CSS_SELECTOR, ".jdgm-prev-badge__text"), - ) - - for title in opts: - elem = self.driver.find_element( - By.CSS_SELECTOR, f"label.block-swatch__item[title={repr(title)}]" - ) - elem.click() - - return self.driver.find_element( - By.CSS_SELECTOR, "span.price > span.money" - ).text.replace("£", "") - - -class Minilab(_BaseScraper): - def scrape(self) -> list[dict]: - c41 = { - "lab": "The Minilab", - "chemistry": "C41", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "no", - "sendShippingType": "", - "returnShippingCost": "0.85", - "returnShippingType": "Royal Mail 48", - "resolution": "3024x2005", - "resolutionName": "High JPEG", - "url": "https://www.theminilab.co.uk/product-page/c41-dev-scan", - } - c41["cost"] = self._scrape_35mm_with_url(c41["url"]) - - bw = { - "lab": "The Minilab", - "chemistry": "B&W", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "no", - "sendShippingType": "", - "returnShippingCost": "0.85", - "returnShippingType": "Royal Mail 48", - "resolution": "3024x2005", - "resolutionName": "High JPEG", - "url": "https://www.theminilab.co.uk/product-page/b-w-35mm-dev-scan", - } - bw["cost"] = self._scrape_35mm_with_url(bw["url"]) - - return [c41, bw] - - def _scrape_35mm_with_url(self, url) -> str: - self.driver.get(url) - - res_elem_found = WebDriverWait(self.driver, 10).until( - expected_conditions.text_to_be_present_in_element( - (By.CSS_SELECTOR, "label[for]"), "Resolution" - ), - ) # Waiting for the dynamically created form to be dynamiced out of thin air - assert res_elem_found - - dropdown_items = self.driver.find_elements( - By.CSS_SELECTOR, 'div[data-hook="dropdown-base-text"]' - ) - select_item = None - for item in dropdown_items: - if item.text == "Select": - select_item = item - break - - assert select_item is not None - self.driver.execute_script("arguments[0].scrollIntoView(true)", select_item) - select_item.click() - - dropdown_items = self.driver.find_elements( - By.CSS_SELECTOR, "span[aria-hidden=false]" - ) - high_res_item = None - for item in dropdown_items: - if item.text == "High Res JPEG": - high_res_item = item - break - - assert high_res_item is not None - high_res_item.click() - - return self.driver.find_element( - By.CSS_SELECTOR, "span[data-wix-price]" - ).text.replace("£", "") - - -class FilmProcessingCoUk(_BaseScraper): - def scrape(self) -> list[dict]: - c41 = { - "lab": "FilmProcessing.co.uk", - "chemistry": "C41", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "no", - "sendShippingType": "", - "returnShippingCost": "0", - "returnShippingType": "Royal Mail 48", - "resolution": "2728x1830", - "resolutionName": "Standard", - "url": "https://www.filmprocessing.co.uk/onlinestore/35mm-Colour-Film-Processing-p68571250", - } - c41["cost"] = self._scrape_35mm_with_url_and_opts( - c41["url"], - [ - ("Exposure", "Up to 39 Exposure"), - ("Print Size", "No Prints Required"), - ("Extra Sets (Per Film)", "No Extra Set Required"), - ("Film to CD / Dropbox", "Medium Quality Dropbox"), - ], - ) - - bw = { - "lab": "FilmProcessing.co.uk", - "chemistry": "B&W", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "no", - "sendShippingType": "", - "returnShippingCost": "0", - "returnShippingType": "Royal Mail 48", - "resolution": "2728x1830", - "resolutionName": "Standard", - "url": "https://www.filmprocessing.co.uk/onlinestore/35mm-Black-&-White-Film-Processing-p345592049", - } - bw["cost"] = self._scrape_35mm_with_url_and_opts( - bw["url"], - [ - ("Exposures", "Up to 39 Exposures"), - ("Print Size", "No Prints Required"), - ("Extra Sets (per Film)", "No Extra Sets Required"), - ("Film to CD / Dropbox", "Medium Quality Dropbox"), - ], - ) - - return [c41, bw] - - def _scrape_35mm_with_url_and_opts( - self, url: str, opts: list[tuple[str, str]] - ) -> str: - # opts tuples are aria-label value, option value to select - self.driver.get(url) - - WebDriverWait(self.driver, 10).until( - expected_conditions.presence_of_element_located( - (By.CSS_SELECTOR, 'iframe[aria-label="Online Store"][src]') - ) - ) # wait for one iframe to get its source - - all_iframes = self.driver.find_elements( - By.CSS_SELECTOR, 'iframe[aria-label="Online Store"][src]' - ) # get all iframes - # search for the frame that contains the store options - target_iframe = None - for frame in all_iframes: - if url.split("/")[-1] not in frame.get_attribute("src"): - continue - target_iframe = frame - continue - assert target_iframe is not None - - self.driver.get(target_iframe.get_attribute("src")) - - elem = WebDriverWait(self.driver, 10).until( - expected_conditions.text_to_be_present_in_element( - (By.CSS_SELECTOR, "div.product-details-module__title"), "Exposure" - ) - ) # wait for form to be dynamically loaded in - - for (aria_label, option_value) in opts: - elem = self.driver.find_element( - By.CSS_SELECTOR, f"select[aria-label={repr(aria_label)}]" - ) - Select(elem).select_by_value(option_value) - - return self.driver.find_element( - By.CSS_SELECTOR, "span.details-product-price__value" - ).text.replace("£", "") - - -class AGPhotoLab(_BaseScraper): - def scrape(self) -> list[dict]: - c41 = { - "lab": "AG Photo Lab", - "chemistry": "C41", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "yes", - "sendShippingType": "Freepost", - "returnShippingCost": "4.94", - "returnShippingType": "Royal Mail 24", - "resolution": "3089x2048", - "resolutionName": "Standard JPEG", - "url": "https://www.ag-photolab.co.uk/product/c41/", - } - c41["cost"] = self._scrape_35mm_with_url_and_options( - c41["url"], - [ - ("5c8fbe78a2c805.23255089", "35mm_0"), # film format - ("666aa5b7aab344.41469556", "Standard sleeving_0"), # film sleeving - ("5c8fcb67a26bd1.60477546", "Standard Scan 8bit JPEG_0"), # scans - ( - "5c8fcbc6a26c40.29952473", - "Upload files via the web_0", - ), # scan delivery - ], - ) - - bw = { - "lab": "AG Photo Lab", - "chemistry": "B&W", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "yes", - "sendShippingType": "Freepost", - "returnShippingCost": "4.94", - "returnShippingType": "Royal Mail 24", - "resolution": "3089x2048", - "resolutionName": "Standard JPEG", - "url": "https://www.ag-photolab.co.uk/product/black-white/", - } - bw["cost"] = self._scrape_35mm_with_url_and_options( - bw["url"], - [ - ("5c90be26ccc352.83454456", "35mm_0"), # film format - ("5c90c037ccc3d4.45704796", "Standard Sleeving_0"), # film sleeving - ("5c90be26ccc341.38603868", "Standard Scan 8bit JPEG_0"), # scans - ( - "5c90c097ccc3e6.45684541", - "Upload files via the web_0", - ), # scan delivery - ], - ) - - return [c41, bw] - - def _scrape_35mm_with_url_and_options( - self, url: str, opts: list[tuple[str, str]] - ) -> str: - # opts are data-uniqid value, option value to select - self.driver.get(url) - - try: - elem = ( - WebDriverWait(self.driver, 3) - .until( - expected_conditions.presence_of_element_located( - (By.CSS_SELECTOR, "button.cky-btn-reject") - ) - ) - .click() - ) # this cookie popup is big enough that i can see it causing issues so we'll actually get rid of it here - except selenium.common.exceptions.TimeoutException: - pass - - elem = WebDriverWait(self.driver, 10).until( - expected_conditions.presence_of_element_located( - (By.CSS_SELECTOR, f'div[data-uniqid="{opts[0][0]}"]') - ) - ) # wait for the selection boxes to appear - self.driver.execute_script( - "arguments[0].scrollIntoView(true)", - self.driver.find_element(By.CSS_SELECTOR, "h1.product_title"), - ) # make sure it's possible to see the selection boxes - - for (aria_label, option_value) in opts: - elem = self.driver.find_element( - By.CSS_SELECTOR, f"[data-uniqid={repr(aria_label)}]" - ) - elem = elem.find_element(By.TAG_NAME, "select") - Select(elem).select_by_value(option_value) - - return ( - self.driver.find_element(By.CSS_SELECTOR, "span.price.amount.final") - .text.replace(" ", "") - .replace("£", "") - ) - - -class HarmanLab(_BaseScraper): - def scrape(self) -> list[dict]: - c41 = { - "lab": "Harman Lab", - "chemistry": "C41", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "no", - "sendShippingType": "", - "returnShippingCost": "2.95", - "returnShippingType": "Royal Mail 24", - "resolution": "1500x2250", - "resolutionName": "Std", - "url": "https://harmanlab.com/products/developing-only-135-colour-c41-film?variant=42500108189938", - } - c41["cost"] = self._scrape_with_url(c41["url"]) - - bw = { - "lab": "Harman Lab", - "chemistry": "B&W", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "no", - "sendShippingType": "", - "returnShippingCost": "2.95", - "returnShippingType": "Royal Mail 24", - "resolution": "1500x2250", - "resolutionName": "Std", - "url": "https://harmanlab.com/products/black-and-white-film-developing-only?variant=42499934716146", - } - bw["cost"] = self._scrape_with_url(bw["url"]) - - return [c41, bw] - - def _scrape_with_url(self, url: str) -> str: - self.driver.get(url) - return ( - self.driver.find_element( - By.CSS_SELECTOR, "span.price-item.price-item--regular" - ) - .text.replace(" GBP", "") - .replace("£", "") - ) - - -class TheFilmSafe(_BaseScraper): - # Note for the future: they have a bulk discount of £1 per roll - def scrape(self) -> list[dict]: - return [ - { - "lab": "The Film Safe", - "chemistry": "C41", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "no", - "sendShippingType": "", - "returnShippingCost": "2", - "returnShippingType": "Royal Mail 48", - "cost": self._get_price_for_options( - [ - ("product-dropdown-1_2", "C41 (colour)"), # film process - ("product-dropdown-3_4_18", "35mm"), # film format - ( - "product-dropdown-13_14_16_17", - "Med Res JPEG (£9)", - ), # image resolution - ( - "product-dropdown-9_10_11", - "CALM", - ), # charity choice (required to get a price) - ] - ), - "resolution": "3100x2100", - "resolutionName": "Med", - "url": "https://www.thefilmsafe.co.uk/product-page/developing-scanning", - }, - { - "lab": "The Film Safe", - "chemistry": "B&W", - "format": "35mm", - "subformat": "full frame", - "includesSendShipping": "no", - "sendShippingType": "", - "returnShippingCost": "2", - "returnShippingType": "Royal Mail 48", - "cost": self._get_price_for_options( - [ - ("product-dropdown-1_2", "BW"), # film process - ("product-dropdown-3_4_18", "35mm"), # film format - ( - "product-dropdown-13_14_16_17", - "Med Res JPEG (£9)", - ), # image resolution - ( - "product-dropdown-9_10_11", - "CALM", - ), # charity choice (required to get a price) - ] - ), - "resolution": "3100x2100", - "resolutionName": "Med", - "url": "https://www.thefilmsafe.co.uk/product-page/developing-scanning", - }, - ] - - def _get_price_for_options(self, opts: list[tuple[str, str]]) -> str: - # tuples are (id of corresponding label, title of div to click) - self.driver.get( - "https://www.thefilmsafe.co.uk/product-page/developing-scanning" - ) - - WebDriverWait(self.driver, 10).until( - expected_conditions.presence_of_element_located( - (By.CSS_SELECTOR, "label#product-dropdown-1_2[for]") - ) - ) # wait for the dropdown options to get stitched together to the form boxes - - for (label_id, option_title) in opts: - elem = self.driver.find_element( - By.CSS_SELECTOR, f"button[aria-labelledby={repr(label_id)}]" - ) - elem.click() - elem = self.driver.find_element( - By.CSS_SELECTOR, - f'div[data-hook="popover-content"] div[title={repr(option_title)}]', - ) - elem.click() - - return self.driver.find_element( - By.CSS_SELECTOR, "span[data-wix-price]" - ).text.replace("£", "") - - -# TODO: https://www.exposurefilmlab.com/