diff --git a/.gitignore b/.gitignore index 82d44c8..6b25bbf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +bundle.zip +index.html +data2.json ### Python ### # Byte-compiled / optimized / DLL files diff --git a/postprocessor/__main__.py b/postprocessor/__main__.py index afdc65c..d782f26 100644 --- a/postprocessor/__main__.py +++ b/postprocessor/__main__.py @@ -103,20 +103,23 @@ with tag("html"): line("h1", "Film Development Price Comparison", klass="pt-2") - line("p", "This is my attempt to work out the best value for money film developing and service that's available in the UK. Labs are compared as like-for-like as possible, but some variation (especially in scan size) is inevitable.") + line("p", "This is my attempt to work out the best value for money film developing and scanning service that's available in the UK. Labs are compared as like-for-like as possible, but some variation (especially in scan size) is inevitable.") with tag("p"): text("If your favourite/local/whatever lab isn't listed here, ") line("a", "let me know", href="https://www.akpain.net#contact") text(" and I'll add it! Likewise, if you want to see E6, ECN2, half frame, 120 or anything else here, please do tell me.") - line( - "p", + with tag("p"): + text( "Development costs last updated " + datetime.datetime.utcfromtimestamp(raw_data_object["time"]).strftime( "%Y-%m-%d %H:%M:%S" ) - + ". Price per pixel figures do not include estimates for outbound or return shipping." - ) + + ". Price per pixel figures do not include estimates for outbound or return shipping. " + ) + line("a", "Raw data available here", href="rawdata.json") + text(".") + with tag("div", klass="card", style="width: 18rem;"): with tag("div", klass="card-body"): diff --git a/scraper/__main__.py b/scraper/__main__.py index 34bd70b..c616562 100644 --- a/scraper/__main__.py +++ b/scraper/__main__.py @@ -1,29 +1,36 @@ import scrapers from selenium import webdriver +from selenium.common.exceptions import WebDriverException from tqdm import tqdm import json import time import sys -OUTPUTFILE = sys.argv[0] +OUTPUTFILE = sys.argv[1] -driver = webdriver.Firefox() +driver_options = webdriver.ChromeOptions() +driver_options.add_argument("--headless=new") +driver_options.add_argument("--window-size=1920,1080") +driver = webdriver.Chrome(options=driver_options) datapoints = [] try: - for cls in tqdm( + for i, cls in enumerate(tqdm( [ + scrapers.Minilab, scrapers.TheFilmSafe, scrapers.HarmanLab, scrapers.AGPhotoLab, scrapers.FilmProcessingCoUk, scrapers.PPPCamera, scrapers.AnalogueWonderland, - scrapers.Minilab, ] - ): + )): datapoints += cls(driver).scrape() +except WebDriverException as e: + driver.save_screenshot(f"crash.{int(time.time())}.png") + raise e finally: driver.quit()