534 lines
19 KiB
Python
534 lines
19 KiB
Python
import re
|
|
import time
|
|
from selenium import webdriver
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.support.ui import Select
|
|
from selenium.webdriver.support import expected_conditions
|
|
from selenium.webdriver.support.wait import WebDriverWait
|
|
import selenium.common.exceptions
|
|
|
|
|
|
class _BaseScraper:
|
|
def __init__(self, driver):
|
|
self.driver = driver
|
|
|
|
|
|
class PPPCamera(_BaseScraper):
    """Scraper for the PPP Cameras 35mm lab page."""

    def scrape(self) -> list[dict]:
        """Choose the dev + mid-res options and read the displayed price.

        Returns:
            A one-element list with the C41 35mm listing. ``cost`` is the text
            of the page's price element with the pound sign removed.
        """
        page_url = "https://pppcameras.co.uk/lab/p/35mm-film"
        self.driver.get(page_url)

        # aria-label of each <select> -> option value to pick in it
        dropdown_choices = {
            "Select Services": "Dev + Mid Res",
            "Select Full Frame scans": "No",
            "Select Prints": "No Prints",
        }
        for label, choice in dropdown_choices.items():
            dropdown = self.driver.find_element(
                By.CSS_SELECTOR, f"select[aria-label={label!r}]"
            )
            Select(dropdown).select_by_value(choice)

        # Read the price only after all options have been selected.
        price_element = self.driver.find_element(
            By.CSS_SELECTOR, "div.product-price"
        )
        listing = {
            "lab": "PPP Cameras",
            "chemistry": "C41",
            "format": "35mm",
            "subformat": "full frame",
            "includesSendShipping": "no",
            "sendShippingType": "",
            "returnShippingCost": "0",
            "returnShippingType": "Unspecified",
            "cost": price_element.text.replace("£", ""),
            "resolution": "3637x2433",
            "resolutionName": "Mid",
            "url": page_url,
        }
        return [listing]
|
|
|
|
|
|
class AnalogueWonderland(_BaseScraper):
    """Scraper for Analogue Wonderland's 35mm development product page."""

    # Single product page used for both chemistries.
    _PRODUCT_URL = "https://analoguewonderland.co.uk/products/35mm-film-development"

    def scrape(self) -> list[dict]:
        """Return the C41 and B&W 35mm listings with freshly scraped prices.

        Returns:
            Two dicts, C41 first and B&W second. Each ``cost`` comes from a
            separate load of the product page with that chemistry selected.
        """
        # (chemistry recorded in the listing, title of the swatch to click)
        chemistries = (
            ("C41", "Colour (C-41)"),
            ("B&W", "Black and White"),
        )
        listings = []
        for chemistry, process_title in chemistries:
            listings.append(
                {
                    "lab": "Analogue Wonderland",
                    "chemistry": chemistry,
                    "format": "35mm",
                    "subformat": "full frame",
                    "includesSendShipping": "yes",
                    "sendShippingType": "Royal Mail Tracked 48",
                    "returnShippingCost": "3",
                    "returnShippingType": "Unspecified",
                    "cost": self._scrape_35mm_with_options(
                        [
                            # title of thing to click
                            process_title,
                            "Standard Scans",
                            "Correct and Rotate",
                        ]
                    ),
                    "resolution": "3024x2005",
                    "resolutionName": "Standard",
                    "url": self._PRODUCT_URL,
                }
            )
        return listings

    def _scrape_35mm_with_options(self, opts: list[str]) -> str:
        """Load the product page, click each option in *opts*, return the price.

        Args:
            opts: Titles of the option swatches to click, in order.

        Returns:
            The text of the price element with the pound sign removed.
        """
        self.driver.get(self._PRODUCT_URL)

        try:
            # Wait for the "free film" popup. Waiting for the close button to
            # be clickable (visible and enabled), not merely present in the
            # DOM, avoids clicking it before it can receive the click.
            close_button = WebDriverWait(self.driver, 5).until(
                expected_conditions.element_to_be_clickable(
                    (By.CSS_SELECTOR, '[aria-label="Close dialog"]')
                )
            )
        except selenium.common.exceptions.TimeoutException:
            # No dismissable popup within the timeout; carry on without it.
            pass
        else:
            close_button.click()
            time.sleep(1)  # wait for animation to play

        # Clicking something that is off the page raises an exception, so
        # scroll to the review widget to bring the option buttons into view.
        self.driver.execute_script(
            "arguments[0].scrollIntoView(true)",
            self.driver.find_element(By.CSS_SELECTOR, ".jdgm-prev-badge__text"),
        )

        for title in opts:
            swatch = self.driver.find_element(
                By.CSS_SELECTOR, f"label.block-swatch__item[title={title!r}]"
            )
            swatch.click()

        price_element = self.driver.find_element(
            By.CSS_SELECTOR, "span.price > span.money"
        )
        return price_element.text.replace("£", "")
|
|
|
|
|
|
class Minilab(_BaseScraper):
    """Scraper for The Minilab's 35mm develop-and-scan product pages."""

    def scrape(self) -> list[dict]:
        """Return the C41 and B&W 35mm listings with freshly scraped prices.

        Returns:
            Two dicts, C41 first and B&W second; ``cost`` is added to each
            after its product page has been scraped.
        """
        c41 = self._listing(
            "C41", "https://www.theminilab.co.uk/product-page/c41-dev-scan"
        )
        c41["cost"] = self._scrape_35mm_with_url(c41["url"])

        bw = self._listing(
            "B&W", "https://www.theminilab.co.uk/product-page/b-w-35mm-dev-scan"
        )
        bw["cost"] = self._scrape_35mm_with_url(bw["url"])

        return [c41, bw]

    @staticmethod
    def _listing(chemistry: str, url: str) -> dict:
        """Build the fixed part of a listing for *chemistry* at *url*."""
        return {
            "lab": "The Minilab",
            "chemistry": chemistry,
            "format": "35mm",
            "subformat": "full frame",
            "includesSendShipping": "no",
            "sendShippingType": "",
            "returnShippingCost": "0.85",
            "returnShippingType": "Royal Mail 48",
            "resolution": "3024x2005",
            "resolutionName": "High JPEG",
            "url": url,
        }

    @staticmethod
    def _first_with_text(elements, text: str, description: str):
        """Return the first of *elements* whose ``.text`` equals *text*.

        Raises:
            AssertionError: If no element matches. Raised explicitly rather
                than via ``assert`` so the check still runs under
                ``python -O``.
        """
        for element in elements:
            if element.text == text:
                return element
        raise AssertionError(
            f"could not find {description} (no element with text {text!r})"
        )

    def _scrape_35mm_with_url(self, url: str) -> str:
        """Load *url*, select the "High Res JPEG" option and return the price.

        Args:
            url: Product page to load.

        Returns:
            The text of the price element with the pound sign removed.

        Raises:
            selenium.common.exceptions.TimeoutException: If the "Resolution"
                label does not appear within 10 seconds.
            AssertionError: If the unselected dropdown or the "High Res JPEG"
                option cannot be found.
        """
        self.driver.get(url)

        # Wait for the dynamically created form. ``until`` either returns a
        # truthy value or raises TimeoutException, so no extra check is
        # needed on its result.
        WebDriverWait(self.driver, 10).until(
            expected_conditions.text_to_be_present_in_element(
                (By.CSS_SELECTOR, "label[for]"), "Resolution"
            ),
        )

        # The dropdown that still shows its placeholder text "Select".
        select_item = self._first_with_text(
            self.driver.find_elements(
                By.CSS_SELECTOR, 'div[data-hook="dropdown-base-text"]'
            ),
            "Select",
            "the unselected dropdown",
        )
        self.driver.execute_script("arguments[0].scrollIntoView(true)", select_item)
        select_item.click()

        # With the dropdown open, pick the option among the visible spans.
        high_res_item = self._first_with_text(
            self.driver.find_elements(By.CSS_SELECTOR, "span[aria-hidden=false]"),
            "High Res JPEG",
            "the high resolution dropdown option",
        )
        high_res_item.click()

        price_element = self.driver.find_element(
            By.CSS_SELECTOR, "span[data-wix-price]"
        )
        return price_element.text.replace("£", "")
|
|
|
|
|
|
class FilmProcessingCoUk(_BaseScraper):
    """Scraper for the FilmProcessing.co.uk 35mm online-store pages."""

    def scrape(self) -> list[dict]:
        """Return the C41 and B&W 35mm listings with freshly scraped prices.

        Returns:
            Two dicts, C41 first and B&W second; ``cost`` is added to each
            after its store page has been scraped.
        """
        c41 = self._listing(
            "C41",
            "https://www.filmprocessing.co.uk/onlinestore/35mm-Colour-Film-Processing-p68571250",
        )
        c41["cost"] = self._scrape_35mm_with_url_and_opts(
            c41["url"],
            [
                ("Exposure", "Up to 39 Exposure"),
                ("Print Size", "No Prints Required"),
                ("Extra Sets (Per Film)", "No Extra Set Required"),
                ("Film to CD / Dropbox", "Medium Quality Dropbox"),
            ],
        )

        bw = self._listing(
            "B&W",
            "https://www.filmprocessing.co.uk/onlinestore/35mm-Black-&-White-Film-Processing-p345592049",
        )
        # NOTE(review): these labels differ slightly from the C41 ones (plural
        # "Exposures"/"Sets", lower-case "per"), presumably matching the live
        # page; verify against the site before normalising them.
        bw["cost"] = self._scrape_35mm_with_url_and_opts(
            bw["url"],
            [
                ("Exposures", "Up to 39 Exposures"),
                ("Print Size", "No Prints Required"),
                ("Extra Sets (per Film)", "No Extra Sets Required"),
                ("Film to CD / Dropbox", "Medium Quality Dropbox"),
            ],
        )

        return [c41, bw]

    @staticmethod
    def _listing(chemistry: str, url: str) -> dict:
        """Build the fixed part of a listing for *chemistry* at *url*."""
        return {
            "lab": "FilmProcessing.co.uk",
            "chemistry": chemistry,
            "format": "35mm",
            "subformat": "full frame",
            "includesSendShipping": "no",
            "sendShippingType": "",
            "returnShippingCost": "0",
            "returnShippingType": "Royal Mail 48",
            "resolution": "2728x1830",
            "resolutionName": "Standard",
            "url": url,
        }

    def _scrape_35mm_with_url_and_opts(
        self, url: str, opts: list[tuple[str, str]]
    ) -> str:
        """Open the store iframe behind *url*, apply *opts*, return the price.

        Args:
            url: Store page to load; its last path segment identifies the
                iframe that holds the product form.
            opts: ``(aria-label value, option value to select)`` pairs, one
                per ``<select>`` on the form.

        Returns:
            The text of the price element with the pound sign removed.

        Raises:
            selenium.common.exceptions.TimeoutException: If no store iframe
                or product form appears within 10 seconds.
            AssertionError: If no iframe source contains the product's path
                segment.
        """
        self.driver.get(url)

        iframe_selector = 'iframe[aria-label="Online Store"][src]'
        WebDriverWait(self.driver, 10).until(
            expected_conditions.presence_of_element_located(
                (By.CSS_SELECTOR, iframe_selector)
            )
        )  # wait for one iframe to get its source

        # Search for the frame that contains the store options: the one whose
        # source mentions this product's path segment.
        product_slug = url.split("/")[-1]
        target_src = None
        for frame in self.driver.find_elements(By.CSS_SELECTOR, iframe_selector):
            frame_src = frame.get_attribute("src")
            if product_slug in frame_src:
                # No early exit: when several frames match, the last one wins.
                target_src = frame_src
        if target_src is None:
            # Raised explicitly (not via ``assert``) so the check still runs
            # under ``python -O``.
            raise AssertionError(
                f"no 'Online Store' iframe source contains {product_slug!r}"
            )

        # Load the iframe's document directly so its form can be driven.
        self.driver.get(target_src)

        WebDriverWait(self.driver, 10).until(
            expected_conditions.text_to_be_present_in_element(
                (By.CSS_SELECTOR, "div.product-details-module__title"), "Exposure"
            )
        )  # wait for form to be dynamically loaded in

        for aria_label, option_value in opts:
            dropdown = self.driver.find_element(
                By.CSS_SELECTOR, f"select[aria-label={aria_label!r}]"
            )
            Select(dropdown).select_by_value(option_value)

        price_element = self.driver.find_element(
            By.CSS_SELECTOR, "span.details-product-price__value"
        )
        return price_element.text.replace("£", "")
|
|
|
|
|
|
class AGPhotoLab(_BaseScraper):
    """Scraper for the AG Photo Lab 35mm processing product pages."""

    def scrape(self) -> list[dict]:
        """Return the C41 and B&W 35mm listings with freshly scraped prices.

        Returns:
            Two dicts, C41 first and B&W second; ``cost`` is added to each
            after its product page has been scraped.
        """
        c41 = self._listing("C41", "https://www.ag-photolab.co.uk/product/c41/")
        c41["cost"] = self._scrape_35mm_with_url_and_options(
            c41["url"],
            [
                ("5c8fbe78a2c805.23255089", "35mm_0"),  # film format
                ("666aa5b7aab344.41469556", "Standard sleeving_0"),  # film sleeving
                ("5c8fcb67a26bd1.60477546", "Standard Scan 8bit JPEG_0"),  # scans
                (
                    "5c8fcbc6a26c40.29952473",
                    "Upload files via the web_0",
                ),  # scan delivery
            ],
        )

        bw = self._listing(
            "B&W", "https://www.ag-photolab.co.uk/product/black-white/"
        )
        bw["cost"] = self._scrape_35mm_with_url_and_options(
            bw["url"],
            [
                ("5c90be26ccc352.83454456", "35mm_0"),  # film format
                ("5c90c037ccc3d4.45704796", "Standard Sleeving_0"),  # film sleeving
                ("5c90be26ccc341.38603868", "Standard Scan 8bit JPEG_0"),  # scans
                (
                    "5c90c097ccc3e6.45684541",
                    "Upload files via the web_0",
                ),  # scan delivery
            ],
        )

        return [c41, bw]

    @staticmethod
    def _listing(chemistry: str, url: str) -> dict:
        """Build the fixed part of a listing for *chemistry* at *url*."""
        return {
            "lab": "AG Photo Lab",
            "chemistry": chemistry,
            "format": "35mm",
            "subformat": "full frame",
            "includesSendShipping": "yes",
            "sendShippingType": "Freepost",
            "returnShippingCost": "4.94",
            "returnShippingType": "Royal Mail 24",
            "resolution": "3089x2048",
            "resolutionName": "Standard JPEG",
            "url": url,
        }

    def _scrape_35mm_with_url_and_options(
        self, url: str, opts: list[tuple[str, str]]
    ) -> str:
        """Load *url*, choose each option in *opts* and return the final price.

        Args:
            url: Product page to load.
            opts: ``(data-uniqid value, option value to select)`` pairs. Must
                be non-empty: the first pair's id is used to detect that the
                selection boxes have loaded.

        Returns:
            The text of the final-price element with spaces and the pound
            sign removed.

        Raises:
            selenium.common.exceptions.TimeoutException: If the first
                selection box does not appear within 10 seconds.
        """
        self.driver.get(url)

        try:
            # This cookie popup is big enough to get in the way, so dismiss
            # it. Wait until the reject button is clickable (visible and
            # enabled), not merely present in the DOM, before clicking it.
            reject_button = WebDriverWait(self.driver, 3).until(
                expected_conditions.element_to_be_clickable(
                    (By.CSS_SELECTOR, "button.cky-btn-reject")
                )
            )
        except selenium.common.exceptions.TimeoutException:
            # No dismissable cookie popup within the timeout; carry on.
            pass
        else:
            reject_button.click()

        WebDriverWait(self.driver, 10).until(
            expected_conditions.presence_of_element_located(
                (By.CSS_SELECTOR, f'div[data-uniqid="{opts[0][0]}"]')
            )
        )  # wait for the selection boxes to appear
        self.driver.execute_script(
            "arguments[0].scrollIntoView(true)",
            self.driver.find_element(By.CSS_SELECTOR, "h1.product_title"),
        )  # make sure it's possible to see the selection boxes

        for uniqid, option_value in opts:
            container = self.driver.find_element(
                By.CSS_SELECTOR, f"[data-uniqid={uniqid!r}]"
            )
            dropdown = container.find_element(By.TAG_NAME, "select")
            Select(dropdown).select_by_value(option_value)

        price_element = self.driver.find_element(
            By.CSS_SELECTOR, "span.price.amount.final"
        )
        return price_element.text.replace(" ", "").replace("£", "")
|
|
|
|
|
|
class HarmanLab(_BaseScraper):
    """Scraper for the Harman Lab 35mm product pages."""

    def scrape(self) -> list[dict]:
        """Return the C41 and B&W 35mm listings with freshly scraped prices.

        Returns:
            Two dicts, C41 first and B&W second; ``cost`` is added to each
            after its product page has been scraped.
        """
        pages = (
            (
                "C41",
                "https://harmanlab.com/products/developing-only-135-colour-c41-film?variant=42500108189938",
            ),
            (
                "B&W",
                "https://harmanlab.com/products/black-and-white-film-developing-only?variant=42499934716146",
            ),
        )

        listings = []
        for chemistry, page_url in pages:
            listing = {
                "lab": "Harman Lab",
                "chemistry": chemistry,
                "format": "35mm",
                "subformat": "full frame",
                "includesSendShipping": "no",
                "sendShippingType": "",
                "returnShippingCost": "2.95",
                "returnShippingType": "Royal Mail 24",
                "resolution": "1500x2250",
                "resolutionName": "Std",
                "url": page_url,
            }
            listing["cost"] = self._scrape_with_url(listing["url"])
            listings.append(listing)
        return listings

    def _scrape_with_url(self, url: str) -> str:
        """Load *url* and return the regular price shown on it.

        Args:
            url: Product page to load.

        Returns:
            The text of the regular-price element with the " GBP" suffix and
            the pound sign removed.
        """
        self.driver.get(url)
        price_element = self.driver.find_element(
            By.CSS_SELECTOR, "span.price-item.price-item--regular"
        )
        raw_price = price_element.text
        without_currency_code = raw_price.replace(" GBP", "")
        return without_currency_code.replace("£", "")
|
|
|
|
|
|
class TheFilmSafe(_BaseScraper):
    """Scraper for The Film Safe's developing-and-scanning product page."""

    # Note for the future: they have a bulk discount of £1 per roll

    def scrape(self) -> list[dict]:
        """Return the C41 and B&W 35mm listings with freshly scraped prices.

        Returns:
            Two dicts, C41 first and B&W second. Each ``cost`` comes from a
            separate load of the product page with that process selected.
        """
        # (chemistry recorded in the listing, title of the film-process option)
        processes = (
            ("C41", "C41 (colour)"),
            ("B&W", "BW"),
        )

        results = []
        for chemistry, process_title in processes:
            price = self._get_price_for_options(
                [
                    ("product-dropdown-1_2", process_title),  # film process
                    ("product-dropdown-3_4_18", "35mm"),  # film format
                    (
                        "product-dropdown-13_14_16_17",
                        "Med Res JPEG (£9)",
                    ),  # image resolution
                    (
                        "product-dropdown-9_10_11",
                        "CALM",
                    ),  # charity choice (required to get a price)
                ]
            )
            results.append(
                {
                    "lab": "The Film Safe",
                    "chemistry": chemistry,
                    "format": "35mm",
                    "subformat": "full frame",
                    "includesSendShipping": "no",
                    "sendShippingType": "",
                    "returnShippingCost": "2",
                    "returnShippingType": "Royal Mail 48",
                    "cost": price,
                    "resolution": "3100x2100",
                    "resolutionName": "Med",
                    "url": "https://www.thefilmsafe.co.uk/product-page/developing-scanning",
                }
            )
        return results

    def _get_price_for_options(self, opts: list[tuple[str, str]]) -> str:
        """Load the product page, choose each option in *opts*, return the price.

        Args:
            opts: ``(id of the dropdown's label, title of the option div to
                click)`` pairs, applied in order.

        Returns:
            The text of the price element with the pound sign removed.
        """
        driver = self.driver
        driver.get("https://www.thefilmsafe.co.uk/product-page/developing-scanning")

        # The dropdown options get stitched to the form boxes after load, so
        # wait until the first dropdown's label has its "for" attribute.
        form_ready = expected_conditions.presence_of_element_located(
            (By.CSS_SELECTOR, "label#product-dropdown-1_2[for]")
        )
        WebDriverWait(driver, 10).until(form_ready)

        for label_id, option_title in opts:
            # Open the dropdown that the label belongs to...
            opener = driver.find_element(
                By.CSS_SELECTOR, f"button[aria-labelledby={label_id!r}]"
            )
            opener.click()
            # ...then click the wanted option inside the popover.
            option = driver.find_element(
                By.CSS_SELECTOR,
                f'div[data-hook="popover-content"] div[title={option_title!r}]',
            )
            option.click()

        price_element = driver.find_element(By.CSS_SELECTOR, "span[data-wix-price]")
        return price_element.text.replace("£", "")
|
|
|
|
|
|
# TODO: add a scraper for https://www.exposurefilmlab.com/
|