This repository has been archived on 2025-07-20. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
circuitbodge/circuit-laundry-notifier/circuit_scraper.py
AKP b3946f752c
Alter 5 files
Update `circuit_scraper.py`
Update `selectMachine.js`
Update `selectSite.js`
Update `index.html`
Update `web.py`
2022-11-08 23:42:26 +00:00

144 lines
4.5 KiB
Python

import re
from dataclasses import dataclass
from enum import Enum
from typing import *
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from cachetools import cached, TTLCache
class ScraperError(ValueError):
    """Signal that a laundry site could not be scraped.

    Subclasses ``ValueError``, so handlers written for ``ValueError`` also
    catch it.
    """
class MachineState(Enum):
    """States a laundry machine can be reported in.

    Each member's value is the short string code used when a machine is
    serialised (see ``Machine.to_dict``).
    """

    # Free to use.
    Available = "AVAIL"
    # A cycle is currently running.
    InUse = "IN_USE"
    # NOTE(review): presumably a finished cycle awaiting collection -- confirm.
    Completed = "COMPLETED"
    # The site reported a state it could not determine.
    Unknown = "UNKNOWN"
class MachineType(Enum):
    """Kind of laundry machine; values are the string codes used on serialisation."""

    Washer = "WASHER"
    Dryer = "DRYER"
@dataclass(init=False)
class Machine:
    """A single washer or dryer and its current status.

    No ``__init__`` is generated: create an empty instance with ``Machine()``
    and assign the fields afterwards. Reading a field (or calling ``repr``)
    before it has been assigned raises ``AttributeError``.
    """

    # Identifier of the machine within its site.
    number: str
    # Whether this is a washer or a dryer.
    type: MachineType
    # Current operating state.
    state: MachineState
    # Minutes left on the running cycle, or None when not known/applicable.
    minutes_remaining: Optional[int]

    def to_dict(self) -> Dict[str, Union[str, Optional[int]]]:
        """Return the machine as a plain dict, with enums replaced by their string values."""
        number = self.number
        type_code = self.type.value
        state_code = self.state.value
        remaining = self.minutes_remaining
        return dict(
            number=number,
            type=type_code,
            state=state_code,
            minutes_remaining=remaining,
        )
@dataclass
class Site:
    """A laundry site together with the machines found on it."""

    # Site identifier, as supplied by the caller.
    id: str
    # Human-readable site name, or None when none is available.
    name: Optional[str]
    # Machines belonging to the site.
    machines: List[Machine]

    def get_machine(self, machine_number: str) -> Optional[Machine]:
        """Return the first machine whose ``number`` equals ``machine_number``, else None."""
        matching = (
            candidate
            for candidate in self.machines
            if candidate.number == machine_number
        )
        return next(matching, None)
class CircuitScraper:
    """Scrape washer/dryer status from the Circuit laundry "circuit view" page.

    Every method is a static method; the class only groups the scraping
    constants and helpers.
    """

    _base_url: str = "https://www.circuit.co.uk/circuit-view/laundry-site"

    # CSS classes found on a machine's ``div.accordion__title`` element.
    # ``_class_washer`` is the base class carried by every title, so it cannot
    # be used on its own to tell machines or states apart.
    _class_washer = "accordion__title"
    _class_dryer = "accordion__title--dryer"
    _class_in_use = "accordion__title--in-use"
    _class_completed = "accordion__title--idle"
    _class_state_unknown = "accordion__title--unknown"

    # Seconds to wait for the site to respond; without a timeout a stalled
    # server would block the caller indefinitely.
    _request_timeout: float = 10.0

    @staticmethod
    def _get_site_url(site_id: str) -> str:
        """Return the page URL for ``site_id``.

        The ID is percent-encoded so that characters such as ``&`` or ``#``
        cannot add or truncate query parameters. Plain alphanumeric IDs are
        left unchanged.
        """
        return CircuitScraper._base_url + f"/?site={quote(str(site_id), safe='')}"

    @staticmethod
    def _parse_minutes(text: str) -> Optional[int]:
        """Return the first whole number found in ``text``, or None if there is none.

        Accepts any wording around the number ("12 mins", "1 min", "12mins")
        instead of raising ``ValueError`` on text that is not exactly
        "<n> mins".
        """
        match = re.search(r"\d+", text)
        return int(match.group()) if match else None

    @staticmethod
    def _state_from_classes(attr_classes: Iterable[str]) -> MachineState:
        """Map the CSS classes of a machine title element to a ``MachineState``.

        The state-specific classes are checked in priority order. Anything
        else, including a title carrying only the base class or the dryer
        class, means the machine is available.
        """
        for css_class, state in (
            (CircuitScraper._class_in_use, MachineState.InUse),
            (CircuitScraper._class_completed, MachineState.Completed),
            (CircuitScraper._class_state_unknown, MachineState.Unknown),
        ):
            if css_class in attr_classes:
                return state
        return MachineState.Available

    @staticmethod
    def _parse_machine(element) -> Optional[Machine]:
        """Build a ``Machine`` from one ``div.accordion`` element.

        Returns None when the element has no machine title and therefore does
        not describe a machine.
        """
        titles = element.select("div.accordion__slug div.accordion__title")
        if not titles:
            return None
        title = titles[0]
        descriptor_text = title.get_text().lower()

        machine = Machine()
        machine.type = (
            MachineType.Dryer if "dryer" in descriptor_text else MachineType.Washer
        )
        # The title text is the machine kind plus its number; strip the kind.
        machine.number = (
            descriptor_text.replace("washer", "").replace("dryer", "").strip().upper()
        )
        machine.state = CircuitScraper._state_from_classes(
            title.attrs.get("class", [])
        )

        # Only a running machine has a remaining time to report.
        machine.minutes_remaining = None
        if machine.state == MachineState.InUse:
            spans = element.select("p span")
            if spans:
                machine.minutes_remaining = CircuitScraper._parse_minutes(
                    spans[0].get_text(strip=True)
                )
        return machine

    @staticmethod
    @cached(cache=TTLCache(maxsize=64, ttl=30))
    def get_site_machine_states(site_id: str) -> Site:
        """Fetch and parse the current machine states for ``site_id``.

        Results are memoised per ``site_id`` in a TTL cache (``maxsize=64``,
        ``ttl=30``), so repeated calls within the TTL do not hit the website
        again and receive the same ``Site`` object.

        Args:
            site_id: Identifier of the laundry site to look up.

        Returns:
            A ``Site`` holding the page's site name (None if the heading is
            missing) and every machine found, in page order.

        Raises:
            ScraperError: The website redirected to its "site unavailable"
                page, which is how it reports an unknown site ID.
            requests.RequestException: The request failed, timed out, or
                returned an HTTP error status.
        """
        site_url = CircuitScraper._get_site_url(site_id)
        response = requests.get(site_url, timeout=CircuitScraper._request_timeout)
        response.raise_for_status()

        # Instead of a nice 404, a bad site ID redirects us to a
        # /circuit-view/site-unavailable with a HTTP 200.
        if "unavailable" in response.url:
            raise ScraperError("Unavailable")

        soup = BeautifulSoup(response.content, "html.parser")

        titles = soup.select(".circuit-view-container h3")
        site_name: Optional[str] = titles[0].get_text(strip=True) if titles else None

        machines: List[Machine] = []
        for section in soup.select("section.accordions--circuit-view.js-machine-type"):
            for element in section.select("div.accordion"):
                machine = CircuitScraper._parse_machine(element)
                if machine is not None:
                    machines.append(machine)

        return Site(site_id, site_name, machines)