This repository has been archived on 2025-07-20. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
circuitbodge/circuit-laundry-notifier/circuit_scraper.py
AKP 87987553e4
Add basic API for machine states
Signed-off-by: AKP <tom@tdpain.net>
2022-11-08 01:29:49 +00:00

113 lines
3.7 KiB
Python

from dataclasses import dataclass
from enum import Enum
from typing import *
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
class ScraperError(ValueError):
    """Error raised by the scraper when a laundry site's page is unavailable."""
class MachineState(Enum):
    """Status of a laundry machine.

    Each member's value is the string emitted when a machine is serialised
    with ``Machine.to_dict``.
    """

    Available = "AVAIL"
    InUse = "IN_USE"
    Completed = "COMPLETED"
    Unknown = "UNKNOWN"
class MachineType(Enum):
    """Kind of laundry machine.

    Each member's value is the string emitted when a machine is serialised
    with ``Machine.to_dict``.
    """

    Washer = "WASHER"
    Dryer = "DRYER"
@dataclass(init=False, repr=True)
class Machine:
    """One laundry machine and its current status.

    No ``__init__`` is generated (``init=False``): instances are created empty
    with ``Machine()`` and every field is assigned afterwards by the caller.
    """

    number: str
    type: MachineType
    state: MachineState
    # Only populated for machines that are in use; None otherwise.
    minutes_remaining: Optional[int]

    def to_dict(self) -> Dict[str, Union[str, Optional[int]]]:
        """Return the machine as a plain dict, with enums replaced by their values."""
        serialised: Dict[str, Union[str, Optional[int]]] = {}
        serialised["number"] = self.number
        serialised["type"] = self.type.value
        serialised["state"] = self.state.value
        serialised["minutes_remaining"] = self.minutes_remaining
        return serialised
class CircuitScraper:
    """Scrapes machine states from the Circuit "circuit-view" laundry site pages.

    Every method is static; the class acts as a namespace for the URL and the
    CSS class names used while parsing the page.
    """

    _base_url: str = "https://www.circuit.co.uk/circuit-view/laundry-site"

    # CSS classes that may appear on a machine's title element.
    _class_washer = "accordion__title"
    _class_dryer = "accordion__title--dryer"
    _class_in_use = "accordion__title--in-use"
    _class_completed = "accordion__title--idle"
    _class_state_unknown = "accordion__title--unknown"

    # Default number of seconds to wait for the site. Without a timeout,
    # requests waits indefinitely on an unresponsive server.
    _request_timeout: float = 10.0

    @staticmethod
    def _get_site_url(site_id: str) -> str:
        """Return the page URL for ``site_id``.

        The ID is percent-encoded so that characters such as ``&``, ``#`` or
        spaces cannot alter the query string.
        """
        encoded_id = quote(str(site_id), safe="")
        return CircuitScraper._base_url + f"/?site={encoded_id}"

    @staticmethod
    def _parse_minutes_remaining(text: str) -> Optional[int]:
        """Parse a remaining-time label such as ``"25 mins"`` into minutes.

        Returns ``None`` when the label does not reduce to a plain integer, so
        that one unexpected label does not abort the whole scrape.
        """
        cleaned = text.lower().replace("mins", "").replace("min", "").strip()
        try:
            return int(cleaned)
        except ValueError:
            return None

    @staticmethod
    def _state_from_classes(attr_classes) -> MachineState:
        """Map the CSS classes of a machine's title element to a MachineState."""
        # Checked in priority order. The base class (_class_washer) is present
        # on every item and the dryer class carries no state information, so
        # when none of the state classes match the machine is available.
        state_classes = (
            (CircuitScraper._class_in_use, MachineState.InUse),
            (CircuitScraper._class_completed, MachineState.Completed),
            (CircuitScraper._class_state_unknown, MachineState.Unknown),
        )
        for css_class, state in state_classes:
            if css_class in attr_classes:
                return state
        return MachineState.Available

    @staticmethod
    def get_site_machine_states(site_id: str, timeout: float = _request_timeout) -> List[Machine]:
        """Fetch and parse the state of every machine listed for ``site_id``.

        Args:
            site_id: Identifier of the laundry site, as used in the ``site``
                query parameter.
            timeout: Seconds to wait for the HTTP request before giving up.

        Returns:
            One ``Machine`` per machine entry found on the page, in page order.

        Raises:
            ScraperError: If the request ends up on an "unavailable" page.
            requests.RequestException: On connection problems, timeouts, or a
                non-success HTTP status.
        """
        site_url = CircuitScraper._get_site_url(site_id)
        r = requests.get(site_url, timeout=timeout)
        r.raise_for_status()

        # Instead of a nice 404, a bad site ID redirects us to a /circuit-view/site-unavailable with a HTTP 200.
        if "unavailable" in r.url:
            raise ScraperError("Unavailable")

        soup = BeautifulSoup(r.content, "html.parser")

        machines: List[Machine] = []
        for section in soup.select("section.accordions--circuit-view.js-machine-type"):
            for item in section.select("div.accordion"):
                titles = item.select("div.accordion__slug div.accordion__title")
                if not titles:
                    # Not a machine entry; nothing to report.
                    continue
                title = titles[0]

                machine = Machine()

                descriptor_text = title.get_text().lower()
                machine.type = MachineType.Dryer if "dryer" in descriptor_text else MachineType.Washer
                # What is left after dropping the machine-type word is the machine's label.
                machine.number = descriptor_text.replace("washer", "").replace("dryer", "").strip().upper()

                machine.state = CircuitScraper._state_from_classes(title.attrs.get("class", []))

                # A remaining time is only looked for on machines that are running.
                machine.minutes_remaining = None
                if machine.state == MachineState.InUse:
                    spans = item.select("p span")
                    if spans:
                        machine.minutes_remaining = CircuitScraper._parse_minutes_remaining(
                            spans[0].get_text(strip=True)
                        )

                machines.append(machine)

        return machines