Add scraper
Signed-off-by: AKP <tom@tdpain.net>
This commit is contained in:
parent
23055fc798
commit
9139137858
2 changed files with 183 additions and 0 deletions
89
circuit-laundry-notifier/circuit_scraper.py
Normal file
89
circuit-laundry-notifier/circuit_scraper.py
Normal file
|
@@ -0,0 +1,89 @@
|
|||
from typing import *
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class MachineState(Enum):
    """Availability state of a laundry machine as reported by the Circuit site."""

    Available = "AVAIL"        # free to use right now
    InUse = "IN_USE"           # running a cycle; minutes_remaining applies
    Completed = "COMPLETED"    # cycle finished, laundry not yet collected
    Unknown = "UNKNOWN"        # site could not determine the machine's state
|
||||
|
||||
|
||||
class MachineType(Enum):
    """Kind of laundry machine shown on the Circuit site."""

    Washer = "WASHER"    # washing machine
    Dryer = "DRYER"      # tumble dryer
|
||||
|
||||
|
||||
# Parsed state of a single laundry machine.
# init=False: the scraper builds an empty instance and assigns the fields one
# by one, so no generated __init__ is wanted; repr=True keeps the generated
# field-by-field repr for debugging.
@dataclass(init=False, repr=True)
class Machine:
    """A single laundry machine scraped from a Circuit site page."""

    # Machine identifier as displayed on the site, with the "washer"/"dryer"
    # word stripped from the descriptor text (e.g. "3").
    number: str
    # Whether the machine is a washer or a dryer.
    type: MachineType
    # Current availability state.
    state: MachineState
    # Minutes left in the current cycle; None unless state is InUse.
    minutes_remaining: Optional[int]
|
||||
|
||||
|
||||
class CircuitScraper:
    """Scrapes per-machine availability from a Circuit laundry-site page."""

    _base_url: str = "https://www.circuit.co.uk/circuit-view/laundry-site"

    # CSS classes the Circuit site uses to mark machine type and state.
    # _class_washer appears on EVERY machine title, so it only serves as the
    # implicit fallback when no more specific class is present.
    _class_washer = "accordion__title"
    _class_dryer = "accordion__title--dryer"
    _class_in_use = "accordion__title--in-use"
    _class_completed = "accordion__title--idle"
    _class_state_unknown = "accordion__title--unknown"

    @staticmethod
    def _get_site_url(site_id: str) -> str:
        """Return the full laundry-site page URL for the given site ID."""
        return CircuitScraper._base_url + f"/?site={site_id}"

    @staticmethod
    def get_site_machine_states(site_id: str) -> "List[Machine]":
        """Fetch the page for *site_id* and parse every machine's state.

        Returns a list of Machine objects, one per machine accordion found.
        Raises requests.HTTPError on a non-2xx response and
        requests.Timeout if the site does not respond in time.
        """
        site_url = CircuitScraper._get_site_url(site_id)

        # Timeout so a hung server cannot block the caller indefinitely.
        r = requests.get(site_url, timeout=30)
        r.raise_for_status()

        soup = BeautifulSoup(r.content, "html.parser")

        # Collect the raw accordion divs first, separately from the parsed
        # results.  BUG FIX: the original appended parsed Machine objects to
        # the same list it was iterating over, so the loop eventually reached
        # a Machine and crashed on .select(); it also returned a mixture of
        # Tag and Machine objects.
        accordions = []
        for section in soup.select("section.accordions--circuit-view.js-machine-type"):
            accordions += list(section.select("div.accordion"))

        machines = []
        for item in accordions:
            states = item.select("div.accordion__slug div.accordion__title")
            if len(states) == 0:
                # No title element => not a machine entry; skip it.
                continue

            attr_classes = states[0].attrs.get("class", [])
            machine = Machine()

            # Descriptor text is e.g. "Washer 3" or "Dryer 1".
            descriptor_text = states[0].get_text().lower()

            machine.type = MachineType.Dryer if "dryer" in descriptor_text else MachineType.Washer
            machine.number = descriptor_text.replace("washer", "").replace("dryer", "").strip()

            # _class_washer is included on every item, so if none of the
            # state-specific classes are present we fall back to Available.
            if CircuitScraper._class_in_use in attr_classes:
                machine.state = MachineState.InUse
            elif CircuitScraper._class_completed in attr_classes:
                machine.state = MachineState.Completed
            elif CircuitScraper._class_state_unknown in attr_classes:
                machine.state = MachineState.Unknown
            else:
                # Covers both the plain washer class and the dryer-specific
                # class: in either case the machine is available.
                machine.state = MachineState.Available

            if machine.state == MachineState.InUse:
                # NOTE(review): assumes an in-use machine always has a
                # "p span" element like "12 mins" — an IndexError/ValueError
                # here means the site markup changed.
                minutes_remaining_text = item.select("p span")[0].get_text(strip=True)
                machine.minutes_remaining = int(minutes_remaining_text.replace("mins", "").strip())
            else:
                machine.minutes_remaining = None

            machines.append(machine)

        return machines
|
94
poetry.lock
generated
Normal file
94
poetry.lock
generated
Normal file
|
@@ -0,0 +1,94 @@
|
|||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.11.1"
|
||||
description = "Screen-scraping library"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6.0"
|
||||
|
||||
[package.dependencies]
|
||||
soupsieve = ">1.2"
|
||||
|
||||
[package.extras]
|
||||
html5lib = ["html5lib"]
|
||||
lxml = ["lxml"]
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2022.9.24"
|
||||
description = "Python package for providing Mozilla's CA Bundle."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
|
||||
[[package]]
|
||||
name = "charset-normalizer"
|
||||
version = "2.1.1"
|
||||
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6.0"
|
||||
|
||||
[package.extras]
|
||||
unicode_backport = ["unicodedata2"]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "3.4"
|
||||
description = "Internationalized Domain Names in Applications (IDNA)"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.5"
|
||||
|
||||
[[package]]
|
||||
name = "requests"
|
||||
version = "2.28.1"
|
||||
description = "Python HTTP for Humans."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7, <4"
|
||||
|
||||
[package.dependencies]
|
||||
certifi = ">=2017.4.17"
|
||||
charset-normalizer = ">=2,<3"
|
||||
idna = ">=2.5,<4"
|
||||
urllib3 = ">=1.21.1,<1.27"
|
||||
|
||||
[package.extras]
|
||||
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
|
||||
use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"]
|
||||
|
||||
[[package]]
|
||||
name = "soupsieve"
|
||||
version = "2.3.2.post1"
|
||||
description = "A modern CSS selector implementation for Beautiful Soup."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "1.26.12"
|
||||
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4"
|
||||
|
||||
[package.extras]
|
||||
brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"]
|
||||
secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "urllib3-secure-extra", "ipaddress"]
|
||||
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||
|
||||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.8"
|
||||
content-hash = "f387c917af52c11962400d12327a47dde4e322812f89078f6592ed4809432d7a"
|
||||
|
||||
[metadata.files]
|
||||
beautifulsoup4 = []
|
||||
certifi = []
|
||||
charset-normalizer = []
|
||||
idna = []
|
||||
requests = []
|
||||
soupsieve = []
|
||||
urllib3 = []
|
Reference in a new issue