"""Scrape the NCIC live emergency waiting-times page into a CSV file.

Each run downloads the page, reads the rows of its first two ``<tbody>``
tables and appends them, stamped with the scrape time, to a CSV file.
"""

# Provenance: contents of ncicscraper.py as created by commit a267532
# ("Added scraper that will populate a csv spreadsheet with timestamped
# scrape data", APT96, 2026-02-04).

import csv
import os
import re
from datetime import datetime, timezone

URL = "https://www.ncic.nhs.uk/waiting/ncic-live-emergencytimes.html"

# Label given to each <tbody> on the page, in document order.
CATEGORIES = ("A&E", "UTC")

# Column order of the CSV file; also the keys of every scraped row.
FIELDNAMES = [
    "timestamp_gmt",
    "category",
    "hospital",
    "patients",
    "avg_wait",
    "arrivals_last_hour",
]

DEFAULT_CSV = "waiting_times.csv"

# Matches an optional ", <anything>" tail followed by the "24hrs" marker.
_SUFFIX_24HRS = re.compile(r"(,.*)?\s*24hrs", flags=re.IGNORECASE)

# Names that are collapsed to a fixed short form whenever they appear
# anywhere in the department text.
_CANONICAL_NAMES = (
    "Penrith Community Hospital",
    "Keswick Community Hospital",
)


def simplify_name(full_name):
    """Return a short hospital name derived from a department label.

    The "24hrs" marker (case-insensitive), together with any
    ", <location>" text directly before it, is removed and the result is
    stripped. Labels containing one of the known community-hospital
    names are reduced to exactly that name.

    Args:
        full_name: Department text as scraped from the page.

    Returns:
        The simplified name.
    """
    name = _SUFFIX_24HRS.sub("", full_name).strip()

    for canonical in _CANONICAL_NAMES:
        if canonical in name:
            return canonical

    return name


def _extract_rows(soup, timestamp):
    """Build one result dict per data row of the first tables in *soup*.

    Raises:
        RuntimeError: If the page has fewer ``<tbody>`` elements than
            there are categories.
    """
    tables = soup.find_all("tbody")
    if len(tables) < len(CATEGORIES):
        raise RuntimeError("Could not find expected tables")

    results = []
    # zip() stops after the last category, so extra tables are ignored.
    for category, table in zip(CATEGORIES, tables):
        for row in table.find_all("tr"):
            cols = row.find_all("td")
            if len(cols) < 4:
                # Not a data row (e.g. header or spacer); skip it.
                continue

            department = cols[0].get_text(separator=" ", strip=True)
            results.append({
                "timestamp_gmt": timestamp,
                "category": category,
                "hospital": simplify_name(department),
                "patients": cols[1].get_text(strip=True),
                "avg_wait": cols[2].get_text(strip=True),
                "arrivals_last_hour": cols[3].get_text(strip=True),
            })

    return results


def fetch_data(url=URL, timeout=30):
    """Download the waiting-times page and return its rows.

    Args:
        url: Page to scrape; defaults to the NCIC live emergency times page.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        A list of dicts keyed by ``FIELDNAMES``. All values are strings
        exactly as they appear on the page, and every row of one call
        shares the same UTC ISO-8601 timestamp.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the expected tables are not present.
    """
    # Imported here so simplify_name() and save_to_csv() stay importable
    # on systems without the scraping dependencies installed.
    import requests
    from bs4 import BeautifulSoup

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # One timestamp per scrape, so all rows of a run can be grouped on it.
    timestamp = datetime.now(timezone.utc).isoformat()

    soup = BeautifulSoup(response.text, "html.parser")
    return _extract_rows(soup, timestamp)


def save_to_csv(data, filename=DEFAULT_CSV):
    """Append *data* to a CSV file, writing the header when it is needed.

    The file is created if it does not exist. The header row is written
    whenever the file is empty, which covers both a new file and an
    existing zero-byte file.

    Args:
        data: Iterable of dicts keyed by ``FIELDNAMES``.
        filename: Path of the CSV file to append to.
    """
    with open(filename, mode="a", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=FIELDNAMES)

        # Checked on the open handle, so there is no gap between the
        # check and the write.
        if os.fstat(file.fileno()).st_size == 0:
            writer.writeheader()

        writer.writerows(data)


def main():
    """Scrape once and append the result to the default CSV file."""
    data = fetch_data()
    save_to_csv(data)
    print(f"Data saved to '{DEFAULT_CSV}'.")


if __name__ == "__main__":
    main()