From a267532adbdfe7a81559f6e09672e59ef1048cb9 Mon Sep 17 00:00:00 2001
From: APT96 <info@apt96.com>
Date: Wed, 4 Feb 2026 19:45:55 +0000
Subject: [PATCH] Added scraper that will populate a CSV spreadsheet with
 timestamped scrape data

---
 ncicscraper.py | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 ncicscraper.py

diff --git a/ncicscraper.py b/ncicscraper.py
new file mode 100644
index 0000000..40a2c0f
--- /dev/null
+++ b/ncicscraper.py
@@ -0,0 +1,83 @@
+import requests
+from bs4 import BeautifulSoup
+import re
+from datetime import datetime, timezone
+import csv
+import os
+
+
+def simplify_name(full_name):
+    """Remove "24hrs" markers and collapse known community-hospital names."""
+    # Cut each "24hrs" (any case) plus, when present, the text from a comma up to it.
+    trimmed = re.sub(r"(,.*)?\s*24hrs", "", full_name, flags=re.IGNORECASE).strip()
+    # First match wins, exactly like an if/elif chain in this order.
+    for canonical in ("Penrith Community Hospital", "Keswick Community Hospital"):
+        if canonical in trimmed:
+            return canonical
+    return trimmed
+
+
+def fetch_data():
+    """Scrape the NCIC live emergency waiting-times page into a list of row dicts.
+
+    The first two <tbody> elements are read, in page order, and labelled "A&E"
+    and "UTC"; rows with fewer than four <td> cells are skipped. Every row of
+    one call shares a single UTC timestamp so a scrape can be grouped on it.
+
+    Returns:
+        list[dict]: one dict per row with keys timestamp_gmt, category,
+        hospital, patients, avg_wait and arrivals_last_hour (all strings).
+
+    Raises:
+        requests.RequestException: the request failed or returned an error status.
+        RuntimeError: fewer than two <tbody> elements were found.
+    """
+    url = "https://www.ncic.nhs.uk/waiting/ncic-live-emergencytimes.html"
+    response = requests.get(url, timeout=30)
+    response.raise_for_status()
+    # Stamp the scrape once; a per-row now() gave each row a different value.
+    scraped_at = datetime.now(timezone.utc).isoformat()
+
+    tables = BeautifulSoup(response.text, "html.parser").find_all("tbody")
+    if len(tables) < 2:
+        raise RuntimeError("Could not find expected tables")
+    results = []
+    # zip() stops after the two known categories, so only tables[:2] are read.
+    for category, table in zip(("A&E", "UTC"), tables):
+        for row in table.find_all("tr"):
+            cols = row.find_all("td")
+            if len(cols) < 4:
+                continue
+            results.append({
+                "timestamp_gmt": scraped_at,
+                "category": category,
+                "hospital": simplify_name(cols[0].get_text(separator=" ", strip=True)),
+                "patients": cols[1].get_text(strip=True),
+                "avg_wait": cols[2].get_text(strip=True),
+                "arrivals_last_hour": cols[3].get_text(strip=True),
+            })
+    return results
+
+
+def save_to_csv(data, filename="waiting_times.csv"):
+    """Append scraped rows to a CSV file, writing the header when it is missing.
+
+    Args:
+        data: iterable of dicts keyed by the column names in ``fieldnames``.
+        filename: path of the CSV file; it is created if it does not exist.
+    """
+    fieldnames = ["timestamp_gmt", "category", "hospital", "patients", "avg_wait", "arrivals_last_hour"]
+    # A zero-byte file (e.g. left by `touch`) still needs a header, so test
+    # for content rather than mere existence.
+    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0
+    with open(filename, mode="a", newline="", encoding="utf-8") as file:
+        writer = csv.DictWriter(file, fieldnames=fieldnames)
+        if needs_header:
+            writer.writeheader()
+        writer.writerows(data)
+
+
+if __name__ == "__main__":
+    data = fetch_data()  # one HTTP request; raises if the request fails or tables are missing
+    save_to_csv(data)    # appends to the default file, 'waiting_times.csv'
+    print("Data saved to 'waiting_times.csv'.")  # plain literal: nothing to interpolate