Added scraper that will populate a CSV spreadsheet with timestamped scrape data
This commit is contained in:
83
ncicscraper.py
Normal file
83
ncicscraper.py
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
import csv
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def simplify_name(full_name):
    """Reduce a scraped department title to a short hospital name.

    Strips any trailing ", <site> 24hrs" marker (case-insensitive), then
    collapses the two community-hospital titles to their canonical names.

    Args:
        full_name: Department text as scraped from the first table column.

    Returns:
        The simplified hospital name.
    """
    cleaned = re.sub(r"(,.*)?\s*24hrs", "", full_name, flags=re.IGNORECASE)
    cleaned = cleaned.strip()

    # Normalise the two community-hospital variants to a canonical label.
    for canonical in ("Penrith Community Hospital", "Keswick Community Hospital"):
        if canonical in cleaned:
            return canonical

    return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_data():
    """Scrape the NCIC live emergency waiting-times page.

    Parses the first two <tbody> tables on the page (A&E and UTC) and
    returns one dict per department row.

    Returns:
        list[dict]: rows with keys ``timestamp_gmt``, ``category``,
        ``hospital``, ``patients``, ``avg_wait``, ``arrivals_last_hour``.

    Raises:
        requests.HTTPError: if the page responds with an error status.
        RuntimeError: if the expected two tables are not found.
    """
    url = "https://www.ncic.nhs.uk/waiting/ncic-live-emergencytimes.html"
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    tables = soup.find_all("tbody")

    if len(tables) < 2:
        raise RuntimeError("Could not find expected tables")

    # Fix: take the timestamp ONCE per scrape. The original called
    # datetime.now() inside the row loop, so rows from the same scrape
    # carried slightly different timestamps and could not be grouped.
    scraped_at = datetime.now(timezone.utc).isoformat()

    results = []
    categories = ["A&E", "UTC"]

    for idx, table in enumerate(tables[:2]):
        for row in table.find_all("tr"):
            cols = row.find_all("td")
            if len(cols) < 4:
                # Header or malformed row — nothing to extract.
                continue

            department = cols[0].get_text(separator=" ", strip=True)

            results.append({
                "timestamp_gmt": scraped_at,
                "category": categories[idx],
                "hospital": simplify_name(department),
                "patients": cols[1].get_text(strip=True),
                "avg_wait": cols[2].get_text(strip=True),
                "arrivals_last_hour": cols[3].get_text(strip=True),
            })

    return results
|
||||||
|
|
||||||
|
|
||||||
|
def save_to_csv(data, filename="waiting_times.csv"):
    """Append scraped rows to *filename* as CSV.

    Creates the file (with a header row) if it does not yet exist;
    otherwise appends the rows without repeating the header.

    Args:
        data: Iterable of dicts keyed by the fixed field names below.
        filename: Target CSV path; defaults to ``waiting_times.csv``.
    """
    # Decide on the header before opening in append mode (which creates
    # the file, making a later existence check useless).
    write_header = not os.path.isfile(filename)

    with open(filename, mode='a', newline='', encoding='utf-8') as out:
        writer = csv.DictWriter(
            out,
            fieldnames=[
                "timestamp_gmt", "category", "hospital",
                "patients", "avg_wait", "arrivals_last_hour",
            ],
        )
        if write_header:
            writer.writeheader()
        writer.writerows(data)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: run one scrape and append the rows to the CSV.
    # Bind the filename once so the save target and the message cannot
    # drift apart (the original hard-coded the name in an f-string with
    # no placeholders).
    output_file = "waiting_times.csv"
    data = fetch_data()
    save_to_csv(data, output_file)
    print(f"Data saved to '{output_file}'.")
|
||||||
Reference in New Issue
Block a user