Added scraper that will populate a CSV spreadsheet with timestamped scrape data
This commit is contained in:
83
ncicscraper.py
Normal file
83
ncicscraper.py
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
import csv
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def simplify_name(full_name):
    """Reduce a scraped department title to a short hospital name.

    Strips any trailing ", <site> 24hrs" marker (case-insensitive), then
    collapses the two community-hospital titles to their canonical names.

    Args:
        full_name: Department text as scraped from the first table column.

    Returns:
        The simplified hospital name.
    """
    cleaned = re.sub(r"(,.*)?\s*24hrs", "", full_name, flags=re.IGNORECASE)
    cleaned = cleaned.strip()

    # Normalise the two community-hospital variants to a canonical label.
    for canonical in ("Penrith Community Hospital", "Keswick Community Hospital"):
        if canonical in cleaned:
            return canonical

    return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_data():
    """Scrape the NCIC live emergency waiting-times page.

    Parses the first two <tbody> tables on the page (A&E and UTC) and
    returns one dict per department row.

    Returns:
        list[dict]: rows with keys ``timestamp_gmt``, ``category``,
        ``hospital``, ``patients``, ``avg_wait``, ``arrivals_last_hour``.

    Raises:
        requests.HTTPError: if the page responds with an error status.
        RuntimeError: if the expected two tables are not found.
    """
    url = "https://www.ncic.nhs.uk/waiting/ncic-live-emergencytimes.html"
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    tables = soup.find_all("tbody")

    if len(tables) < 2:
        raise RuntimeError("Could not find expected tables")

    # Fix: take the timestamp ONCE per scrape. The original called
    # datetime.now() inside the row loop, so rows from the same scrape
    # carried slightly different timestamps and could not be grouped.
    scraped_at = datetime.now(timezone.utc).isoformat()

    results = []
    categories = ["A&E", "UTC"]

    for idx, table in enumerate(tables[:2]):
        for row in table.find_all("tr"):
            cols = row.find_all("td")
            if len(cols) < 4:
                # Header or malformed row — nothing to extract.
                continue

            department = cols[0].get_text(separator=" ", strip=True)

            results.append({
                "timestamp_gmt": scraped_at,
                "category": categories[idx],
                "hospital": simplify_name(department),
                "patients": cols[1].get_text(strip=True),
                "avg_wait": cols[2].get_text(strip=True),
                "arrivals_last_hour": cols[3].get_text(strip=True),
            })

    return results
|
||||||
|
|
||||||
|
|
||||||
|
def save_to_csv(data, filename="waiting_times.csv"):
    """Append scraped rows to *filename* as CSV.

    Creates the file (with a header row) if it does not yet exist;
    otherwise appends the rows without repeating the header.

    Args:
        data: Iterable of dicts keyed by the fixed field names below.
        filename: Target CSV path; defaults to ``waiting_times.csv``.
    """
    # Decide on the header before opening in append mode (which creates
    # the file, making a later existence check useless).
    write_header = not os.path.isfile(filename)

    with open(filename, mode='a', newline='', encoding='utf-8') as out:
        writer = csv.DictWriter(
            out,
            fieldnames=[
                "timestamp_gmt", "category", "hospital",
                "patients", "avg_wait", "arrivals_last_hour",
            ],
        )
        if write_header:
            writer.writeheader()
        writer.writerows(data)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: run one scrape and append the rows to the CSV.
    # Bind the filename once so the save target and the message cannot
    # drift apart (the original hard-coded the name in an f-string with
    # no placeholders).
    output_file = "waiting_times.csv"
    data = fetch_data()
    save_to_csv(data, output_file)
    print(f"Data saved to '{output_file}'.")
|
||||||
Reference in New Issue
Block a user