From 3804833291b2f30f71ace7ad64b8b2ea44150a24 Mon Sep 17 00:00:00 2001
From: Claude
Date: Wed, 17 Jun 2026 07:20:54 +0000
Subject: [PATCH] Fix SND (Sunderland): add cert bypass and photo extraction

Sunderland's CMIS server (committees.sunderland.gov.uk) uses a TLS
certificate not trusted by wreq's embedded BoringSSL CA bundle. Adding
verify_requests = False bypasses cert verification so wreq can reach the
endpoint. The server had a transient outage (503) that cleared up, so
the fix can now be verified.

Additionally, the CMIS list-page card divs each contain a PenPicResize
img element with the councillor's headshot. The base
CMISCouncillorScraper does not extract photos, so this scraper overrides
get_single_councillor to pull the photo URL from the list-page HTML and
resolve it against the base URL. All 75 councillors now have photos.

Fixes #359.
---
 scrapers/SND-sunderland/councillors.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/scrapers/SND-sunderland/councillors.py b/scrapers/SND-sunderland/councillors.py
index 4c4029b6..cf8320a4 100644
--- a/scrapers/SND-sunderland/councillors.py
+++ b/scrapers/SND-sunderland/councillors.py
@@ -1,6 +1,28 @@
+from urllib.parse import urljoin
+
 from lgsf.councillors.scrapers import CMISCouncillorScraper
 
 
 class Scraper(CMISCouncillorScraper):
+    # Sunderland's CMIS server presents a TLS certificate that the HTTP
+    # client's bundled CA store does not trust, so verification is off.
+    # NOTE(review): revisit once the server's certificate is trusted.
+    verify_requests = False
+
     def get_party_name(self, list_page_html):
         return list_page_html.find_all("img")[-1]["alt"].replace("(logo)", "").strip()
+
+    def get_single_councillor(self, list_page_html):
+        """Return the base-class councillor, adding a photo URL when found.
+
+        The photo comes from the ``PenPicResize`` image in the list-page
+        card and is resolved against ``self.base_url``.
+        """
+        councillor = super().get_single_councillor(list_page_html)
+        img = list_page_html.find("img", {"class": "PenPicResize"})
+        if img:
+            # Prefer data-src over src; a "data:" URI is not used as a photo.
+            src = img.get("data-src") or img.get("src")
+            if src and not src.startswith("data:"):
+                councillor.photo_url = urljoin(self.base_url, src)
+        return councillor