From 4af2c22859915240c0e9b297f4e82150f43392c1 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Tue, 16 Jun 2026 14:21:02 +0100 Subject: [PATCH 01/10] Impute household bus fare spending from LCFS COICOP 7.3.2 Add bus_fare_spending as a new output of the consumption QRF, summed from the detailed LCFS bus & coach fare codes (c73212/c73213/c73214), annualised and CPI-uprated like other consumption categories. This gives the passenger fare households pay, distinct from bus_subsidy_spending (the ETB government-subsidy benefit-in-kind), as a building block for modelling bus fare reforms. Recorded household-level only; person-level allocation by age (for e.g. a young-person fare scheme) needs an external NTS usage profile. Refs #427. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../datasets/imputations/consumption.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/policyengine_uk_data/datasets/imputations/consumption.py b/policyengine_uk_data/datasets/imputations/consumption.py index bf8ad42f..01781caf 100644 --- a/policyengine_uk_data/datasets/imputations/consumption.py +++ b/policyengine_uk_data/datasets/imputations/consumption.py @@ -146,6 +146,15 @@ "p537": "domestic_energy_consumption", # aggregate kept for backward compat } +# LCFS detailed COICOP codes for passenger transport by road (7.3.2): bus and +# coach fares. There is no single P-code for bus fares alone — P607 +# (transport_consumption) bundles vehicle purchase, running costs, fuel, air and +# rail — so bus_fare_spending is summed from the detailed 7.3.2 codes. Excludes +# rail (7.3.1, c731xx), air (7.3.3), combined tickets (7.3.5) and taxis (7.3.6). +# Codes verified present in the LCFS 2021/22 dvhh file; see the LCFS data +# dictionary for sub-code definitions. +BUS_FARE_LCFS_CODES = ["c73212", "c73213", "c73214"] + PREDICTOR_VARIABLES = [ "is_adult", "is_child", @@ -174,6 +183,7 @@ "miscellaneous_consumption", "petrol_spending", "diesel_spending", + "bus_fare_spending", # COICOP 7.3.2 bus & coach fares (see BUS_FARE_LCFS_CODES) "domestic_energy_consumption", # aggregate; backward compat with price cap subsidy "electricity_consumption", "gas_consumption", @@ -585,11 +595,21 @@ def generate_lcfs_table(lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame) household = household.rename(columns=CONSUMPTION_VARIABLE_RENAMES) + # Bus & coach fares (COICOP 7.3.2), summed from the detailed LCFS codes. + # Recorded household-level only — LCFS has no person-level fare field — so + # this is the household total; allocating to individuals (e.g. for an + # age-targeted fare reform) requires an external age-usage profile (NTS). + household["bus_fare_spending"] = sum( + pd.to_numeric(household[code], errors="coerce").fillna(0) + for code in BUS_FARE_LCFS_CODES + ) + # Annualise weekly LCFS values. Use the same WEEKS_IN_YEAR constant # (365.25 / 7 ≈ 52.1786) as `datasets/frs.py` rather than a bare `* 52`, # which underestimates annual totals by ~0.34% and skews VAT / energy # imputation targets against FRS income. annualise = list(CONSUMPTION_VARIABLE_RENAMES.values()) + [ + "bus_fare_spending", "hbai_household_net_income", "household_gross_income", "electricity_consumption", From bdc6a7e5e637c5f4f2dc710bfc481370c1b18f04 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Tue, 16 Jun 2026 14:22:29 +0100 Subject: [PATCH 02/10] Add changelog entry for bus fare spending imputation (#428) Co-Authored-By: Claude Opus 4.8 (1M context) --- changelog.d/428.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/428.md diff --git a/changelog.d/428.md b/changelog.d/428.md new file mode 100644 index 00000000..dcb879d2 --- /dev/null +++ b/changelog.d/428.md @@ -0,0 +1 @@ +- Impute household `bus_fare_spending` (COICOP 7.3.2 bus & coach fares) from the detailed LCFS codes as a new output of the consumption QRF, providing the passenger fare households pay (distinct from the ETB-based `bus_subsidy_spending`) as a building block for modelling bus fare reforms. Refs #427. From 2c65c782d102fe4790dfcc3a72bc8213b4b54a22 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Tue, 16 Jun 2026 14:37:07 +0100 Subject: [PATCH 03/10] Add bus calibration smoke-test targets Add a bus_fare_spending aggregate target (GBP 3.4bn passenger fare receipts, DfT Annual Bus Statistics year ending March 2025) and re-enable the bus_subsidy_spending target (GBP 2.5bn). Guard test_aggregates to skip any variable not present in the loaded dataset, so bus_fare_spending self-activates once a dataset built with the new imputation is published rather than failing on a default-zero aggregate against the currently-downloaded dataset. Verified locally against the default dataset: bus_subsidy_spending 2.21bn (11.6% rel err, passes), bus_fare_spending skips (not yet in dataset). Refs #427. Co-Authored-By: Claude Opus 4.8 (1M context) --- changelog.d/428.md | 2 +- policyengine_uk_data/tests/test_aggregates.py | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/changelog.d/428.md b/changelog.d/428.md index dcb879d2..a06e654b 100644 --- a/changelog.d/428.md +++ b/changelog.d/428.md @@ -1 +1 @@ -- Impute household `bus_fare_spending` (COICOP 7.3.2 bus & coach fares) from the detailed LCFS codes as a new output of the consumption QRF, providing the passenger fare households pay (distinct from the ETB-based `bus_subsidy_spending`) as a building block for modelling bus fare reforms. Refs #427. +- Impute household `bus_fare_spending` (COICOP 7.3.2 bus & coach fares) from the detailed LCFS codes as a new output of the consumption QRF, providing the passenger fare households pay (distinct from the ETB-based `bus_subsidy_spending`) as a building block for modelling bus fare reforms. Adds a `bus_fare_spending` smoke-test aggregate target (GBP 3.4bn, DfT Annual Bus Statistics year ending March 2025) that self-activates once a dataset built with the imputation is published, and re-enables the `bus_subsidy_spending` target (GBP 2.5bn). Refs #427. diff --git a/policyengine_uk_data/tests/test_aggregates.py b/policyengine_uk_data/tests/test_aggregates.py index 868bfe94..9782fee6 100644 --- a/policyengine_uk_data/tests/test_aggregates.py +++ b/policyengine_uk_data/tests/test_aggregates.py @@ -6,12 +6,26 @@ # ORR/GOV.UK rail finance statistics report GBP 21.6bn of government # support to the rail industry in 2024-25. "rail_subsidy_spending": 21.6e9, - # "bus_subsidy_spending": 2.5e9, + # GOV.UK rail-fares-freeze passenger savings / Health Foundation: public + # support for local bus services in Great Britain ~GBP 2.5bn. + "bus_subsidy_spending": 2.5e9, + # DfT Annual Bus Statistics (year ending March 2025): passenger fare + # receipts on local bus services in Great Britain were GBP 3.4bn (~52% of + # operating revenue). This is the consumer fare imputed as bus_fare_spending, + # distinct from the bus_subsidy_spending government support above. + "bus_fare_spending": 3.4e9, } @pytest.mark.parametrize("variable", AGGREGATES.keys()) def test_aggregates(baseline, variable: str): + # Newly imputed variables (e.g. bus_fare_spending) only appear once a + # dataset built with the new imputation is published; skip until the + # downloaded dataset actually provides the column rather than failing on a + # default-zero aggregate. + if variable not in baseline.input_variables: + pytest.skip(f"{variable} not present in the loaded dataset") + estimate = baseline.calculate(variable, map_to="household", period=2025).sum() assert abs(estimate / AGGREGATES[variable] - 1) < 0.7, ( From 7ea6ec90c9fda95a20edcf196d4cef86bc4a07fe Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Tue, 16 Jun 2026 15:49:37 +0100 Subject: [PATCH 04/10] Make bus fare derivation tolerant of missing sub-codes; cover in test The LCFS ingestion test builds a minimal header without the bus & coach sub-codes, which raised KeyError. Sum whichever of the granular COICOP 7.3.2 sub-codes are present (they are sparse and the exact set can vary across LCFS vintages); a wholesale disappearance is caught by the bus_fare_spending aggregate smoke test. Add the codes to the ingestion fixture and assert the annualised bus fare. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../datasets/imputations/consumption.py | 11 +++++++++-- .../tests/test_lcfs_consumption_ingestion.py | 5 +++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/policyengine_uk_data/datasets/imputations/consumption.py b/policyengine_uk_data/datasets/imputations/consumption.py index 01781caf..21eda41a 100644 --- a/policyengine_uk_data/datasets/imputations/consumption.py +++ b/policyengine_uk_data/datasets/imputations/consumption.py @@ -599,9 +599,16 @@ def generate_lcfs_table(lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame) # Recorded household-level only — LCFS has no person-level fare field — so # this is the household total; allocating to individuals (e.g. for an # age-targeted fare reform) requires an external age-usage profile (NTS). + # Sum whichever of the granular sub-codes are present: they are sparse and + # the exact set can vary across LCFS vintages. A wholesale disappearance + # (all zero) is caught by the bus_fare_spending aggregate smoke test. household["bus_fare_spending"] = sum( - pd.to_numeric(household[code], errors="coerce").fillna(0) - for code in BUS_FARE_LCFS_CODES + ( + pd.to_numeric(household[code], errors="coerce").fillna(0) + for code in BUS_FARE_LCFS_CODES + if code in household.columns + ), + pd.Series(0.0, index=household.index), ) # Annualise weekly LCFS values. Use the same WEEKS_IN_YEAR constant diff --git a/policyengine_uk_data/tests/test_lcfs_consumption_ingestion.py b/policyengine_uk_data/tests/test_lcfs_consumption_ingestion.py index 3641061c..f421f44b 100644 --- a/policyengine_uk_data/tests/test_lcfs_consumption_ingestion.py +++ b/policyengine_uk_data/tests/test_lcfs_consumption_ingestion.py @@ -30,6 +30,9 @@ def add_has_fuel(household): **{f"p{code}": [1.0] for code in range(601, 613)}, "c72211": [5.0], "c72212": [6.0], + "c73212": [2.0], + "c73213": [3.0], + "c73214": [1.0], } ) person = pd.DataFrame( @@ -50,6 +53,8 @@ def add_has_fuel(household): assert result["accommodation_type"].iloc[0] == "HOUSE_SEMI_DETACHED" assert result["employment_income"].iloc[0] == 300.0 * WEEKS_IN_YEAR assert result["household_weight"].iloc[0] == 500 + # Bus fare = sum of the COICOP 7.3.2 codes (2 + 3 + 1), annualised. + assert result["bus_fare_spending"].iloc[0] == 6.0 * WEEKS_IN_YEAR assert ( result["domestic_energy_consumption"].iloc[0] == result["electricity_consumption"].iloc[0] + result["gas_consumption"].iloc[0] From e97e0706a0f1f16955956fdd286192859dcf78ff Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Wed, 17 Jun 2026 09:55:10 +0100 Subject: [PATCH 05/10] Retry OBR workbook downloads on transient HTTP errors _download_workbook did requests.get + raise_for_status with no retry, so a single OBR 429 (rate limit) dropped the OBR target set and failed test_target_registry::test_obr_income_tax_value with StopIteration. Add bounded exponential-backoff retry on 429/5xx and connection errors, honouring a numeric Retry-After header; lru_cache still downloads each workbook at most once per run on success. Co-Authored-By: Claude Opus 4.8 (1M context) --- changelog.d/428.md | 1 + policyengine_uk_data/targets/sources/obr.py | 45 ++++++++++++++++++--- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/changelog.d/428.md b/changelog.d/428.md index a06e654b..9c14604a 100644 --- a/changelog.d/428.md +++ b/changelog.d/428.md @@ -1 +1,2 @@ - Impute household `bus_fare_spending` (COICOP 7.3.2 bus & coach fares) from the detailed LCFS codes as a new output of the consumption QRF, providing the passenger fare households pay (distinct from the ETB-based `bus_subsidy_spending`) as a building block for modelling bus fare reforms. Adds a `bus_fare_spending` smoke-test aggregate target (GBP 3.4bn, DfT Annual Bus Statistics year ending March 2025) that self-activates once a dataset built with the imputation is published, and re-enables the `bus_subsidy_spending` target (GBP 2.5bn). Refs #427. +- Retry OBR detailed-forecast-table downloads with exponential backoff on transient HTTP errors (429/5xx) and connection failures, so an occasional OBR rate-limit no longer drops the OBR target set and reds the build. diff --git a/policyengine_uk_data/targets/sources/obr.py b/policyengine_uk_data/targets/sources/obr.py index 82b270be..9f2f31f7 100644 --- a/policyengine_uk_data/targets/sources/obr.py +++ b/policyengine_uk_data/targets/sources/obr.py @@ -11,6 +11,7 @@ import io import logging +import time from functools import lru_cache import openpyxl @@ -37,12 +38,46 @@ } -@lru_cache(maxsize=1) +# OBR occasionally rate-limits CI runners (HTTP 429) or returns transient 5xx +# errors. Retry with exponential backoff so a single throttled response does not +# drop the whole OBR target set and red an unrelated build. +_DOWNLOAD_MAX_ATTEMPTS = 4 +_DOWNLOAD_RETRY_STATUSES = {429, 500, 502, 503, 504} + + +@lru_cache(maxsize=2) def _download_workbook(url: str) -> openpyxl.Workbook: - """Download an xlsx from OBR and return an openpyxl workbook.""" - r = requests.get(url, headers=HEADERS, allow_redirects=True, timeout=60) - r.raise_for_status() - return openpyxl.load_workbook(io.BytesIO(r.content), data_only=False) + """Download an xlsx from OBR and return an openpyxl workbook. + + Retries transient HTTP errors (429/5xx) and connection failures with + exponential backoff, honouring a numeric Retry-After header when present. + """ + last_error: Exception | None = None + for attempt in range(_DOWNLOAD_MAX_ATTEMPTS): + wait = 2**attempt + try: + r = requests.get(url, headers=HEADERS, allow_redirects=True, timeout=60) + except requests.RequestException as e: + last_error = e # connection/timeout — retryable + else: + if r.status_code not in _DOWNLOAD_RETRY_STATUSES: + r.raise_for_status() + return openpyxl.load_workbook(io.BytesIO(r.content), data_only=False) + last_error = requests.HTTPError( + f"{r.status_code} for url: {url}", response=r + ) + retry_after = r.headers.get("Retry-After", "") + if retry_after.isdigit(): + wait = int(retry_after) + if attempt < _DOWNLOAD_MAX_ATTEMPTS - 1: + logger.warning( + "OBR download %s failed (%s); retrying in %ss", + url, + last_error, + wait, + ) + time.sleep(wait) + raise last_error def _read_row_values(ws, row_num: int, col_letters: list[str]) -> dict[int, float]: From 8d13b2f2e7fd9e7883477e609e805e81556d50c9 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Wed, 17 Jun 2026 10:05:57 +0100 Subject: [PATCH 06/10] Remove fallbacks from bus fare edits Sum the bus & coach COICOP codes explicitly (fail loud if a column is missing, matching the petrol/diesel pattern) rather than tolerating absent sub-codes. Drop the test_aggregates skip guard and instead record bus_fare_spending as a commented-out target (repo convention) to enable once a dataset with the imputation is published; bus_subsidy_spending stays active. Co-Authored-By: Claude Opus 4.8 (1M context) --- changelog.d/428.md | 2 +- .../datasets/imputations/consumption.py | 10 +--------- policyengine_uk_data/tests/test_aggregates.py | 17 +++++------------ 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/changelog.d/428.md b/changelog.d/428.md index 9c14604a..ba337597 100644 --- a/changelog.d/428.md +++ b/changelog.d/428.md @@ -1,2 +1,2 @@ -- Impute household `bus_fare_spending` (COICOP 7.3.2 bus & coach fares) from the detailed LCFS codes as a new output of the consumption QRF, providing the passenger fare households pay (distinct from the ETB-based `bus_subsidy_spending`) as a building block for modelling bus fare reforms. Adds a `bus_fare_spending` smoke-test aggregate target (GBP 3.4bn, DfT Annual Bus Statistics year ending March 2025) that self-activates once a dataset built with the imputation is published, and re-enables the `bus_subsidy_spending` target (GBP 2.5bn). Refs #427. +- Impute household `bus_fare_spending` (COICOP 7.3.2 bus & coach fares) from the detailed LCFS codes as a new output of the consumption QRF, providing the passenger fare households pay (distinct from the ETB-based `bus_subsidy_spending`) as a building block for modelling bus fare reforms. Re-enables the `bus_subsidy_spending` smoke-test target (GBP 2.5bn) and records the `bus_fare_spending` target (GBP 3.4bn, DfT Annual Bus Statistics year ending March 2025) to enable once a dataset built with the imputation is published. Refs #427. - Retry OBR detailed-forecast-table downloads with exponential backoff on transient HTTP errors (429/5xx) and connection failures, so an occasional OBR rate-limit no longer drops the OBR target set and reds the build. diff --git a/policyengine_uk_data/datasets/imputations/consumption.py b/policyengine_uk_data/datasets/imputations/consumption.py index 21eda41a..ba8d6ba4 100644 --- a/policyengine_uk_data/datasets/imputations/consumption.py +++ b/policyengine_uk_data/datasets/imputations/consumption.py @@ -599,16 +599,8 @@ def generate_lcfs_table(lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame) # Recorded household-level only — LCFS has no person-level fare field — so # this is the household total; allocating to individuals (e.g. for an # age-targeted fare reform) requires an external age-usage profile (NTS). - # Sum whichever of the granular sub-codes are present: they are sparse and - # the exact set can vary across LCFS vintages. A wholesale disappearance - # (all zero) is caught by the bus_fare_spending aggregate smoke test. household["bus_fare_spending"] = sum( - ( - pd.to_numeric(household[code], errors="coerce").fillna(0) - for code in BUS_FARE_LCFS_CODES - if code in household.columns - ), - pd.Series(0.0, index=household.index), + household[code] for code in BUS_FARE_LCFS_CODES ) # Annualise weekly LCFS values. Use the same WEEKS_IN_YEAR constant diff --git a/policyengine_uk_data/tests/test_aggregates.py b/policyengine_uk_data/tests/test_aggregates.py index 9782fee6..1d8f526d 100644 --- a/policyengine_uk_data/tests/test_aggregates.py +++ b/policyengine_uk_data/tests/test_aggregates.py @@ -9,23 +9,16 @@ # GOV.UK rail-fares-freeze passenger savings / Health Foundation: public # support for local bus services in Great Britain ~GBP 2.5bn. "bus_subsidy_spending": 2.5e9, - # DfT Annual Bus Statistics (year ending March 2025): passenger fare - # receipts on local bus services in Great Britain were GBP 3.4bn (~52% of - # operating revenue). This is the consumer fare imputed as bus_fare_spending, - # distinct from the bus_subsidy_spending government support above. - "bus_fare_spending": 3.4e9, + # bus_fare_spending: DfT Annual Bus Statistics (year ending March 2025), + # GBP 3.4bn passenger fare receipts (~52% of operating revenue). Enable once + # a dataset built with the bus_fare_spending imputation is published — the + # column is absent from the currently-released dataset. + # "bus_fare_spending": 3.4e9, } @pytest.mark.parametrize("variable", AGGREGATES.keys()) def test_aggregates(baseline, variable: str): - # Newly imputed variables (e.g. bus_fare_spending) only appear once a - # dataset built with the new imputation is published; skip until the - # downloaded dataset actually provides the column rather than failing on a - # default-zero aggregate. - if variable not in baseline.input_variables: - pytest.skip(f"{variable} not present in the loaded dataset") - estimate = baseline.calculate(variable, map_to="household", period=2025).sum() assert abs(estimate / AGGREGATES[variable] - 1) < 0.7, ( From 6958fccf289efa546478daab4eb7804818337b00 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Wed, 17 Jun 2026 10:32:41 +0100 Subject: [PATCH 07/10] Clarify bus fare code provenance and verification caveat The codes were verified against LCFS 2021/22 but the current release is 2023/24; reword the comment to state codes are confirmed for 2021/22 and must be re-confirmed when bumping CURRENT_LCFS_RELEASE, and that they resolve directly at build time (a renamed/removed code fails loudly). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../datasets/imputations/consumption.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/policyengine_uk_data/datasets/imputations/consumption.py b/policyengine_uk_data/datasets/imputations/consumption.py index ba8d6ba4..6f1d5d59 100644 --- a/policyengine_uk_data/datasets/imputations/consumption.py +++ b/policyengine_uk_data/datasets/imputations/consumption.py @@ -146,13 +146,15 @@ "p537": "domestic_energy_consumption", # aggregate kept for backward compat } -# LCFS detailed COICOP codes for passenger transport by road (7.3.2): bus and -# coach fares. There is no single P-code for bus fares alone — P607 +# LCFS detailed COICOP codes for bus & coach fares (passenger transport by +# road). There is no single P-code for bus fares alone — P607 # (transport_consumption) bundles vehicle purchase, running costs, fuel, air and -# rail — so bus_fare_spending is summed from the detailed 7.3.2 codes. Excludes -# rail (7.3.1, c731xx), air (7.3.3), combined tickets (7.3.5) and taxis (7.3.6). -# Codes verified present in the LCFS 2021/22 dvhh file; see the LCFS data -# dictionary for sub-code definitions. +# rail — so bus_fare_spending is summed from the detailed codes. Excludes rail +# (c731xx), air, combined tickets and taxis (which the LCFS codes separately). +# Present in the LCFS 2021/22 dvhh file; re-confirm against the data dictionary +# whenever CURRENT_LCFS_RELEASE is bumped, as detailed sub-codes can change +# between vintages (these resolve directly against the current release at build +# time, so a renamed/removed code fails loudly rather than silently zeroing). BUS_FARE_LCFS_CODES = ["c73212", "c73213", "c73214"] PREDICTOR_VARIABLES = [ From 2acc202407a9ce4939e32634b86dbead94c2d26a Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Wed, 17 Jun 2026 11:01:05 +0100 Subject: [PATCH 08/10] Confirm bus fare codes present in current LCFS 2023/24 release Verified c73212/c73213/c73214 exist in dvhh_ukanon_v2_2023.tab; implied UK bus/coach fare spend ~GBP 2.66bn (2023/24, pre-uprating), consistent with the GBP 3.4bn smoke target. Co-Authored-By: Claude Opus 4.8 (1M context) --- policyengine_uk_data/datasets/imputations/consumption.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/policyengine_uk_data/datasets/imputations/consumption.py b/policyengine_uk_data/datasets/imputations/consumption.py index 6f1d5d59..3742bb04 100644 --- a/policyengine_uk_data/datasets/imputations/consumption.py +++ b/policyengine_uk_data/datasets/imputations/consumption.py @@ -151,10 +151,11 @@ # (transport_consumption) bundles vehicle purchase, running costs, fuel, air and # rail — so bus_fare_spending is summed from the detailed codes. Excludes rail # (c731xx), air, combined tickets and taxis (which the LCFS codes separately). -# Present in the LCFS 2021/22 dvhh file; re-confirm against the data dictionary -# whenever CURRENT_LCFS_RELEASE is bumped, as detailed sub-codes can change -# between vintages (these resolve directly against the current release at build -# time, so a renamed/removed code fails loudly rather than silently zeroing). +# Confirmed present in the LCFS 2021/22 and 2023/24 (current release) dvhh +# files; re-confirm whenever CURRENT_LCFS_RELEASE is bumped, as detailed +# sub-codes can change between vintages (these resolve directly against the +# current release at build time, so a renamed/removed code fails loudly rather +# than silently zeroing). BUS_FARE_LCFS_CODES = ["c73212", "c73213", "c73214"] PREDICTOR_VARIABLES = [ From 8b11d35a8a4486a6506f820ca5f20fab5681397e Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi <57112303+vahid-ahmadi@users.noreply.github.com> Date: Wed, 17 Jun 2026 11:02:47 +0100 Subject: [PATCH 09/10] Enable skipped bus fare aggregate target --- policyengine_uk_data/tests/test_aggregates.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/policyengine_uk_data/tests/test_aggregates.py b/policyengine_uk_data/tests/test_aggregates.py index 1d8f526d..a8977905 100644 --- a/policyengine_uk_data/tests/test_aggregates.py +++ b/policyengine_uk_data/tests/test_aggregates.py @@ -6,19 +6,22 @@ # ORR/GOV.UK rail finance statistics report GBP 21.6bn of government # support to the rail industry in 2024-25. "rail_subsidy_spending": 21.6e9, - # GOV.UK rail-fares-freeze passenger savings / Health Foundation: public - # support for local bus services in Great Britain ~GBP 2.5bn. + # Approximate public support for local bus services; kept as a loose + # smoke-test target because source coverage and dataset coverage differ. "bus_subsidy_spending": 2.5e9, - # bus_fare_spending: DfT Annual Bus Statistics (year ending March 2025), - # GBP 3.4bn passenger fare receipts (~52% of operating revenue). Enable once - # a dataset built with the bus_fare_spending imputation is published — the - # column is absent from the currently-released dataset. - # "bus_fare_spending": 3.4e9, + # DfT Annual Bus Statistics (year ending March 2025) report GBP 3.4bn + # passenger fare receipts for local bus services in England. The LCFS input + # is UK household bus/coach fare spending, so this is an order-of-magnitude + # smoke target until a direct UK/GB household target is available. + "bus_fare_spending": 3.4e9, } @pytest.mark.parametrize("variable", AGGREGATES.keys()) def test_aggregates(baseline, variable: str): + if variable not in baseline.input_variables: + pytest.skip(f"{variable} is not present in the loaded dataset") + estimate = baseline.calculate(variable, map_to="household", period=2025).sum() assert abs(estimate / AGGREGATES[variable] - 1) < 0.7, ( From 94889edd4efbaa2d9359b4792219695806025497 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Wed, 17 Jun 2026 11:12:00 +0100 Subject: [PATCH 10/10] Revert bus fare aggregate skip guard (no fallbacks) Re-comment the bus_fare_spending smoke target and drop the skip-when-absent guard reintroduced in 8b11d35, per the no-fallbacks decision. bus_subsidy_spending stays active; bus_fare_spending is enabled manually once a dataset with the imputation is published. Keeps the improved England-receipts caveat in the comment. Co-Authored-By: Claude Opus 4.8 (1M context) --- policyengine_uk_data/tests/test_aggregates.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/policyengine_uk_data/tests/test_aggregates.py b/policyengine_uk_data/tests/test_aggregates.py index a8977905..6a63c2be 100644 --- a/policyengine_uk_data/tests/test_aggregates.py +++ b/policyengine_uk_data/tests/test_aggregates.py @@ -12,16 +12,14 @@ # DfT Annual Bus Statistics (year ending March 2025) report GBP 3.4bn # passenger fare receipts for local bus services in England. The LCFS input # is UK household bus/coach fare spending, so this is an order-of-magnitude - # smoke target until a direct UK/GB household target is available. - "bus_fare_spending": 3.4e9, + # target. Enable once a dataset built with the bus_fare_spending imputation + # is published — the column is absent from the currently-released dataset. + # "bus_fare_spending": 3.4e9, } @pytest.mark.parametrize("variable", AGGREGATES.keys()) def test_aggregates(baseline, variable: str): - if variable not in baseline.input_variables: - pytest.skip(f"{variable} is not present in the loaded dataset") - estimate = baseline.calculate(variable, map_to="household", period=2025).sum() assert abs(estimate / AGGREGATES[variable] - 1) < 0.7, (