Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Certify the UK data release `populace-uk-2023-72aeefc-20260611` (populace_uk_2023, policyengine-uk 2.88.20) directly from its data release manifest, and surface its Zenodo preservation DOI (10.5281/zenodo.20678518) on the bundle TRACE TRO.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ graph = [
"networkx>=3.0",
]
uk = [
"policyengine_core>=3.26.1",
"policyengine_core>=3.27.1",
"policyengine-uk==2.88.20",
]
us = [
Expand Down
72 changes: 38 additions & 34 deletions src/policyengine/data/release_manifests/uk.json
Original file line number Diff line number Diff line change
@@ -1,53 +1,57 @@
{
"schema_version": 1,
"bundle_id": "uk-4.16.2",
"country_id": "uk",
"policyengine_version": "4.16.2",
"model_package": {
"name": "policyengine-uk",
"version": "2.88.20",
"sha256": "8c3dacb868f3fb18296b8ef2475edaf543f57b8056d24a58bca59b108651f272",
"wheel_url": "https://files.pythonhosted.org/packages/32/f0/c0e7dbcc049501dc968da0a67de4976f305228328f96fe0ad08c65301c4f/policyengine_uk-2.88.20-py3-none-any.whl"
},
"data_package": {
"name": "policyengine-uk-data",
"version": "1.55.10",
"repo_id": "policyengine/policyengine-uk-data-private",
"release_manifest_path": "release_manifest.json",
"release_manifest_revision": "655dd07e4bb9c777b00dac044949611f1feb824f"
"certification": {
"built_with_model_version": "2.86.0",
"certified_by": "policyengine.py certification",
"certified_for_model_version": "2.88.20",
"compatibility_basis": "compatible_model_packages",
"data_build_id": "populace-uk-2023-72aeefc-20260611"
},
"certified_data_artifact": {
"build_id": "populace-uk-2023-72aeefc-20260611",
"data_package": {
"name": "policyengine-uk-data",
"version": "1.55.10"
"name": "populace-data",
"version": "0.1.0"
},
"build_id": "policyengine-uk-data-1.55.10",
"dataset": "enhanced_frs_2023_24",
"uri": "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@655dd07e4bb9c777b00dac044949611f1feb824f",
"sha256": "584ae33d80ca0431254610a3f8254d132da73477d31966d6446282861ecae50d"
"dataset": "populace_uk_2023",
"sha256": "f489b7ef18b4628d2423338a20da1264a8d197c95a0c95265fc03ac3b6bc5b39",
"uri": "hf://policyengine/populace-uk-private/populace_uk_2023.h5@populace-uk-2023-72aeefc-20260611"
},
"certification": {
"compatibility_basis": "exact_build_model_version",
"data_build_id": "policyengine-uk-data-1.55.10",
"built_with_model_version": "2.88.20",
"certified_for_model_version": "2.88.20",
"data_build_fingerprint": "sha256:77f149725a36055fd89961855230401852b0712d301c6e26d6d16565c6b23809",
"certified_by": "policyengine.py bundled manifest"
"country_id": "uk",
"data_package": {
"name": "populace-data",
"release_manifest_path": "releases/populace-uk-2023-72aeefc-20260611/release_manifest.json",
"release_manifest_revision": "072b4067f3477d51b5023ae5250c9a1ab3b60164",
"repo_id": "policyengine/populace-uk-private",
"repo_type": "dataset",
"version": "0.1.0"
},
"default_dataset": "enhanced_frs_2023_24",
"datasets": {
"frs_2023_24": {
"path": "frs_2023_24.h5",
"repo_id": "policyengine/policyengine-uk-data-private",
"revision": "655dd07e4bb9c777b00dac044949611f1feb824f",
"sha256": "df26d4d7af9d164aa2d064181b39290292d2f62bb26fee6126fc095fc06da292"
},
"enhanced_frs_2023_24": {
"path": "enhanced_frs_2023_24.h5",
"sha256": "584ae33d80ca0431254610a3f8254d132da73477d31966d6446282861ecae50d"
"populace_uk_2023": {
"path": "populace_uk_2023.h5",
"repo_id": "policyengine/populace-uk-private",
"revision": "populace-uk-2023-72aeefc-20260611",
"sha256": "f489b7ef18b4628d2423338a20da1264a8d197c95a0c95265fc03ac3b6bc5b39"
}
},
"default_dataset": "populace_uk_2023",
"model_package": {
"name": "policyengine-uk",
"sha256": "8c3dacb868f3fb18296b8ef2475edaf543f57b8056d24a58bca59b108651f272",
"version": "2.88.20",
"wheel_url": "https://files.pythonhosted.org/packages/32/f0/c0e7dbcc049501dc968da0a67de4976f305228328f96fe0ad08c65301c4f/policyengine_uk-2.88.20-py3-none-any.whl"
},
"policyengine_version": "4.16.2",
"region_datasets": {
"national": {
"path_template": "enhanced_frs_2023_24.h5"
"path_template": "populace_uk_2023.h5"
}
}
},
"schema_version": 1
}
39 changes: 19 additions & 20 deletions src/policyengine/data/release_manifests/uk.trace.tro.jsonld
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"schema:name": "PolicyEngine",
"schema:url": "https://policyengine.org"
},
"schema:dateCreated": "2026-05-20T20:16:50.641086Z",
"schema:dateCreated": "2026-06-11T16:41:25Z",
"schema:description": "TRACE TRO for certified runtime bundle uk-4.16.2 covering the bundle manifest, the certified dataset artifact, the country model wheel, and the country data release manifest when it is available.",
"schema:name": "policyengine uk certified bundle TRO",
"trov:createdWith": {
Expand Down Expand Up @@ -45,15 +45,15 @@
"trov:hasArtifact": {
"@id": "composition/1/artifact/data_release_manifest"
},
"trov:hasLocation": "https://huggingface.co/policyengine/policyengine-uk-data-private/resolve/655dd07e4bb9c777b00dac044949611f1feb824f/release_manifest.json"
"trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-uk-private/resolve/072b4067f3477d51b5023ae5250c9a1ab3b60164/releases/populace-uk-2023-72aeefc-20260611/release_manifest.json"
},
{
"@id": "arrangement/1/location/dataset",
"@type": "trov:ArtifactLocation",
"trov:hasArtifact": {
"@id": "composition/1/artifact/dataset"
},
"trov:hasLocation": "https://huggingface.co/policyengine/policyengine-uk-data-private/resolve/655dd07e4bb9c777b00dac044949611f1feb824f/enhanced_frs_2023_24.h5"
"trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-uk-private/resolve/populace-uk-2023-72aeefc-20260611/populace_uk_2023.h5"
},
{
"@id": "arrangement/1/location/model_wheel",
Expand All @@ -75,21 +75,21 @@
"@type": "trov:ResearchArtifact",
"schema:name": "policyengine.py bundle manifest for uk",
"trov:mimeType": "application/json",
"trov:sha256": "5f4629bb55d361c9a1a9f2a46950570acccfefd10ec5fd04865b27d9017d10ea"
"trov:sha256": "b722a3d3faa604f0b01e64758cfd2836b1753860aef3abb942f5bad2a72a8763"
},
{
"@id": "composition/1/artifact/data_release_manifest",
"@type": "trov:ResearchArtifact",
"schema:name": "policyengine-uk-data release manifest 1.55.10",
"schema:name": "populace-data release manifest 0.1.0",
"trov:mimeType": "application/json",
"trov:sha256": "9f41a0f14ca93d20e61d33419173c3fedc1c3ba295b6ca67dd3197a41643d179"
"trov:sha256": "ac1e5126109164c90c9a70e29531a53583e7a9047ef6678f31b459c93f48428b"
},
{
"@id": "composition/1/artifact/dataset",
"@type": "trov:ResearchArtifact",
"schema:name": "enhanced_frs_2023_24",
"schema:name": "populace_uk_2023",
"trov:mimeType": "application/x-hdf5",
"trov:sha256": "584ae33d80ca0431254610a3f8254d132da73477d31966d6446282861ecae50d"
"trov:sha256": "f489b7ef18b4628d2423338a20da1264a8d197c95a0c95265fc03ac3b6bc5b39"
},
{
"@id": "composition/1/artifact/model_wheel",
Expand All @@ -102,27 +102,26 @@
"trov:hasFingerprint": {
"@id": "composition/1/fingerprint",
"@type": "trov:CompositionFingerprint",
"trov:sha256": "cfdc777af8b1dd4666c26bad8ceb7a47e0fe6cec0f89c569601e90055760858e"
"trov:sha256": "a0549aa701b7f6897e51b7a4e4397e75493995bcbc8c17a8420a5063ed0f0ef2"
}
},
"trov:hasPerformance": {
"@id": "trp/1",
"@type": "trov:TransparentResearchPerformance",
"pe:builtWithModelVersion": "2.88.20",
"pe:certifiedBy": "policyengine.py bundled manifest",
"pe:builtWithModelVersion": "2.86.0",
"pe:certifiedBy": "policyengine.py certification",
"pe:certifiedForModelVersion": "2.88.20",
"pe:ciGitRef": "refs/heads/main",
"pe:ciGitSha": "8b084c5cefdf02d38dd6f974fd846905c3167f39",
"pe:ciRunUrl": "https://github.com/PolicyEngine/policyengine.py/actions/runs/27407803462",
"pe:compatibilityBasis": "exact_build_model_version",
"pe:dataBuildFingerprint": "sha256:77f149725a36055fd89961855230401852b0712d301c6e26d6d16565c6b23809",
"pe:dataBuildId": "policyengine-uk-data-1.55.10",
"pe:emittedIn": "github-actions",
"rdfs:comment": "Certification of build policyengine-uk-data-1.55.10 for policyengine-uk 2.88.20.",
"pe:compatibilityBasis": "compatible_model_packages",
"pe:dataBuildId": "populace-uk-2023-72aeefc-20260611",
"pe:emittedIn": "local",
"pe:preservationDoi": [
"10.5281/zenodo.20678518"
],
"rdfs:comment": "Certification of build populace-uk-2023-72aeefc-20260611 for policyengine-uk 2.88.20.",
"trov:accessedArrangement": {
"@id": "arrangement/1"
},
"trov:startedAtTime": "2026-05-20T20:16:50.641086Z",
"trov:startedAtTime": "2026-06-11T16:41:25Z",
"trov:wasConductedBy": {
"@id": "trs"
}
Expand Down
8 changes: 8 additions & 0 deletions src/policyengine/provenance/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,14 @@ def regenerate_trace_tro(country: str, manifest_dir: Path = MANIFEST_DIR) -> Pat
serialize_trace_tro,
)

# Read the current on-disk country manifest and the live data-release
# manifest, not values a caller cached earlier in this process.
# Certification rewrites the country manifest immediately before
# calling this, so a stale cache would emit a TRO that lags the pin
# it just wrote (e.g. missing a freshly recorded preservation DOI).
get_release_manifest.cache_clear()
get_data_release_manifest.cache_clear()

release = get_release_manifest(country)
try:
data_release = get_data_release_manifest(country)
Expand Down
8 changes: 8 additions & 0 deletions src/policyengine/provenance/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,14 @@ def build_trace_tro_from_release_bundle(
)
if data_release_manifest is None:
performance["pe:dataReleaseManifestStatus"] = "unavailable"
elif data_release_manifest.preservation_dois:
# The release was mirrored to a DOI-minting preservation host
# (e.g. Zenodo). Surface the record-level DOI so a reader who
# has only this TRO can find and cite the preserved copy if the
# primary host ever becomes unavailable.
performance["pe:preservationDoi"] = list(
data_release_manifest.preservation_dois
)

tro_node = _assemble_tro_node(
tro_name=f"policyengine {country_manifest.country_id} certified bundle TRO",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"country_id": "uk",
"data_package_name": "policyengine-uk-data",
"data_package_name": "populace-data",
"has_employment_income": true,
"has_income_tax": true,
"has_region_registry": true,
Expand Down
6 changes: 3 additions & 3 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ def test_has_release_manifest_metadata(self):
assert uk_latest.release_manifest.country_id == "uk"
assert uk_latest.model_package.name == "policyengine-uk"
assert uk_latest.model_package.version == "2.88.20"
assert uk_latest.data_package.name == "policyengine-uk-data"
assert uk_latest.data_package.version == "1.55.10"
assert uk_latest.data_package.name == "populace-data"
assert uk_latest.data_package.version == "0.1.0"
assert (
uk_latest.default_dataset_uri
== "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@655dd07e4bb9c777b00dac044949611f1feb824f"
== "hf://policyengine/populace-uk-private/populace_uk_2023.h5@populace-uk-2023-72aeefc-20260611"
)

def test_has_hundreds_of_parameters(self):
Expand Down
49 changes: 24 additions & 25 deletions tests/test_release_manifests.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,24 +122,23 @@ def test__given_uk_manifest__then_has_pinned_model_and_data_packages(self):
assert manifest.policyengine_version == POLICYENGINE_VERSION
assert manifest.model_package.name == "policyengine-uk"
assert manifest.model_package.version == "2.88.20"
assert manifest.data_package.name == "policyengine-uk-data"
assert manifest.data_package.version == "1.55.10"
assert (
manifest.data_package.repo_id == "policyengine/policyengine-uk-data-private"
)
assert manifest.data_package.name == "populace-data"
assert manifest.data_package.version == "0.1.0"
assert manifest.data_package.repo_id == "policyengine/populace-uk-private"
assert manifest.certified_data_artifact is not None
assert (
manifest.certified_data_artifact.build_id == "policyengine-uk-data-1.55.10"
manifest.certified_data_artifact.build_id
== "populace-uk-2023-72aeefc-20260611"
)
assert manifest.certified_data_artifact.dataset == "enhanced_frs_2023_24"
assert manifest.certified_data_artifact.dataset == "populace_uk_2023"
assert manifest.certification is not None
assert manifest.certification.data_build_id == "policyengine-uk-data-1.55.10"
assert manifest.certification.built_with_model_version == "2.88.20"
assert manifest.certification.certified_for_model_version == "2.88.20"
assert (
manifest.certification.data_build_fingerprint
== "sha256:77f149725a36055fd89961855230401852b0712d301c6e26d6d16565c6b23809"
manifest.certification.data_build_id == "populace-uk-2023-72aeefc-20260611"
)
assert manifest.certification.built_with_model_version == "2.86.0"
assert manifest.certification.certified_for_model_version == "2.88.20"
assert manifest.certification.compatibility_basis == "compatible_model_packages"
assert manifest.certification.data_build_fingerprint is None

def test__given_us_dataset_name__then_resolves_to_versioned_hf_url(self):
resolved = resolve_dataset_reference("us", "populace_us_2024")
Expand All @@ -166,11 +165,11 @@ def test__given_dataset_explicit_revision__then_resolves_to_that_revision(self):
)

def test__given_uk_dataset_name__then_resolves_to_versioned_hf_url(self):
resolved = resolve_dataset_reference("uk", "enhanced_frs_2023_24")
resolved = resolve_dataset_reference("uk", "populace_uk_2023")

assert (
resolved
== "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@655dd07e4bb9c777b00dac044949611f1feb824f"
== "hf://policyengine/populace-uk-private/populace_uk_2023.h5@populace-uk-2023-72aeefc-20260611"
)

def test__given_explicit_url__then_resolution_is_noop(self):
Expand Down Expand Up @@ -649,12 +648,12 @@ def test__given_manifest_certification__then_release_bundle_exposes_it(self):
bundle = model_version.release_bundle

assert bundle["bundle_id"] == f"uk-{POLICYENGINE_VERSION}"
assert bundle["default_dataset"] == "enhanced_frs_2023_24"
assert bundle["default_dataset"] == "populace_uk_2023"
assert bundle["default_dataset_uri"] == manifest.default_dataset_uri
assert bundle["certified_data_build_id"] == "policyengine-uk-data-1.55.10"
assert bundle["data_build_model_version"] == "2.88.20"
assert bundle["compatibility_basis"] == "exact_build_model_version"
assert bundle["certified_by"] == "policyengine.py bundled manifest"
assert bundle["certified_data_build_id"] == "populace-uk-2023-72aeefc-20260611"
assert bundle["data_build_model_version"] == "2.86.0"
assert bundle["compatibility_basis"] == "compatible_model_packages"
assert bundle["certified_by"] == "policyengine.py certification"

def test__given_runtime_certification__then_release_bundle_prefers_runtime_value(
self,
Expand Down Expand Up @@ -765,22 +764,22 @@ def test__given_uk_managed_dataset_name__then_resolves_within_bundle(self):
),
patch(
"policyengine.tax_benefit_models.uk.model.materialize_dataset_source",
return_value="/tmp/enhanced_frs_2023_24.h5",
return_value="/tmp/populace_uk_2023.h5",
),
):
microsim = managed_uk_microsimulation(dataset="enhanced_frs_2023_24")
microsim = managed_uk_microsimulation(dataset="populace_uk_2023")

dataset = mock_microsimulation.call_args.kwargs["dataset"]
assert dataset == "/tmp/enhanced_frs_2023_24.h5"
assert dataset == "/tmp/populace_uk_2023.h5"
assert (
microsim.policyengine_bundle["policyengine_version"] == POLICYENGINE_VERSION
)
assert microsim.policyengine_bundle["runtime_dataset"] == "enhanced_frs_2023_24"
assert microsim.policyengine_bundle["runtime_dataset"] == "populace_uk_2023"
assert microsim.policyengine_bundle["runtime_dataset_uri"] == (
"hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@655dd07e4bb9c777b00dac044949611f1feb824f"
"hf://policyengine/populace-uk-private/populace_uk_2023.h5@populace-uk-2023-72aeefc-20260611"
)
dataset_source = microsim.policyengine_bundle["runtime_dataset_source"]
assert dataset_source == "/tmp/enhanced_frs_2023_24.h5"
assert dataset_source == "/tmp/populace_uk_2023.h5"

def test__given_uk_unmanaged_dataset_uri__then_source_is_not_rewritten(self):
dataset = "hf://policyengine/policyengine-uk-data-private/frs_2022_23.h5@1.40.4"
Expand Down
19 changes: 19 additions & 0 deletions tests/test_trace_tro.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,25 @@ def test__given_certification__then_fields_are_machine_readable(
== country_manifest.certification.data_build_id
)

def test__given_no_preservation_dois__then_performance_omits_the_field(
self, us_bundle_tro
):
performance = us_bundle_tro["@graph"][0]["trov:hasPerformance"]
assert "pe:preservationDoi" not in performance

def test__given_preservation_dois__then_performance_records_them(self, monkeypatch):
    """A data release manifest with preservation DOIs surfaces them on the TRO.

    Removes ``GITHUB_ACTIONS`` from the environment, copies the US data
    release manifest with ``preservation_dois`` set, builds the bundle TRO,
    and checks that the performance node's ``pe:preservationDoi`` equals
    the list that was supplied.
    """
    monkeypatch.delenv("GITHUB_ACTIONS", raising=False)
    expected_dois = ["10.5281/zenodo.20678516"]
    # Hand the manifest its own list so the expectation stays independent
    # of whatever the builder does with the value it receives.
    data_release = _us_data_release_manifest().model_copy(
        update={"preservation_dois": list(expected_dois)}
    )
    bundle_tro = build_trace_tro_from_release_bundle(
        get_release_manifest("us"),
        data_release,
        fetch_pypi=_fake_fetch_pypi,
    )
    trp = bundle_tro["@graph"][0]["trov:hasPerformance"]
    assert trp["pe:preservationDoi"] == expected_dois

def test__given_github_actions_env__then_emitted_in_is_ci(self, monkeypatch):
monkeypatch.setenv("GITHUB_ACTIONS", "true")
monkeypatch.setenv("GITHUB_SERVER_URL", "https://github.com")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_uk_regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def test__given_uk_registry__then_has_national_region(self):
assert national.region_type == "national"
assert (
national.dataset_path
== "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@655dd07e4bb9c777b00dac044949611f1feb824f"
== "hf://policyengine/populace-uk-private/populace_uk_2023.h5@populace-uk-2023-72aeefc-20260611"
)
assert not national.requires_filter

Expand Down
4 changes: 2 additions & 2 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.