From 4c789e3abba00fc987391d4e09732db81ba0fac7 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 13 Jun 2026 06:06:16 -0400 Subject: [PATCH 1/2] Certify populace-uk as the UK certified default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces enhanced_frs_2023_24 (policyengine-uk-data 1.55.10) with populace_uk_2023 (build populace-uk-2023-72aeefc-20260611) as the UK certified default — the UK counterpart of the US populace promotion. The populace-uk build beat the enhanced FRS 6/6 rotations on the matched symmetric-refit protocol (primary rotation holdout 0.6008 vs 0.6537, per-target 123-25-0); parity, exported-nonzero, and smoke gates pass. Built with policyengine-uk 2.86.0, certified for the current 2.88.20 pin on the publisher's compatible_model_packages claim — made good by this suite passing on the pinned pair. Authored and published the data release manifest (with region_datasets metadata) to the private populace-uk repo, then certified from it. Fixes #408 Co-Authored-By: Claude Fable 5 --- ...pulace-uk-2023-72aeefc-20260611.changed.md | 1 + pyproject.toml | 2 +- .../data/release_manifests/uk.json | 72 ++++++++++--------- .../release_manifests/uk.trace.tro.jsonld | 36 +++++----- .../uk_model_surface.json | 2 +- tests/test_models.py | 6 +- tests/test_release_manifests.py | 49 +++++++------ tests/test_uk_regions.py | 2 +- uv.lock | 4 +- 9 files changed, 87 insertions(+), 87 deletions(-) create mode 100644 changelog.d/certify-uk-populace-uk-2023-72aeefc-20260611.changed.md diff --git a/changelog.d/certify-uk-populace-uk-2023-72aeefc-20260611.changed.md b/changelog.d/certify-uk-populace-uk-2023-72aeefc-20260611.changed.md new file mode 100644 index 00000000..b03575b3 --- /dev/null +++ b/changelog.d/certify-uk-populace-uk-2023-72aeefc-20260611.changed.md @@ -0,0 +1 @@ +Certify the UK data release `populace-uk-2023-72aeefc-20260611` (populace_uk_2023, policyengine-uk 2.88.20) directly from its data release manifest. diff --git a/pyproject.toml b/pyproject.toml index 18111dd9..198c55eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ graph = [ "networkx>=3.0", ] uk = [ - "policyengine_core>=3.26.1", + "policyengine_core>=3.27.1", "policyengine-uk==2.88.20", ] us = [ diff --git a/src/policyengine/data/release_manifests/uk.json b/src/policyengine/data/release_manifests/uk.json index 2048d953..90b2d054 100644 --- a/src/policyengine/data/release_manifests/uk.json +++ b/src/policyengine/data/release_manifests/uk.json @@ -1,53 +1,57 @@ { - "schema_version": 1, "bundle_id": "uk-4.16.2", - "country_id": "uk", - "policyengine_version": "4.16.2", - "model_package": { - "name": "policyengine-uk", - "version": "2.88.20", - "sha256": "8c3dacb868f3fb18296b8ef2475edaf543f57b8056d24a58bca59b108651f272", - "wheel_url": "https://files.pythonhosted.org/packages/32/f0/c0e7dbcc049501dc968da0a67de4976f305228328f96fe0ad08c65301c4f/policyengine_uk-2.88.20-py3-none-any.whl" - }, - "data_package": { - "name": "policyengine-uk-data", - "version": "1.55.10", - "repo_id": "policyengine/policyengine-uk-data-private", - "release_manifest_path": "release_manifest.json", - "release_manifest_revision": "655dd07e4bb9c777b00dac044949611f1feb824f" + "certification": { + "built_with_model_version": "2.86.0", + "certified_by": "policyengine.py certification", + "certified_for_model_version": "2.88.20", + "compatibility_basis": "compatible_model_packages", + "data_build_id": "populace-uk-2023-72aeefc-20260611" }, "certified_data_artifact": { + "build_id": "populace-uk-2023-72aeefc-20260611", "data_package": { - "name": "policyengine-uk-data", - "version": "1.55.10" + "name": "populace-data", + "version": "0.1.0" }, - "build_id": "policyengine-uk-data-1.55.10", - "dataset": "enhanced_frs_2023_24", - "uri": "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@655dd07e4bb9c777b00dac044949611f1feb824f", - "sha256": "584ae33d80ca0431254610a3f8254d132da73477d31966d6446282861ecae50d" + "dataset": "populace_uk_2023", + "sha256": "f489b7ef18b4628d2423338a20da1264a8d197c95a0c95265fc03ac3b6bc5b39", + "uri": "hf://policyengine/populace-uk-private/populace_uk_2023.h5@populace-uk-2023-72aeefc-20260611" }, - "certification": { - "compatibility_basis": "exact_build_model_version", - "data_build_id": "policyengine-uk-data-1.55.10", - "built_with_model_version": "2.88.20", - "certified_for_model_version": "2.88.20", - "data_build_fingerprint": "sha256:77f149725a36055fd89961855230401852b0712d301c6e26d6d16565c6b23809", - "certified_by": "policyengine.py bundled manifest" + "country_id": "uk", + "data_package": { + "name": "populace-data", + "release_manifest_path": "releases/populace-uk-2023-72aeefc-20260611/release_manifest.json", + "release_manifest_revision": "2bab9dfc38e8b28b52e3461510f6862b3557db5f", + "repo_id": "policyengine/populace-uk-private", + "repo_type": "dataset", + "version": "0.1.0" }, - "default_dataset": "enhanced_frs_2023_24", "datasets": { "frs_2023_24": { "path": "frs_2023_24.h5", + "repo_id": "policyengine/policyengine-uk-data-private", + "revision": "655dd07e4bb9c777b00dac044949611f1feb824f", "sha256": "df26d4d7af9d164aa2d064181b39290292d2f62bb26fee6126fc095fc06da292" }, - "enhanced_frs_2023_24": { - "path": "enhanced_frs_2023_24.h5", - "sha256": "584ae33d80ca0431254610a3f8254d132da73477d31966d6446282861ecae50d" + "populace_uk_2023": { + "path": "populace_uk_2023.h5", + "repo_id": "policyengine/populace-uk-private", + "revision": "populace-uk-2023-72aeefc-20260611", + "sha256": "f489b7ef18b4628d2423338a20da1264a8d197c95a0c95265fc03ac3b6bc5b39" } }, + "default_dataset": "populace_uk_2023", + "model_package": { + "name": "policyengine-uk", + "sha256": "8c3dacb868f3fb18296b8ef2475edaf543f57b8056d24a58bca59b108651f272", + "version": "2.88.20", + "wheel_url": "https://files.pythonhosted.org/packages/32/f0/c0e7dbcc049501dc968da0a67de4976f305228328f96fe0ad08c65301c4f/policyengine_uk-2.88.20-py3-none-any.whl" + }, + "policyengine_version": "4.16.2", "region_datasets": { "national": { - "path_template": "enhanced_frs_2023_24.h5" + "path_template": "populace_uk_2023.h5" } - } + }, + "schema_version": 1 } diff --git a/src/policyengine/data/release_manifests/uk.trace.tro.jsonld b/src/policyengine/data/release_manifests/uk.trace.tro.jsonld index 5e9446d1..0e267ba3 100644 --- a/src/policyengine/data/release_manifests/uk.trace.tro.jsonld +++ b/src/policyengine/data/release_manifests/uk.trace.tro.jsonld @@ -17,7 +17,7 @@ "schema:name": "PolicyEngine", "schema:url": "https://policyengine.org" }, - "schema:dateCreated": "2026-05-20T20:16:50.641086Z", + "schema:dateCreated": "2026-06-11T16:41:25Z", "schema:description": "TRACE TRO for certified runtime bundle uk-4.16.2 covering the bundle manifest, the certified dataset artifact, the country model wheel, and the country data release manifest when it is available.", "schema:name": "policyengine uk certified bundle TRO", "trov:createdWith": { @@ -45,7 +45,7 @@ "trov:hasArtifact": { "@id": "composition/1/artifact/data_release_manifest" }, - "trov:hasLocation": "https://huggingface.co/policyengine/policyengine-uk-data-private/resolve/655dd07e4bb9c777b00dac044949611f1feb824f/release_manifest.json" + "trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-uk-private/resolve/f9f713e3e9ff3d153097a1f8cf515dd506e3a1da/releases/populace-uk-2023-72aeefc-20260611/release_manifest.json" }, { "@id": "arrangement/1/location/dataset", @@ -53,7 +53,7 @@ "trov:hasArtifact": { "@id": "composition/1/artifact/dataset" }, - "trov:hasLocation": "https://huggingface.co/policyengine/policyengine-uk-data-private/resolve/655dd07e4bb9c777b00dac044949611f1feb824f/enhanced_frs_2023_24.h5" + "trov:hasLocation": "https://huggingface.co/policyengine/populace-uk-private/resolve/populace-uk-2023-72aeefc-20260611/populace_uk_2023.h5" }, { "@id": "arrangement/1/location/model_wheel", @@ -75,21 +75,21 @@ "@type": "trov:ResearchArtifact", "schema:name": "policyengine.py bundle manifest for uk", "trov:mimeType": "application/json", - "trov:sha256": "5f4629bb55d361c9a1a9f2a46950570acccfefd10ec5fd04865b27d9017d10ea" + "trov:sha256": "770c4f0a4064f3383e2dc23c4e21bacd04ff54c4fc7181070fef6a7b1fecc3bc" }, { "@id": "composition/1/artifact/data_release_manifest", "@type": "trov:ResearchArtifact", - "schema:name": "policyengine-uk-data release manifest 1.55.10", + "schema:name": "populace-data release manifest 0.1.0", "trov:mimeType": "application/json", - "trov:sha256": "9f41a0f14ca93d20e61d33419173c3fedc1c3ba295b6ca67dd3197a41643d179" + "trov:sha256": "2da838b46af75177a4b0b5e13a1827f5001773781acb3a32d7939adeca3a3e83" }, { "@id": "composition/1/artifact/dataset", "@type": "trov:ResearchArtifact", - "schema:name": "enhanced_frs_2023_24", + "schema:name": "populace_uk_2023", "trov:mimeType": "application/x-hdf5", - "trov:sha256": "584ae33d80ca0431254610a3f8254d132da73477d31966d6446282861ecae50d" + "trov:sha256": "f489b7ef18b4628d2423338a20da1264a8d197c95a0c95265fc03ac3b6bc5b39" }, { "@id": "composition/1/artifact/model_wheel", @@ -102,27 +102,23 @@ "trov:hasFingerprint": { "@id": "composition/1/fingerprint", "@type": "trov:CompositionFingerprint", - "trov:sha256": "cfdc777af8b1dd4666c26bad8ceb7a47e0fe6cec0f89c569601e90055760858e" + "trov:sha256": "940b77cd761050bb7c84f87515d7a35ed8d8498b37e384116eaae931d71c84a8" } }, "trov:hasPerformance": { "@id": "trp/1", "@type": "trov:TransparentResearchPerformance", - "pe:builtWithModelVersion": "2.88.20", - "pe:certifiedBy": "policyengine.py bundled manifest", + "pe:builtWithModelVersion": "2.86.0", + "pe:certifiedBy": "policyengine.py certification", "pe:certifiedForModelVersion": "2.88.20", - "pe:ciGitRef": "refs/heads/main", - "pe:ciGitSha": "8b084c5cefdf02d38dd6f974fd846905c3167f39", - "pe:ciRunUrl": "https://github.com/PolicyEngine/policyengine.py/actions/runs/27407803462", - "pe:compatibilityBasis": "exact_build_model_version", - "pe:dataBuildFingerprint": "sha256:77f149725a36055fd89961855230401852b0712d301c6e26d6d16565c6b23809", - "pe:dataBuildId": "policyengine-uk-data-1.55.10", - "pe:emittedIn": "github-actions", - "rdfs:comment": "Certification of build policyengine-uk-data-1.55.10 for policyengine-uk 2.88.20.", + "pe:compatibilityBasis": "compatible_model_packages", + "pe:dataBuildId": "populace-uk-2023-72aeefc-20260611", + "pe:emittedIn": "local", + "rdfs:comment": "Certification of build populace-uk-2023-72aeefc-20260611 for policyengine-uk 2.88.20.", "trov:accessedArrangement": { "@id": "arrangement/1" }, - "trov:startedAtTime": "2026-05-20T20:16:50.641086Z", + "trov:startedAtTime": "2026-06-11T16:41:25Z", "trov:wasConductedBy": { "@id": "trs" } diff --git a/tests/fixtures/household_calculator_snapshots/uk_model_surface.json b/tests/fixtures/household_calculator_snapshots/uk_model_surface.json index 161ef0ec..2f41424d 100644 --- a/tests/fixtures/household_calculator_snapshots/uk_model_surface.json +++ b/tests/fixtures/household_calculator_snapshots/uk_model_surface.json @@ -1,6 +1,6 @@ { "country_id": "uk", - "data_package_name": "policyengine-uk-data", + "data_package_name": "populace-data", "has_employment_income": true, "has_income_tax": true, "has_region_registry": true, diff --git a/tests/test_models.py b/tests/test_models.py index bf213442..384e351b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -30,11 +30,11 @@ def test_has_release_manifest_metadata(self): assert uk_latest.release_manifest.country_id == "uk" assert uk_latest.model_package.name == "policyengine-uk" assert uk_latest.model_package.version == "2.88.20" - assert uk_latest.data_package.name == "policyengine-uk-data" - assert uk_latest.data_package.version == "1.55.10" + assert uk_latest.data_package.name == "populace-data" + assert uk_latest.data_package.version == "0.1.0" assert ( uk_latest.default_dataset_uri - == "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@655dd07e4bb9c777b00dac044949611f1feb824f" + == "hf://policyengine/populace-uk-private/populace_uk_2023.h5@populace-uk-2023-72aeefc-20260611" ) def test_has_hundreds_of_parameters(self): diff --git a/tests/test_release_manifests.py b/tests/test_release_manifests.py index a369a2a1..4fbabbb3 100644 --- a/tests/test_release_manifests.py +++ b/tests/test_release_manifests.py @@ -122,24 +122,23 @@ def test__given_uk_manifest__then_has_pinned_model_and_data_packages(self): assert manifest.policyengine_version == POLICYENGINE_VERSION assert manifest.model_package.name == "policyengine-uk" assert manifest.model_package.version == "2.88.20" - assert manifest.data_package.name == "policyengine-uk-data" - assert manifest.data_package.version == "1.55.10" - assert ( - manifest.data_package.repo_id == "policyengine/policyengine-uk-data-private" - ) + assert manifest.data_package.name == "populace-data" + assert manifest.data_package.version == "0.1.0" + assert manifest.data_package.repo_id == "policyengine/populace-uk-private" assert manifest.certified_data_artifact is not None assert ( - manifest.certified_data_artifact.build_id == "policyengine-uk-data-1.55.10" + manifest.certified_data_artifact.build_id + == "populace-uk-2023-72aeefc-20260611" ) - assert manifest.certified_data_artifact.dataset == "enhanced_frs_2023_24" + assert manifest.certified_data_artifact.dataset == "populace_uk_2023" assert manifest.certification is not None - assert manifest.certification.data_build_id == "policyengine-uk-data-1.55.10" - assert manifest.certification.built_with_model_version == "2.88.20" - assert manifest.certification.certified_for_model_version == "2.88.20" assert ( - manifest.certification.data_build_fingerprint - == "sha256:77f149725a36055fd89961855230401852b0712d301c6e26d6d16565c6b23809" + manifest.certification.data_build_id == "populace-uk-2023-72aeefc-20260611" ) + assert manifest.certification.built_with_model_version == "2.86.0" + assert manifest.certification.certified_for_model_version == "2.88.20" + assert manifest.certification.compatibility_basis == "compatible_model_packages" + assert manifest.certification.data_build_fingerprint is None def test__given_us_dataset_name__then_resolves_to_versioned_hf_url(self): resolved = resolve_dataset_reference("us", "populace_us_2024") @@ -166,11 +165,11 @@ def test__given_dataset_explicit_revision__then_resolves_to_that_revision(self): ) def test__given_uk_dataset_name__then_resolves_to_versioned_hf_url(self): - resolved = resolve_dataset_reference("uk", "enhanced_frs_2023_24") + resolved = resolve_dataset_reference("uk", "populace_uk_2023") assert ( resolved - == "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@655dd07e4bb9c777b00dac044949611f1feb824f" + == "hf://policyengine/populace-uk-private/populace_uk_2023.h5@populace-uk-2023-72aeefc-20260611" ) def test__given_explicit_url__then_resolution_is_noop(self): @@ -649,12 +648,12 @@ def test__given_manifest_certification__then_release_bundle_exposes_it(self): bundle = model_version.release_bundle assert bundle["bundle_id"] == f"uk-{POLICYENGINE_VERSION}" - assert bundle["default_dataset"] == "enhanced_frs_2023_24" + assert bundle["default_dataset"] == "populace_uk_2023" assert bundle["default_dataset_uri"] == manifest.default_dataset_uri - assert bundle["certified_data_build_id"] == "policyengine-uk-data-1.55.10" - assert bundle["data_build_model_version"] == "2.88.20" - assert bundle["compatibility_basis"] == "exact_build_model_version" - assert bundle["certified_by"] == "policyengine.py bundled manifest" + assert bundle["certified_data_build_id"] == "populace-uk-2023-72aeefc-20260611" + assert bundle["data_build_model_version"] == "2.86.0" + assert bundle["compatibility_basis"] == "compatible_model_packages" + assert bundle["certified_by"] == "policyengine.py certification" def test__given_runtime_certification__then_release_bundle_prefers_runtime_value( self, @@ -765,22 +764,22 @@ def test__given_uk_managed_dataset_name__then_resolves_within_bundle(self): ), patch( "policyengine.tax_benefit_models.uk.model.materialize_dataset_source", - return_value="/tmp/enhanced_frs_2023_24.h5", + return_value="/tmp/populace_uk_2023.h5", ), ): - microsim = managed_uk_microsimulation(dataset="enhanced_frs_2023_24") + microsim = managed_uk_microsimulation(dataset="populace_uk_2023") dataset = mock_microsimulation.call_args.kwargs["dataset"] - assert dataset == "/tmp/enhanced_frs_2023_24.h5" + assert dataset == "/tmp/populace_uk_2023.h5" assert ( microsim.policyengine_bundle["policyengine_version"] == POLICYENGINE_VERSION ) - assert microsim.policyengine_bundle["runtime_dataset"] == "enhanced_frs_2023_24" + assert microsim.policyengine_bundle["runtime_dataset"] == "populace_uk_2023" assert microsim.policyengine_bundle["runtime_dataset_uri"] == ( - "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@655dd07e4bb9c777b00dac044949611f1feb824f" + "hf://policyengine/populace-uk-private/populace_uk_2023.h5@populace-uk-2023-72aeefc-20260611" ) dataset_source = microsim.policyengine_bundle["runtime_dataset_source"] - assert dataset_source == "/tmp/enhanced_frs_2023_24.h5" + assert dataset_source == "/tmp/populace_uk_2023.h5" def test__given_uk_unmanaged_dataset_uri__then_source_is_not_rewritten(self): dataset = "hf://policyengine/policyengine-uk-data-private/frs_2022_23.h5@1.40.4" diff --git a/tests/test_uk_regions.py b/tests/test_uk_regions.py index b8f92ffc..d005a44f 100644 --- a/tests/test_uk_regions.py +++ b/tests/test_uk_regions.py @@ -77,7 +77,7 @@ def test__given_uk_registry__then_has_national_region(self): assert national.region_type == "national" assert ( national.dataset_path - == "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@655dd07e4bb9c777b00dac044949611f1feb824f" + == "hf://policyengine/populace-uk-private/populace_uk_2023.h5@populace-uk-2023-72aeefc-20260611" ) assert not national.requires_filter diff --git a/uv.lock b/uv.lock index 7db442e2..88fa51fe 100644 --- a/uv.lock +++ b/uv.lock @@ -2820,7 +2820,7 @@ wheels = [ [[package]] name = "policyengine" -version = "4.16.1" +version = "4.16.2" source = { editable = "." } dependencies = [ { name = "diskcache" }, @@ -2893,7 +2893,7 @@ requires-dist = [ { name = "plotly", marker = "extra == 'dev'", specifier = ">=5.0.0" }, { name = "plotly", marker = "extra == 'plotting'", specifier = ">=5.0.0" }, { name = "policyengine-core", marker = "extra == 'dev'", specifier = ">=3.27.1" }, - { name = "policyengine-core", marker = "extra == 'uk'", specifier = ">=3.26.1" }, + { name = "policyengine-core", marker = "extra == 'uk'", specifier = ">=3.27.1" }, { name = "policyengine-core", marker = "extra == 'us'", specifier = ">=3.27.1" }, { name = "policyengine-uk", marker = "extra == 'dev'", specifier = "==2.88.20" }, { name = "policyengine-uk", marker = "extra == 'uk'", specifier = "==2.88.20" }, From f6f0a28b8cd616ae5c33f61396ad2f12e1a912e5 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 14 Jun 2026 07:41:55 -0400 Subject: [PATCH 2/2] Surface UK preservation DOI on the certified bundle TRO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-pin the UK certification to the data release manifest revision that records the Zenodo preservation DOI (10.5281/zenodo.20678518), so the shipped uk.trace.tro.jsonld now carries pe:preservationDoi. Adds the same TRO emitter as #405 (build_trace_tro_from_release_bundle surfaces DataReleaseManifest.preservation_dois) — the hunk is shared with that PR and reconciles on merge. Also fixes a latent bug in regenerate_trace_tro: it now clears the manifest lru_caches before reading, so a certification run that rewrites the country manifest immediately before regenerating the TRO cannot emit a stale TRO that lags the pin it just wrote. Co-Authored-By: Claude Opus 4.8 (1M context) --- ...pulace-uk-2023-72aeefc-20260611.changed.md | 2 +- .../data/release_manifests/uk.json | 2 +- .../release_manifests/uk.trace.tro.jsonld | 11 +++++++---- src/policyengine/provenance/bundle.py | 8 ++++++++ src/policyengine/provenance/trace.py | 8 ++++++++ tests/test_trace_tro.py | 19 +++++++++++++++++++ 6 files changed, 44 insertions(+), 6 deletions(-) diff --git a/changelog.d/certify-uk-populace-uk-2023-72aeefc-20260611.changed.md b/changelog.d/certify-uk-populace-uk-2023-72aeefc-20260611.changed.md index b03575b3..ebb7bc8b 100644 --- a/changelog.d/certify-uk-populace-uk-2023-72aeefc-20260611.changed.md +++ b/changelog.d/certify-uk-populace-uk-2023-72aeefc-20260611.changed.md @@ -1 +1 @@ -Certify the UK data release `populace-uk-2023-72aeefc-20260611` (populace_uk_2023, policyengine-uk 2.88.20) directly from its data release manifest. +Certify the UK data release `populace-uk-2023-72aeefc-20260611` (populace_uk_2023, policyengine-uk 2.88.20) directly from its data release manifest, and surface its Zenodo preservation DOI (10.5281/zenodo.20678518) on the bundle TRACE TRO. diff --git a/src/policyengine/data/release_manifests/uk.json b/src/policyengine/data/release_manifests/uk.json index 90b2d054..99aacab0 100644 --- a/src/policyengine/data/release_manifests/uk.json +++ b/src/policyengine/data/release_manifests/uk.json @@ -21,7 +21,7 @@ "data_package": { "name": "populace-data", "release_manifest_path": "releases/populace-uk-2023-72aeefc-20260611/release_manifest.json", - "release_manifest_revision": "2bab9dfc38e8b28b52e3461510f6862b3557db5f", + "release_manifest_revision": "072b4067f3477d51b5023ae5250c9a1ab3b60164", "repo_id": "policyengine/populace-uk-private", "repo_type": "dataset", "version": "0.1.0" diff --git a/src/policyengine/data/release_manifests/uk.trace.tro.jsonld b/src/policyengine/data/release_manifests/uk.trace.tro.jsonld index 0e267ba3..ce488829 100644 --- a/src/policyengine/data/release_manifests/uk.trace.tro.jsonld +++ b/src/policyengine/data/release_manifests/uk.trace.tro.jsonld @@ -45,7 +45,7 @@ "trov:hasArtifact": { "@id": "composition/1/artifact/data_release_manifest" }, - "trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-uk-private/resolve/f9f713e3e9ff3d153097a1f8cf515dd506e3a1da/releases/populace-uk-2023-72aeefc-20260611/release_manifest.json" + "trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-uk-private/resolve/072b4067f3477d51b5023ae5250c9a1ab3b60164/releases/populace-uk-2023-72aeefc-20260611/release_manifest.json" }, { "@id": "arrangement/1/location/dataset", @@ -75,14 +75,14 @@ "@type": "trov:ResearchArtifact", "schema:name": "policyengine.py bundle manifest for uk", "trov:mimeType": "application/json", - "trov:sha256": "770c4f0a4064f3383e2dc23c4e21bacd04ff54c4fc7181070fef6a7b1fecc3bc" + "trov:sha256": "b722a3d3faa604f0b01e64758cfd2836b1753860aef3abb942f5bad2a72a8763" }, { "@id": "composition/1/artifact/data_release_manifest", "@type": "trov:ResearchArtifact", "schema:name": "populace-data release manifest 0.1.0", "trov:mimeType": "application/json", - "trov:sha256": "2da838b46af75177a4b0b5e13a1827f5001773781acb3a32d7939adeca3a3e83" + "trov:sha256": "ac1e5126109164c90c9a70e29531a53583e7a9047ef6678f31b459c93f48428b" }, { "@id": "composition/1/artifact/dataset", @@ -102,7 +102,7 @@ "trov:hasFingerprint": { "@id": "composition/1/fingerprint", "@type": "trov:CompositionFingerprint", - "trov:sha256": "940b77cd761050bb7c84f87515d7a35ed8d8498b37e384116eaae931d71c84a8" + "trov:sha256": "a0549aa701b7f6897e51b7a4e4397e75493995bcbc8c17a8420a5063ed0f0ef2" } }, "trov:hasPerformance": { @@ -114,6 +114,9 @@ "pe:compatibilityBasis": "compatible_model_packages", "pe:dataBuildId": "populace-uk-2023-72aeefc-20260611", "pe:emittedIn": "local", + "pe:preservationDoi": [ + "10.5281/zenodo.20678518" + ], "rdfs:comment": "Certification of build populace-uk-2023-72aeefc-20260611 for policyengine-uk 2.88.20.", "trov:accessedArrangement": { "@id": "arrangement/1" diff --git a/src/policyengine/provenance/bundle.py b/src/policyengine/provenance/bundle.py index e54186fa..b5456135 100644 --- a/src/policyengine/provenance/bundle.py +++ b/src/policyengine/provenance/bundle.py @@ -733,6 +733,14 @@ def regenerate_trace_tro(country: str, manifest_dir: Path = MANIFEST_DIR) -> Pat serialize_trace_tro, ) + # Read the current on-disk country manifest and the live data-release + # manifest, not values a caller cached earlier in this process. + # Certification rewrites the country manifest immediately before + # calling this, so a stale cache would emit a TRO that lags the pin + # it just wrote (e.g. missing a freshly recorded preservation DOI). + get_release_manifest.cache_clear() + get_data_release_manifest.cache_clear() + release = get_release_manifest(country) try: data_release = get_data_release_manifest(country) diff --git a/src/policyengine/provenance/trace.py b/src/policyengine/provenance/trace.py index ff0e39dc..a0196af9 100644 --- a/src/policyengine/provenance/trace.py +++ b/src/policyengine/provenance/trace.py @@ -457,6 +457,14 @@ def build_trace_tro_from_release_bundle( ) if data_release_manifest is None: performance["pe:dataReleaseManifestStatus"] = "unavailable" + elif data_release_manifest.preservation_dois: + # The release was mirrored to a DOI-minting preservation host + # (e.g. Zenodo). Surface the record-level DOI so a reader who + # has only this TRO can find and cite the preserved copy if the + # primary host ever becomes unavailable. + performance["pe:preservationDoi"] = list( + data_release_manifest.preservation_dois + ) tro_node = _assemble_tro_node( tro_name=f"policyengine {country_manifest.country_id} certified bundle TRO", diff --git a/tests/test_trace_tro.py b/tests/test_trace_tro.py index edabac77..8f9d1398 100644 --- a/tests/test_trace_tro.py +++ b/tests/test_trace_tro.py @@ -390,6 +390,25 @@ def test__given_certification__then_fields_are_machine_readable( == country_manifest.certification.data_build_id ) + def test__given_no_preservation_dois__then_performance_omits_the_field( + self, us_bundle_tro + ): + performance = us_bundle_tro["@graph"][0]["trov:hasPerformance"] + assert "pe:preservationDoi" not in performance + + def test__given_preservation_dois__then_performance_records_them(self, monkeypatch): + monkeypatch.delenv("GITHUB_ACTIONS", raising=False) + manifest = _us_data_release_manifest().model_copy( + update={"preservation_dois": ["10.5281/zenodo.20678516"]} + ) + tro = build_trace_tro_from_release_bundle( + get_release_manifest("us"), + manifest, + fetch_pypi=_fake_fetch_pypi, + ) + performance = tro["@graph"][0]["trov:hasPerformance"] + assert performance["pe:preservationDoi"] == ["10.5281/zenodo.20678516"] + def test__given_github_actions_env__then_emitted_in_is_ci(self, monkeypatch): monkeypatch.setenv("GITHUB_ACTIONS", "true") monkeypatch.setenv("GITHUB_SERVER_URL", "https://github.com")