diff --git a/changelog.d/populace-us-20260616.fixed.md b/changelog.d/populace-us-20260616.fixed.md new file mode 100644 index 00000000..01afa588 --- /dev/null +++ b/changelog.d/populace-us-20260616.fixed.md @@ -0,0 +1 @@ +Certify the Populace-US data release `populace-us-2024-a912aea-76666318a202-20260616T175345Z` and keep data-only release refreshes aligned with release-scoped diagnostics artifacts. diff --git a/src/policyengine/data/release_manifests/us.json b/src/policyengine/data/release_manifests/us.json index 306241c0..2d3df101 100644 --- a/src/policyengine/data/release_manifests/us.json +++ b/src/policyengine/data/release_manifests/us.json @@ -2,54 +2,66 @@ "bundle_id": "us-4.17.4", "certification": { "built_with_model_version": "1.729.0", - "certified_by": "policyengine.py certification", + "certified_by": "populace-data release manifest", "certified_for_model_version": "1.729.0", - "compatibility_basis": "built_with_model_package", - "data_build_id": "populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z" + "compatibility_basis": "exact_build_model_version", + "data_build_id": "populace-us-2024-a912aea-76666318a202-20260616T175345Z" }, "certified_data_artifact": { - "build_id": "populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z", + "build_id": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", "data_package": { "name": "populace-data", "version": "0.1.0" }, "dataset": "populace_us_2024", - "sha256": "a912aea0ca0c27bb5225516bf859d4937cad0579a2f3971d7306086bc61cbfd7", - "uri": "hf://policyengine/populace-us/populace_us_2024.h5@populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z" + "sha256": "9d87c7ff370be524e73aaf68d151b00846eefcae4b00a63760102e2c6f285f92", + "uri": "hf://policyengine/populace-us/populace_us_2024.h5@c4e2fd454ddce0e1889ab77abff178a7bdd72b18" }, "country_id": "us", "data_package": { "name": "populace-data", - "release_manifest_path": "releases/populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z/release_manifest.json", - "release_manifest_revision": "populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z", + "release_manifest_path": "releases/populace-us-2024-a912aea-76666318a202-20260616T175345Z/release_manifest.json", + "release_manifest_revision": "c4e2fd454ddce0e1889ab77abff178a7bdd72b18", "repo_id": "policyengine/populace-us", "repo_type": "dataset", "version": "0.1.0" }, "datasets": { "calibration_diagnostics": { - "path": "releases/populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z/calibration_diagnostics.json", + "path": "releases/populace-us-2024-a912aea-76666318a202-20260616T175345Z/calibration_diagnostics.json", "repo_id": "policyengine/populace-us", - "revision": "populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z", - "sha256": "4c363816bce3decabb392c6d61002420fd60de85beec30022c3d81684765763b" + "revision": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", + "sha256": "154a1b217211d92c50e0fb84750888920cf8a63afcf9437efa85e484a7d501c9" }, "populace_us_2024": { "path": "populace_us_2024.h5", "repo_id": "policyengine/populace-us", - "revision": "populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z", - "sha256": "a912aea0ca0c27bb5225516bf859d4937cad0579a2f3971d7306086bc61cbfd7" + "revision": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", + "sha256": "9d87c7ff370be524e73aaf68d151b00846eefcae4b00a63760102e2c6f285f92" }, "populace_us_2024_calibration": { "path": "populace_us_2024_calibration.npz", "repo_id": "policyengine/populace-us", - "revision": "populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z", - "sha256": "f701d36341c5d6c39862acef14db074027bdcd289d3523571020c94892f33ccb" + "revision": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", + "sha256": "0679dd35dbb198164beee6d56626af2b2fb57d6a3b6ea6511daf908e66296175" }, "us_source_coverage": { - "path": "releases/populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z/us_source_coverage.json", + "path": "releases/populace-us-2024-a912aea-76666318a202-20260616T175345Z/us_source_coverage.json", "repo_id": "policyengine/populace-us", - "revision": "populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z", - "sha256": "66c3749e39d324ae892b3482341468b6205d06e69140c19b7c5136f69e0cb0b3" + "revision": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", + "sha256": "233a87ccc1c1eb8ed95321b7ebe586cd483e4e5af37686e182803f3b88edc76d" + }, + "reform_validation": { + "path": "releases/populace-us-2024-a912aea-76666318a202-20260616T175345Z/reform_validation.json", + "revision": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", + "repo_id": "policyengine/populace-us", + "sha256": "266851a23595eb832fdc3a88453fd60dfc12cd258e557b4ae7fd92b19eeb4f9e" + }, + "demographics": { + "path": "releases/populace-us-2024-a912aea-76666318a202-20260616T175345Z/demographics.json", + "revision": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", + "repo_id": "policyengine/populace-us", + "sha256": "073b9edf5a0594ab9e24c8aaae2a981a09ae510869a3f6c6cec3bb5500914897" } }, "default_dataset": "populace_us_2024", diff --git a/src/policyengine/data/release_manifests/us.trace.tro.jsonld b/src/policyengine/data/release_manifests/us.trace.tro.jsonld index 904a0591..0630cc95 100644 --- a/src/policyengine/data/release_manifests/us.trace.tro.jsonld +++ b/src/policyengine/data/release_manifests/us.trace.tro.jsonld @@ -17,7 +17,7 @@ "schema:name": "PolicyEngine", "schema:url": "https://policyengine.org" }, - "schema:dateCreated": "2026-06-16T13:15:11.179501+00:00", + "schema:dateCreated": "2026-06-16T18:48:19.511357+00:00", "schema:description": "TRACE TRO for certified runtime bundle us-4.17.4 covering the bundle manifest, the certified dataset artifact, the country model wheel, and the country data release manifest when it is available.", "schema:name": "policyengine us certified bundle TRO", "trov:createdWith": { @@ -45,7 +45,7 @@ "trov:hasArtifact": { "@id": "composition/1/artifact/data_release_manifest" }, - "trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-us/resolve/populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z/releases/populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z/release_manifest.json" + "trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-us/resolve/c4e2fd454ddce0e1889ab77abff178a7bdd72b18/releases/populace-us-2024-a912aea-76666318a202-20260616T175345Z/release_manifest.json" }, { "@id": "arrangement/1/location/dataset", @@ -53,7 +53,7 @@ "trov:hasArtifact": { "@id": "composition/1/artifact/dataset" }, - "trov:hasLocation": "https://huggingface.co/policyengine/populace-us/resolve/populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z/populace_us_2024.h5" + "trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-us/resolve/c4e2fd454ddce0e1889ab77abff178a7bdd72b18/populace_us_2024.h5" }, { "@id": "arrangement/1/location/model_wheel", @@ -75,21 +75,21 @@ "@type": "trov:ResearchArtifact", "schema:name": "policyengine.py bundle manifest for us", "trov:mimeType": "application/json", - "trov:sha256": "e9f9542464d4a73257b0a926463676122ac860ec5e217393d5c043bbc3185444" + "trov:sha256": "b7546cfb871cebc6a4ded1c5f6e1c5075e018577ccef92f8b9a32ed7faf0756e" }, { "@id": "composition/1/artifact/data_release_manifest", "@type": "trov:ResearchArtifact", "schema:name": "populace-data release manifest 0.1.0", "trov:mimeType": "application/json", - "trov:sha256": "d2506e739a20ab4cdb8a709a93fb9c766e36566028e36b946e5f2f7d6714c061" + "trov:sha256": "b50925148419218dec45800baa1c3c683b966a622a8cb281e49410b65c92d099" }, { "@id": "composition/1/artifact/dataset", "@type": "trov:ResearchArtifact", "schema:name": "populace_us_2024", "trov:mimeType": "application/x-hdf5", - "trov:sha256": "a912aea0ca0c27bb5225516bf859d4937cad0579a2f3971d7306086bc61cbfd7" + "trov:sha256": "9d87c7ff370be524e73aaf68d151b00846eefcae4b00a63760102e2c6f285f92" }, { "@id": "composition/1/artifact/model_wheel", @@ -102,26 +102,23 @@ "trov:hasFingerprint": { "@id": "composition/1/fingerprint", "@type": "trov:CompositionFingerprint", - "trov:sha256": "9ec19a4d5c7074985ba476ebf0f7dff1eb4edb096a7e69857fa53f3c05bed2b5" + "trov:sha256": "575393c273989d893e6f1534b730f17896f652fcdb54f7f2057421794d0dd31b" } }, "trov:hasPerformance": { "@id": "trp/1", "@type": "trov:TransparentResearchPerformance", "pe:builtWithModelVersion": "1.729.0", - "pe:certifiedBy": "policyengine.py certification", + "pe:certifiedBy": "populace-data release manifest", "pe:certifiedForModelVersion": "1.729.0", - "pe:ciGitRef": "refs/heads/main", - "pe:ciGitSha": "e27453d8e0532d2394896570b7a1383f96930c69", - "pe:ciRunUrl": "https://github.com/PolicyEngine/policyengine.py/actions/runs/27622227038", - "pe:compatibilityBasis": "built_with_model_package", - "pe:dataBuildId": "populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z", - "pe:emittedIn": "github-actions", - "rdfs:comment": "Certification of build populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z for policyengine-us 1.729.0.", + "pe:compatibilityBasis": "exact_build_model_version", + "pe:dataBuildId": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", + "pe:emittedIn": "local", + "rdfs:comment": "Certification of build populace-us-2024-a912aea-76666318a202-20260616T175345Z for policyengine-us 1.729.0.", "trov:accessedArrangement": { "@id": "arrangement/1" }, - "trov:startedAtTime": "2026-06-16T13:15:11.179501+00:00", + "trov:startedAtTime": "2026-06-16T18:48:19.511357+00:00", "trov:wasConductedBy": { "@id": "trs" } diff --git a/src/policyengine/provenance/bundle.py b/src/policyengine/provenance/bundle.py index 9c519e7e..b6be5c16 100644 --- a/src/policyengine/provenance/bundle.py +++ b/src/policyengine/provenance/bundle.py @@ -36,6 +36,7 @@ import hashlib import json import os +import posixpath import re from dataclasses import dataclass from pathlib import Path @@ -258,6 +259,35 @@ def _metadata_sidecar_path(path: str) -> str: return f"{path}.metadata.json" +def _release_scoped_artifact_path( + artifact: dict, + *, + release_manifest_path: str | None, + data_repo_id: str, +) -> str: + """Return the dereferenceable country-manifest path for a release artifact. + + Populace release manifests describe diagnostics relative to the release + directory, while the HF files are published under ``releases/{id}/``. + Runtime manifests must store the dereferenceable path. + """ + path = artifact.get("path", "") + if not path: + return path + if not release_manifest_path: + return path + release_dir = posixpath.dirname(release_manifest_path) + if ( + release_dir + and release_dir != "." + and artifact.get("kind") == "diagnostics" + and artifact.get("repo_id") == data_repo_id + and not path.startswith(f"{release_dir}/") + ): + return f"{release_dir}/{path}" + return path + + def _specifier_matches(*, version: str, specifier: str) -> bool: try: return Version(version) in SpecifierSet(specifier) @@ -324,6 +354,9 @@ def _release_manifest_compatibility_basis( def _refresh_dataset_path_references_from_data_release( manifest_json: dict, release_manifest_json: dict, + *, + release_manifest_path: str | None = None, + data_repo_id: str, ) -> None: """Refresh bundled dataset hash pins from a data release manifest. @@ -332,58 +365,89 @@ def _refresh_dataset_path_references_from_data_release( ``datasets``; notably the US long-term bundle stores one entry per year with both H5 and metadata-sidecar hashes. """ - for path_reference in manifest_json.get("datasets", {}).values(): - path = path_reference.get("path") - if not path: - continue - if path_reference.get("revision"): - continue - artifact = _release_artifact_by_path(release_manifest_json, path) - if artifact is None: - if "sha256" in path_reference or "metadata_sha256" in path_reference: - raise ValueError( - "Data release manifest is missing dataset artifact " - f"for existing pinned path {path!r}; refusing to leave " - "stale dataset hash pins in place." - ) - continue - if artifact.get("path"): - path_reference["path"] = artifact["path"] - path = artifact["path"] + datasets = manifest_json.setdefault("datasets", {}) + release_artifacts = release_manifest_json.get("artifacts", {}) + + def update_reference_from_artifact(path_reference: dict, artifact: dict) -> None: + raw_path = artifact.get("path") + if raw_path: + path_reference["path"] = _release_scoped_artifact_path( + artifact, + release_manifest_path=release_manifest_path, + data_repo_id=data_repo_id, + ) + if artifact.get("revision"): + path_reference["revision"] = artifact["revision"] + if artifact.get("repo_id"): + path_reference["repo_id"] = artifact["repo_id"] + if artifact.get("repo_type"): + path_reference["repo_type"] = artifact["repo_type"] + dataset_sha256 = artifact.get("sha256") if dataset_sha256: path_reference["sha256"] = dataset_sha256 elif "sha256" in path_reference: raise ValueError( "Data release manifest dataset artifact lacks sha256 " - f"for existing pinned path {path!r}; refusing to leave " + f"for existing pinned path {raw_path!r}; refusing to leave " "stale dataset hash pin in place." ) + if not raw_path: + return metadata_artifact = _release_artifact_by_path( release_manifest_json, - _metadata_sidecar_path(path), + _metadata_sidecar_path(raw_path), ) had_metadata_pin = "metadata_sha256" in path_reference if metadata_artifact is None: if had_metadata_pin: raise ValueError( "Data release manifest is missing metadata sidecar artifact " - f"for {path!r}; refusing to drop existing metadata hash pin." + f"for {raw_path!r}; refusing to drop existing metadata hash pin." ) path_reference.pop("metadata_sha256", None) - continue + return metadata_sha256 = metadata_artifact.get("sha256") if not metadata_sha256: if had_metadata_pin: raise ValueError( "Data release manifest metadata sidecar artifact lacks sha256 " - f"for {path!r}; refusing to drop existing metadata hash pin." + f"for {raw_path!r}; refusing to drop existing metadata hash pin." ) path_reference.pop("metadata_sha256", None) - continue + return path_reference["metadata_sha256"] = metadata_sha256 + for name, path_reference in datasets.items(): + named_artifact = release_artifacts.get(name) + if named_artifact is not None: + update_reference_from_artifact(path_reference, named_artifact) + continue + + path = path_reference.get("path") + if not path: + continue + if path_reference.get("revision"): + continue + artifact = _release_artifact_by_path(release_manifest_json, path) + if artifact is None: + if "sha256" in path_reference or "metadata_sha256" in path_reference: + raise ValueError( + "Data release manifest is missing dataset artifact " + f"for existing pinned path {path!r}; refusing to leave " + "stale dataset hash pins in place." + ) + continue + update_reference_from_artifact(path_reference, artifact) + + for name, artifact in release_artifacts.items(): + if name in datasets: + continue + path_reference: dict = {} + update_reference_from_artifact(path_reference, artifact) + datasets[name] = path_reference + # --------------------------------------------------------------------------- # Refresh result @@ -657,6 +721,8 @@ def refresh_release_bundle( _refresh_dataset_path_references_from_data_release( manifest_json, release_manifest_json, + release_manifest_path=new_release_manifest_path, + data_repo_id=repo_id, ) manifest_path.write_text( diff --git a/src/policyengine/provenance/manifest.py b/src/policyengine/provenance/manifest.py index 3d44a11e..97e8d21b 100644 --- a/src/policyengine/provenance/manifest.py +++ b/src/policyengine/provenance/manifest.py @@ -188,9 +188,16 @@ def build_hf_uri(repo_id: str, path_in_repo: str, revision: str) -> str: return f"hf://{repo_id}/{path_in_repo}@{revision}" -def https_dataset_uri(repo_id: str, path_in_repo: str, revision: str) -> str: +def https_dataset_uri( + repo_id: str, + path_in_repo: str, + revision: str, + *, + repo_type: str = "model", +) -> str: """Return a dereferenceable HTTPS URI for a Hugging Face dataset artifact.""" - return f"https://huggingface.co/{repo_id}/resolve/{revision}/{path_in_repo}" + prefix = "datasets/" if repo_type == "dataset" else "" + return f"https://huggingface.co/{prefix}{repo_id}/resolve/{revision}/{path_in_repo}" def _artifact_revision(data_package: "DataPackageVersion") -> str: diff --git a/src/policyengine/provenance/trace.py b/src/policyengine/provenance/trace.py index ff0e39dc..8b919bdf 100644 --- a/src/policyengine/provenance/trace.py +++ b/src/policyengine/provenance/trace.py @@ -69,7 +69,7 @@ def _artifact_mime_type(path_or_uri: str) -> Optional[str]: return _MIME_TYPES.get(suffix) -def _dataset_location_from_uri(uri: str) -> str: +def _dataset_location_from_uri(uri: str, *, repo_type: str = "model") -> str: if not uri.startswith("hf://"): return uri @@ -85,6 +85,7 @@ def _dataset_location_from_uri(uri: str) -> str: repo_id=repo_id, path_in_repo=parts[2], revision=revision, + repo_type=repo_type, ) @@ -355,7 +356,10 @@ def build_trace_tro_from_release_bundle( if data_release_manifest is not None else None ) - dataset_location = _dataset_location_from_uri(certified_artifact.uri) + dataset_location = _dataset_location_from_uri( + certified_artifact.uri, + repo_type=country_manifest.data_package.repo_type, + ) bundle_manifest_hash = hashlib.sha256( canonical_json_bytes(country_manifest.model_dump(mode="json")) diff --git a/tests/test_bundle_refresh.py b/tests/test_bundle_refresh.py index 3b142691..5e491684 100644 --- a/tests/test_bundle_refresh.py +++ b/tests/test_bundle_refresh.py @@ -805,6 +805,160 @@ def fake_urlopen(request, *args, **kwargs): ) +def test__same_data_version_release_override_refreshes_revisioned_populace_artifacts( + sandbox, +) -> None: + manifest_path = sandbox["manifest_dir"] / "us.json" + manifest = json.loads(manifest_path.read_text()) + old_release = "populace-us-2024-old" + new_release = "populace-us-2024-new" + manifest["data_package"] = { + "name": "populace-data", + "version": "0.1.0", + "repo_id": "policyengine/populace-us", + "repo_type": "dataset", + "release_manifest_path": f"releases/{old_release}/release_manifest.json", + "release_manifest_revision": old_release, + } + manifest["certified_data_artifact"] = { + "data_package": {"name": "populace-data", "version": "0.1.0"}, + "build_id": old_release, + "dataset": "populace_us_2024", + "uri": f"hf://policyengine/populace-us/populace_us_2024.h5@{old_release}", + "sha256": "1" * 64, + } + manifest["certification"] = { + "compatibility_basis": "exact_build_model_version", + "data_build_id": old_release, + "built_with_model_version": "1.600.0", + "certified_for_model_version": "1.600.0", + "certified_by": "test fixture", + } + manifest["default_dataset"] = "populace_us_2024" + manifest["datasets"] = { + "populace_us_2024": { + "path": "populace_us_2024.h5", + "repo_id": "policyengine/populace-us", + "revision": old_release, + "sha256": "1" * 64, + }, + "populace_us_2024_calibration": { + "path": "populace_us_2024_calibration.npz", + "repo_id": "policyengine/populace-us", + "revision": old_release, + "sha256": "2" * 64, + }, + "calibration_diagnostics": { + "path": f"releases/{old_release}/calibration_diagnostics.json", + "repo_id": "policyengine/populace-us", + "revision": old_release, + "sha256": "3" * 64, + }, + "external_long_term": { + "path": "long_term/2100.h5", + "repo_id": "policyengine/policyengine-us-data", + "revision": "crfb-longrun-20260517", + "sha256": "4" * 64, + }, + } + manifest_path.write_text(json.dumps(manifest, indent=2)) + + payload = { + "schema_version": 1, + "data_package": {"name": "populace-data", "version": "0.1.0"}, + "build": { + "build_id": new_release, + "built_with_model_package": { + "name": "policyengine-us", + "version": "1.600.0", + }, + }, + "artifacts": { + "populace_us_2024": { + "kind": "microdata", + "path": "populace_us_2024.h5", + "repo_id": "policyengine/populace-us", + "revision": new_release, + "sha256": "5" * 64, + }, + "populace_us_2024_calibration": { + "kind": "calibration", + "path": "populace_us_2024_calibration.npz", + "repo_id": "policyengine/populace-us", + "revision": new_release, + "sha256": "6" * 64, + }, + "calibration_diagnostics": { + "kind": "diagnostics", + "path": "calibration_diagnostics.json", + "repo_id": "policyengine/populace-us", + "revision": new_release, + "sha256": "7" * 64, + }, + "reform_validation": { + "kind": "diagnostics", + "path": "reform_validation.json", + "repo_id": "policyengine/populace-us", + "revision": new_release, + "sha256": "8" * 64, + }, + }, + } + + def fake_urlopen(request, *args, **kwargs): + url = request.full_url + if ( + f"/resolve/{new_release}/releases/{new_release}/release_manifest.json" + in url + ): + return _FakeHFResponse( + json.dumps(payload).encode(), + headers={"x-repo-commit": "new-release-manifest-commit"}, + ) + raise AssertionError(f"Unexpected URL fetched: {url}") + + with patch("policyengine.provenance.bundle.urlopen", side_effect=fake_urlopen): + refresh_release_bundle( + country="us", + data_version="0.1.0", + release_manifest_path=f"releases/{new_release}/release_manifest.json", + release_manifest_revision=new_release, + manifest_dir=sandbox["manifest_dir"], + pyproject_path=sandbox["pyproject_path"], + ) + + written = json.loads((sandbox["manifest_dir"] / "us.json").read_text()) + assert written["certified_data_artifact"]["build_id"] == new_release + assert ( + written["certified_data_artifact"]["uri"] + == "hf://policyengine/populace-us/populace_us_2024.h5@new-release-manifest-commit" + ) + assert written["datasets"]["populace_us_2024"] == { + "path": "populace_us_2024.h5", + "repo_id": "policyengine/populace-us", + "revision": new_release, + "sha256": "5" * 64, + } + assert written["datasets"]["calibration_diagnostics"] == { + "path": f"releases/{new_release}/calibration_diagnostics.json", + "repo_id": "policyengine/populace-us", + "revision": new_release, + "sha256": "7" * 64, + } + assert written["datasets"]["reform_validation"] == { + "path": f"releases/{new_release}/reform_validation.json", + "repo_id": "policyengine/populace-us", + "revision": new_release, + "sha256": "8" * 64, + } + assert written["datasets"]["external_long_term"] == { + "path": "long_term/2100.h5", + "repo_id": "policyengine/policyengine-us-data", + "revision": "crfb-longrun-20260517", + "sha256": "4" * 64, + } + + def test__custom_release_manifest_requires_existing_long_term_dataset_sha( sandbox, ) -> None: diff --git a/tests/test_models.py b/tests/test_models.py index 0927e6f9..1bf93df2 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -119,7 +119,7 @@ def test_has_release_manifest_metadata(self): assert ( us_latest.default_dataset_uri == "hf://policyengine/populace-us/populace_us_2024.h5" - "@populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z" + "@c4e2fd454ddce0e1889ab77abff178a7bdd72b18" ) def test_has_hundreds_of_parameters(self): diff --git a/tests/test_release_manifests.py b/tests/test_release_manifests.py index a92b6e4c..bdfb688d 100644 --- a/tests/test_release_manifests.py +++ b/tests/test_release_manifests.py @@ -44,13 +44,17 @@ US_MODEL_VERSION = "1.729.0" US_BUILT_WITH_MODEL_VERSION = "1.729.0" US_DATA_RELEASE_VERSION = "0.1.0" -US_DATA_RELEASE_REVISION = "populace-us-2024-f32c2e5-0c38bc47db89-20260616T124451Z" -US_DATA_RELEASE_PATH = f"releases/{US_DATA_RELEASE_REVISION}/release_manifest.json" -US_DATA_ARTIFACT_REVISION = US_DATA_RELEASE_REVISION -US_CERTIFICATION_SOURCE = "policyengine.py certification" +US_DATA_RELEASE_ID = "populace-us-2024-a912aea-76666318a202-20260616T175345Z" +US_DATA_RELEASE_REVISION = "c4e2fd454ddce0e1889ab77abff178a7bdd72b18" +US_DATA_RELEASE_PATH = f"releases/{US_DATA_RELEASE_ID}/release_manifest.json" +US_DATA_ARTIFACT_REVISION = US_DATA_RELEASE_ID +US_CERTIFICATION_SOURCE = "populace-data release manifest" US_MANAGED_DATASET_URI = ( f"hf://policyengine/populace-us/populace_us_2024.h5@{US_DATA_ARTIFACT_REVISION}" ) +US_CERTIFIED_DATASET_URI = ( + f"hf://policyengine/populace-us/populace_us_2024.h5@{US_DATA_RELEASE_REVISION}" +) US_RELEASE_MANIFEST_DATASET_URI = ( f"hf://policyengine/populace-us/populace_us_2024.h5@{US_DATA_RELEASE_REVISION}" ) @@ -99,11 +103,13 @@ def test__given_us_manifest__then_has_pinned_model_and_data_packages(self): manifest.data_package.release_manifest_revision == US_DATA_RELEASE_REVISION ) assert manifest.certified_data_artifact is not None - assert manifest.certified_data_artifact.build_id == US_DATA_RELEASE_REVISION + assert manifest.certified_data_artifact.build_id == US_DATA_RELEASE_ID assert manifest.certified_data_artifact.dataset == "populace_us_2024" + assert manifest.certified_data_artifact.uri == US_CERTIFIED_DATASET_URI assert manifest.certification is not None - assert manifest.certification.data_build_id == US_DATA_RELEASE_REVISION - assert manifest.certification.compatibility_basis == "built_with_model_package" + assert manifest.certification.data_build_id == US_DATA_RELEASE_ID + assert manifest.certification.compatibility_basis == "exact_build_model_version" + assert manifest.certification.certified_by == US_CERTIFICATION_SOURCE assert ( manifest.certification.built_with_model_version == US_BUILT_WITH_MODEL_VERSION diff --git a/tests/test_trace_tro.py b/tests/test_trace_tro.py index edabac77..aa706db4 100644 --- a/tests/test_trace_tro.py +++ b/tests/test_trace_tro.py @@ -55,7 +55,7 @@ def _fake_fetch_pypi(name: str, version: str) -> dict: return {"sha256": FAKE_WHEEL_SHA, "url": FAKE_WHEEL_URL} -def _https_location_from_hf_uri(uri: str) -> str: +def _https_location_from_hf_uri(uri: str, *, repo_type: str = "model") -> str: path_without_scheme = uri.removeprefix("hf://") path_without_revision, revision = path_without_scheme.rsplit("@", 1) repo_owner, repo_name, path_in_repo = path_without_revision.split("/", 2) @@ -63,6 +63,7 @@ def _https_location_from_hf_uri(uri: str) -> str: repo_id=f"{repo_owner}/{repo_name}", path_in_repo=path_in_repo, revision=revision, + repo_type=repo_type, ) @@ -304,7 +305,11 @@ def test__given_data_manifest_revision_is_unresolvable__then_dataset_location_us ) assert country_manifest.certified_data_artifact is not None assert dataset_location["trov:hasLocation"] == _https_location_from_hf_uri( - country_manifest.certified_data_artifact.uri + country_manifest.certified_data_artifact.uri, + repo_type=country_manifest.data_package.repo_type, + ) + assert dataset_location["trov:hasLocation"].startswith( + "https://huggingface.co/datasets/policyengine/populace-us/" ) assert "/resolve/1.113.1/" not in dataset_location["trov:hasLocation"]