diff --git a/changelog.d/410.changed.md b/changelog.d/410.changed.md new file mode 100644 index 00000000..18d816f5 --- /dev/null +++ b/changelog.d/410.changed.md @@ -0,0 +1 @@ +Refresh the bundled US model pin to policyengine-us 1.726.0 and teach the release-bundle refresh helper to fetch data-release manifests from Hugging Face dataset repos. diff --git a/pyproject.toml b/pyproject.toml index ab842ddb..fc4acdf0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ uk = [ ] us = [ "policyengine_core>=3.27.1", - "policyengine-us==1.723.0", + "policyengine-us==1.726.0", ] dev = [ "pytest", @@ -64,7 +64,7 @@ dev = [ "ruff>=0.9.0", "policyengine_core>=3.27.1", "policyengine-uk==2.88.20", - "policyengine-us==1.723.0", + "policyengine-us==1.726.0", "towncrier>=24.8.0", "mypy>=1.11.0", "pytest-cov>=5.0.0", diff --git a/src/policyengine/data/release_manifests/us.json b/src/policyengine/data/release_manifests/us.json index c66ad459..85329c44 100644 --- a/src/policyengine/data/release_manifests/us.json +++ b/src/policyengine/data/release_manifests/us.json @@ -2,9 +2,9 @@ "bundle_id": "us-4.17.0", "certification": { "built_with_model_version": "1.723.0", - "certified_by": "policyengine.py certification", - "certified_for_model_version": "1.723.0", - "compatibility_basis": "built_with_model_package", + "certified_by": "populace-data release manifest", + "certified_for_model_version": "1.726.0", + "compatibility_basis": "legacy_compatible_model_package", "data_build_id": "populace-us-2024-5da5a95-20260611" }, "certified_data_artifact": { @@ -21,7 +21,7 @@ "data_package": { "name": "populace-data", "release_manifest_path": "releases/populace-us-2024-5da5a95-20260611/release_manifest.json", - "release_manifest_revision": "populace-us-2024-5da5a95-20260611", + "release_manifest_revision": "afa2748b79a139404a1c477e3a7a9ca8729c22ab", "repo_id": "policyengine/populace-us", "repo_type": "dataset", "version": "0.1.0" @@ -3457,9 +3457,9 @@ "default_dataset": "populace_us_2024", "model_package": { "name": "policyengine-us", - "sha256": "d4104858e36ef20fb33a53c7e09e2174a46d1483d1ebe557e2151c95864c7fab", - "version": "1.723.0", - "wheel_url": "https://files.pythonhosted.org/packages/fb/9e/8611bf46cf488981f0450bbfda8df8c9fc3d21e7bb5fc4398da9890b3d3c/policyengine_us-1.723.0-py3-none-any.whl" + "sha256": "1acc74d1f5431f7872c5500ad93604455aa7b98bcc87399218bc459b393386be", + "version": "1.726.0", + "wheel_url": "https://files.pythonhosted.org/packages/27/dd/a6a62280c6289e6da669410e97c90da99e6585c39d4fa6fe81b9910a1e3c/policyengine_us-1.726.0-py3-none-any.whl" }, "policyengine_version": "4.17.0", "region_datasets": { diff --git a/src/policyengine/data/release_manifests/us.trace.tro.jsonld b/src/policyengine/data/release_manifests/us.trace.tro.jsonld index cff63742..76345176 100644 --- a/src/policyengine/data/release_manifests/us.trace.tro.jsonld +++ b/src/policyengine/data/release_manifests/us.trace.tro.jsonld @@ -45,7 +45,7 @@ "trov:hasArtifact": { "@id": "composition/1/artifact/data_release_manifest" }, - "trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-us/resolve/populace-us-2024-5da5a95-20260611/releases/populace-us-2024-5da5a95-20260611/release_manifest.json" + "trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-us/resolve/afa2748b79a139404a1c477e3a7a9ca8729c22ab/releases/populace-us-2024-5da5a95-20260611/release_manifest.json" }, { "@id": "arrangement/1/location/dataset", @@ -61,7 +61,7 @@ "trov:hasArtifact": { "@id": "composition/1/artifact/model_wheel" }, - "trov:hasLocation": "https://files.pythonhosted.org/packages/fb/9e/8611bf46cf488981f0450bbfda8df8c9fc3d21e7bb5fc4398da9890b3d3c/policyengine_us-1.723.0-py3-none-any.whl" + "trov:hasLocation": "https://files.pythonhosted.org/packages/27/dd/a6a62280c6289e6da669410e97c90da99e6585c39d4fa6fe81b9910a1e3c/policyengine_us-1.726.0-py3-none-any.whl" } ] } @@ -75,14 +75,14 @@ "@type": "trov:ResearchArtifact", "schema:name": "policyengine.py bundle manifest for us", "trov:mimeType": "application/json", - "trov:sha256": "3de732f64daf05e2fabb81a4a82a692bd111249417b782b9c3efe0915d799b65" + "trov:sha256": "ec431be2dac3d0276756345d506373544c8d1efc2700caadba8142ff1ed2c849" }, { "@id": "composition/1/artifact/data_release_manifest", "@type": "trov:ResearchArtifact", "schema:name": "populace-data release manifest 0.1.0", "trov:mimeType": "application/json", - "trov:sha256": "c2e51b22d84760923c7750f88f2cc0c0e22620abee54a58c84af93f6cf4a3506" + "trov:sha256": "e1870fdef83d0cb5caef38187face74b7335bd6aae56e18d5ab715331e35e744" }, { "@id": "composition/1/artifact/dataset", @@ -94,30 +94,27 @@ { "@id": "composition/1/artifact/model_wheel", "@type": "trov:ResearchArtifact", - "schema:name": "policyengine-us==1.723.0 wheel", + "schema:name": "policyengine-us==1.726.0 wheel", "trov:mimeType": "application/zip", - "trov:sha256": "d4104858e36ef20fb33a53c7e09e2174a46d1483d1ebe557e2151c95864c7fab" + "trov:sha256": "1acc74d1f5431f7872c5500ad93604455aa7b98bcc87399218bc459b393386be" } ], "trov:hasFingerprint": { "@id": "composition/1/fingerprint", "@type": "trov:CompositionFingerprint", - "trov:sha256": "a395a9fb14a36537f32fb4ae2b6c9325ae24930051d5b223f9c47ef2f3d3ae55" + "trov:sha256": "bd16e54a750a9a507538933b4b78febb1f5366ce4c5f32ba47d29daf39e09b91" } }, "trov:hasPerformance": { "@id": "trp/1", "@type": "trov:TransparentResearchPerformance", "pe:builtWithModelVersion": "1.723.0", - "pe:certifiedBy": "policyengine.py certification", - "pe:certifiedForModelVersion": "1.723.0", - "pe:ciGitRef": "refs/heads/main", - "pe:ciGitSha": "c2c910923620db84dabedc3a36e398ae1c0530bb", - "pe:ciRunUrl": "https://github.com/PolicyEngine/policyengine.py/actions/runs/27504864442", - "pe:compatibilityBasis": "built_with_model_package", + "pe:certifiedBy": "populace-data release manifest", + "pe:certifiedForModelVersion": "1.726.0", + "pe:compatibilityBasis": "legacy_compatible_model_package", "pe:dataBuildId": "populace-us-2024-5da5a95-20260611", - "pe:emittedIn": "github-actions", - "rdfs:comment": "Certification of build populace-us-2024-5da5a95-20260611 for policyengine-us 1.723.0.", + "pe:emittedIn": "local", + "rdfs:comment": "Certification of build populace-us-2024-5da5a95-20260611 for policyengine-us 1.726.0.", "trov:accessedArrangement": { "@id": "arrangement/1" }, diff --git a/src/policyengine/provenance/bundle.py b/src/policyengine/provenance/bundle.py index e54186fa..9c519e7e 100644 --- a/src/policyengine/provenance/bundle.py +++ b/src/policyengine/provenance/bundle.py @@ -188,6 +188,7 @@ def _fetch_data_release_manifest( release_manifest_path: str, revision: str, *, + repo_type: str = "model", allow_main_fallback: bool = True, ) -> Optional[_DataReleaseManifestFetch]: """Fetch a data release manifest from HF if one is available. @@ -213,8 +214,9 @@ def _fetch_data_release_manifest( revisions.append("main") for candidate in revisions: + prefix = "datasets/" if repo_type == "dataset" else "" url = ( - f"https://huggingface.co/{repo_id}/resolve/" + f"https://huggingface.co/{prefix}{repo_id}/resolve/" f"{candidate}/{release_manifest_path}" ) try: @@ -523,6 +525,7 @@ def refresh_release_bundle( repo_id=repo_id, release_manifest_path=new_release_manifest_path, revision=fetch_revision, + repo_type=data_package_json.get("repo_type", "model"), allow_main_fallback=release_manifest_revision is None, ) if release_manifest_fetch is None: diff --git a/tests/fixtures/household_calculator_snapshots/us_model_surface.json b/tests/fixtures/household_calculator_snapshots/us_model_surface.json index 69386c46..c86da61a 100644 --- a/tests/fixtures/household_calculator_snapshots/us_model_surface.json +++ b/tests/fixtures/household_calculator_snapshots/us_model_surface.json @@ -5,7 +5,7 @@ "has_income_tax": true, "has_region_registry": true, "model_package_name": "policyengine-us", - "num_parameters_bucketed_100s": 896, - "num_variables_bucketed_100s": 52, + "num_parameters_bucketed_100s": 898, + "num_variables_bucketed_100s": 53, "region_registry_country": "us" } diff --git a/tests/test_bundle_refresh.py b/tests/test_bundle_refresh.py index 6c1d8338..3b142691 100644 --- a/tests/test_bundle_refresh.py +++ b/tests/test_bundle_refresh.py @@ -264,6 +264,58 @@ def fake_urlopen(request, *args, **kwargs): ) +def test__bump_model_only_fetches_dataset_repo_release_manifest(sandbox) -> None: + manifest_path = sandbox["manifest_dir"] / "us.json" + manifest = json.loads(manifest_path.read_text()) + manifest["data_package"]["repo_id"] = "policyengine/populace-us" + manifest["data_package"]["repo_type"] = "dataset" + manifest["data_package"]["release_manifest_path"] = ( + "releases/1.70.0/release_manifest.json" + ) + manifest["certified_data_artifact"]["uri"] = ( + "hf://policyengine/populace-us/enhanced_cps_2024.h5@old-dataset-commit" + ) + manifest_path.write_text(json.dumps(manifest, indent=2)) + + def fake_urlopen(request, *args, **kwargs): + url = request.full_url + if "pypi.org" in url: + return _pypi_response("policyengine-us", "1.653.3") + if ( + url == "https://huggingface.co/datasets/policyengine/populace-us/resolve/" + "old-release-manifest-commit/releases/1.70.0/release_manifest.json" + ): + return _data_release_manifest_response( + data_version="1.70.0", + dataset_sha256="d" * 64, + compatible_model_packages=[ + {"name": "policyengine-us", "specifier": "==1.600.0"}, + {"name": "policyengine-us", "specifier": "==1.653.3"}, + ], + extra_artifacts={ + "enhanced_cps_2024": { + "kind": "microdata", + "path": "enhanced_cps_2024.h5", + "repo_id": "policyengine/policyengine-us-data", + "revision": "old-dataset-commit", + "sha256": "d" * 64, + } + }, + headers={"x-repo-commit": "old-release-manifest-commit"}, + ) + raise AssertionError(f"Unexpected URL fetched: {url}") + + with patch("policyengine.provenance.bundle.urlopen", side_effect=fake_urlopen): + result = refresh_release_bundle( + country="us", + model_version="1.653.3", + manifest_dir=sandbox["manifest_dir"], + pyproject_path=sandbox["pyproject_path"], + ) + + assert result.new_model == "1.653.3" + + def test__bump_model_only_requires_data_release_manifest_compatibility( sandbox, ) -> None: diff --git a/tests/test_models.py b/tests/test_models.py index bf213442..1ba84ae0 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -113,7 +113,7 @@ def test_has_release_manifest_metadata(self): assert us_latest.release_manifest is not None assert us_latest.release_manifest.country_id == "us" assert us_latest.model_package.name == "policyengine-us" - assert us_latest.model_package.version == "1.723.0" + assert us_latest.model_package.version == "1.726.0" assert us_latest.data_package.name == "populace-data" assert us_latest.data_package.version == "0.1.0" assert ( diff --git a/tests/test_release_manifests.py b/tests/test_release_manifests.py index a369a2a1..e625e66f 100644 --- a/tests/test_release_manifests.py +++ b/tests/test_release_manifests.py @@ -41,15 +41,19 @@ PYPROJECT.read_text(), re.MULTILINE, ).group(1) -US_MODEL_VERSION = "1.723.0" +US_MODEL_VERSION = "1.726.0" US_BUILT_WITH_MODEL_VERSION = "1.723.0" US_DATA_RELEASE_VERSION = "0.1.0" US_DATA_RELEASE_PATH = ( "releases/populace-us-2024-5da5a95-20260611/release_manifest.json" ) -US_DATA_RELEASE_REVISION = "populace-us-2024-5da5a95-20260611" -US_CERTIFICATION_SOURCE = "policyengine.py certification" +US_DATA_RELEASE_REVISION = "afa2748b79a139404a1c477e3a7a9ca8729c22ab" +US_DATA_ARTIFACT_REVISION = "populace-us-2024-5da5a95-20260611" +US_CERTIFICATION_SOURCE = "populace-data release manifest" US_MANAGED_DATASET_URI = ( + f"hf://policyengine/populace-us/populace_us_2024.h5@{US_DATA_ARTIFACT_REVISION}" +) +US_RELEASE_MANIFEST_DATASET_URI = ( f"hf://policyengine/populace-us/populace_us_2024.h5@{US_DATA_RELEASE_REVISION}" ) @@ -106,7 +110,10 @@ def test__given_us_manifest__then_has_pinned_model_and_data_packages(self): assert ( manifest.certification.data_build_id == "populace-us-2024-5da5a95-20260611" ) - assert manifest.certification.compatibility_basis == "built_with_model_package" + assert ( + manifest.certification.compatibility_basis + == "legacy_compatible_model_package" + ) assert ( manifest.certification.built_with_model_version == US_BUILT_WITH_MODEL_VERSION @@ -344,7 +351,10 @@ def test__given_release_manifest_artifact_uses_version_tag__then_rewrites_to_com ): manifest = get_data_release_manifest("us") - assert manifest.artifacts["populace_us_2024"].uri == US_MANAGED_DATASET_URI + assert ( + manifest.artifacts["populace_us_2024"].uri + == US_RELEASE_MANIFEST_DATASET_URI + ) assert ( manifest.source_sha256 == hashlib.sha256(json.dumps(payload).encode("utf-8")).hexdigest() diff --git a/tests/test_us_regions.py b/tests/test_us_regions.py index 9b0e4295..fb1a7b9b 100644 --- a/tests/test_us_regions.py +++ b/tests/test_us_regions.py @@ -11,10 +11,8 @@ US_INHERITED_SUBNATIONAL_REVISION = "1.115.5" -def _current_us_data_revision() -> str: - revision = get_release_manifest("us").data_package.release_manifest_revision - assert revision is not None - return revision +def _current_us_default_dataset_uri() -> str: + return get_release_manifest("us").default_dataset_uri class TestUSStates: @@ -114,10 +112,7 @@ def test__given_us_registry__then_has_national_region(self): assert national.code == "us" assert national.label == "United States" assert national.region_type == "national" - assert ( - national.dataset_path == "hf://policyengine/populace-us/" - f"populace_us_2024.h5@{_current_us_data_revision()}" - ) + assert national.dataset_path == _current_us_default_dataset_uri() def test__given_us_registry__then_has_51_states(self): """Given: US region registry diff --git a/uv.lock b/uv.lock index 7db442e2..4925b730 100644 --- a/uv.lock +++ b/uv.lock @@ -2820,7 +2820,7 @@ wheels = [ [[package]] name = "policyengine" -version = "4.16.1" +version = "4.17.0" source = { editable = "." } dependencies = [ { name = "diskcache" }, @@ -2897,8 +2897,8 @@ requires-dist = [ { name = "policyengine-core", marker = "extra == 'us'", specifier = ">=3.27.1" }, { name = "policyengine-uk", marker = "extra == 'dev'", specifier = "==2.88.20" }, { name = "policyengine-uk", marker = "extra == 'uk'", specifier = "==2.88.20" }, - { name = "policyengine-us", marker = "extra == 'dev'", specifier = "==1.723.0" }, - { name = "policyengine-us", marker = "extra == 'us'", specifier = "==1.723.0" }, + { name = "policyengine-us", marker = "extra == 'dev'", specifier = "==1.726.0" }, + { name = "policyengine-us", marker = "extra == 'us'", specifier = "==1.726.0" }, { name = "psutil", specifier = ">=5.9.0" }, { name = "pydantic", specifier = ">=2.0.0" }, { name = "pytest", marker = "extra == 'dev'" }, @@ -2961,7 +2961,7 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.723.0" +version = "1.726.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, @@ -2973,9 +2973,9 @@ dependencies = [ { name = "tables", version = "3.11.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/27/6d/82742b92b9cecdd8fc61ab05347812eb2dcf097d5929c9ec8b7806afa1c4/policyengine_us-1.723.0.tar.gz", hash = "sha256:52b8ca8544daa22e531f949994d1cd56c22fc9413ac0c2025b98f8e051bc72af", size = 10136247, upload-time = "2026-06-10T14:14:10.434Z" } +sdist = { url = "https://files.pythonhosted.org/packages/af/da/1153e6e05e152bf24387e4e160da25267b0056f9f7418f3c36b6ccf04a83/policyengine_us-1.726.0.tar.gz", hash = "sha256:6383099e52f23be0a69a7f0e02b3f9abd3947c8613225b3f7d06cb24b7d0e614", size = 10199011, upload-time = "2026-06-11T00:41:07.901Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/9e/8611bf46cf488981f0450bbfda8df8c9fc3d21e7bb5fc4398da9890b3d3c/policyengine_us-1.723.0-py3-none-any.whl", hash = "sha256:d4104858e36ef20fb33a53c7e09e2174a46d1483d1ebe557e2151c95864c7fab", size = 11326215, upload-time = "2026-06-10T14:14:06.61Z" }, + { url = "https://files.pythonhosted.org/packages/27/dd/a6a62280c6289e6da669410e97c90da99e6585c39d4fa6fe81b9910a1e3c/policyengine_us-1.726.0-py3-none-any.whl", hash = "sha256:1acc74d1f5431f7872c5500ad93604455aa7b98bcc87399218bc459b393386be", size = 11518993, upload-time = "2026-06-11T00:41:04.561Z" }, ] [[package]]