Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/410.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refresh the bundled US model pin to policyengine-us 1.726.0 and teach the release-bundle refresh helper to fetch data-release manifests from Hugging Face dataset repos.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ uk = [
]
us = [
"policyengine_core>=3.27.1",
"policyengine-us==1.723.0",
"policyengine-us==1.726.0",
]
dev = [
"pytest",
Expand All @@ -64,7 +64,7 @@ dev = [
"ruff>=0.9.0",
"policyengine_core>=3.27.1",
"policyengine-uk==2.88.20",
"policyengine-us==1.723.0",
"policyengine-us==1.726.0",
"towncrier>=24.8.0",
"mypy>=1.11.0",
"pytest-cov>=5.0.0",
Expand Down
14 changes: 7 additions & 7 deletions src/policyengine/data/release_manifests/us.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
"bundle_id": "us-4.17.0",
"certification": {
"built_with_model_version": "1.723.0",
"certified_by": "policyengine.py certification",
"certified_for_model_version": "1.723.0",
"compatibility_basis": "built_with_model_package",
"certified_by": "populace-data release manifest",
"certified_for_model_version": "1.726.0",
"compatibility_basis": "legacy_compatible_model_package",
"data_build_id": "populace-us-2024-5da5a95-20260611"
},
"certified_data_artifact": {
Expand All @@ -21,7 +21,7 @@
"data_package": {
"name": "populace-data",
"release_manifest_path": "releases/populace-us-2024-5da5a95-20260611/release_manifest.json",
"release_manifest_revision": "populace-us-2024-5da5a95-20260611",
"release_manifest_revision": "afa2748b79a139404a1c477e3a7a9ca8729c22ab",
"repo_id": "policyengine/populace-us",
"repo_type": "dataset",
"version": "0.1.0"
Expand Down Expand Up @@ -3457,9 +3457,9 @@
"default_dataset": "populace_us_2024",
"model_package": {
"name": "policyengine-us",
"sha256": "d4104858e36ef20fb33a53c7e09e2174a46d1483d1ebe557e2151c95864c7fab",
"version": "1.723.0",
"wheel_url": "https://files.pythonhosted.org/packages/fb/9e/8611bf46cf488981f0450bbfda8df8c9fc3d21e7bb5fc4398da9890b3d3c/policyengine_us-1.723.0-py3-none-any.whl"
"sha256": "1acc74d1f5431f7872c5500ad93604455aa7b98bcc87399218bc459b393386be",
"version": "1.726.0",
"wheel_url": "https://files.pythonhosted.org/packages/27/dd/a6a62280c6289e6da669410e97c90da99e6585c39d4fa6fe81b9910a1e3c/policyengine_us-1.726.0-py3-none-any.whl"
},
"policyengine_version": "4.17.0",
"region_datasets": {
Expand Down
27 changes: 12 additions & 15 deletions src/policyengine/data/release_manifests/us.trace.tro.jsonld
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"trov:hasArtifact": {
"@id": "composition/1/artifact/data_release_manifest"
},
"trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-us/resolve/populace-us-2024-5da5a95-20260611/releases/populace-us-2024-5da5a95-20260611/release_manifest.json"
"trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-us/resolve/afa2748b79a139404a1c477e3a7a9ca8729c22ab/releases/populace-us-2024-5da5a95-20260611/release_manifest.json"
},
{
"@id": "arrangement/1/location/dataset",
Expand All @@ -61,7 +61,7 @@
"trov:hasArtifact": {
"@id": "composition/1/artifact/model_wheel"
},
"trov:hasLocation": "https://files.pythonhosted.org/packages/fb/9e/8611bf46cf488981f0450bbfda8df8c9fc3d21e7bb5fc4398da9890b3d3c/policyengine_us-1.723.0-py3-none-any.whl"
"trov:hasLocation": "https://files.pythonhosted.org/packages/27/dd/a6a62280c6289e6da669410e97c90da99e6585c39d4fa6fe81b9910a1e3c/policyengine_us-1.726.0-py3-none-any.whl"
}
]
}
Expand All @@ -75,14 +75,14 @@
"@type": "trov:ResearchArtifact",
"schema:name": "policyengine.py bundle manifest for us",
"trov:mimeType": "application/json",
"trov:sha256": "3de732f64daf05e2fabb81a4a82a692bd111249417b782b9c3efe0915d799b65"
"trov:sha256": "ec431be2dac3d0276756345d506373544c8d1efc2700caadba8142ff1ed2c849"
},
{
"@id": "composition/1/artifact/data_release_manifest",
"@type": "trov:ResearchArtifact",
"schema:name": "populace-data release manifest 0.1.0",
"trov:mimeType": "application/json",
"trov:sha256": "c2e51b22d84760923c7750f88f2cc0c0e22620abee54a58c84af93f6cf4a3506"
"trov:sha256": "e1870fdef83d0cb5caef38187face74b7335bd6aae56e18d5ab715331e35e744"
},
{
"@id": "composition/1/artifact/dataset",
Expand All @@ -94,30 +94,27 @@
{
"@id": "composition/1/artifact/model_wheel",
"@type": "trov:ResearchArtifact",
"schema:name": "policyengine-us==1.723.0 wheel",
"schema:name": "policyengine-us==1.726.0 wheel",
"trov:mimeType": "application/zip",
"trov:sha256": "d4104858e36ef20fb33a53c7e09e2174a46d1483d1ebe557e2151c95864c7fab"
"trov:sha256": "1acc74d1f5431f7872c5500ad93604455aa7b98bcc87399218bc459b393386be"
}
],
"trov:hasFingerprint": {
"@id": "composition/1/fingerprint",
"@type": "trov:CompositionFingerprint",
"trov:sha256": "a395a9fb14a36537f32fb4ae2b6c9325ae24930051d5b223f9c47ef2f3d3ae55"
"trov:sha256": "bd16e54a750a9a507538933b4b78febb1f5366ce4c5f32ba47d29daf39e09b91"
}
},
"trov:hasPerformance": {
"@id": "trp/1",
"@type": "trov:TransparentResearchPerformance",
"pe:builtWithModelVersion": "1.723.0",
"pe:certifiedBy": "policyengine.py certification",
"pe:certifiedForModelVersion": "1.723.0",
"pe:ciGitRef": "refs/heads/main",
"pe:ciGitSha": "c2c910923620db84dabedc3a36e398ae1c0530bb",
"pe:ciRunUrl": "https://github.com/PolicyEngine/policyengine.py/actions/runs/27504864442",
"pe:compatibilityBasis": "built_with_model_package",
"pe:certifiedBy": "populace-data release manifest",
"pe:certifiedForModelVersion": "1.726.0",
"pe:compatibilityBasis": "legacy_compatible_model_package",
"pe:dataBuildId": "populace-us-2024-5da5a95-20260611",
"pe:emittedIn": "github-actions",
"rdfs:comment": "Certification of build populace-us-2024-5da5a95-20260611 for policyengine-us 1.723.0.",
"pe:emittedIn": "local",
"rdfs:comment": "Certification of build populace-us-2024-5da5a95-20260611 for policyengine-us 1.726.0.",
"trov:accessedArrangement": {
"@id": "arrangement/1"
},
Expand Down
5 changes: 4 additions & 1 deletion src/policyengine/provenance/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ def _fetch_data_release_manifest(
release_manifest_path: str,
revision: str,
*,
repo_type: str = "model",
allow_main_fallback: bool = True,
) -> Optional[_DataReleaseManifestFetch]:
"""Fetch a data release manifest from HF if one is available.
Expand All @@ -213,8 +214,9 @@ def _fetch_data_release_manifest(
revisions.append("main")

for candidate in revisions:
prefix = "datasets/" if repo_type == "dataset" else ""
url = (
f"https://huggingface.co/{repo_id}/resolve/"
f"https://huggingface.co/{prefix}{repo_id}/resolve/"
f"{candidate}/{release_manifest_path}"
)
try:
Expand Down Expand Up @@ -523,6 +525,7 @@ def refresh_release_bundle(
repo_id=repo_id,
release_manifest_path=new_release_manifest_path,
revision=fetch_revision,
repo_type=data_package_json.get("repo_type", "model"),
allow_main_fallback=release_manifest_revision is None,
)
if release_manifest_fetch is None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"has_income_tax": true,
"has_region_registry": true,
"model_package_name": "policyengine-us",
"num_parameters_bucketed_100s": 896,
"num_variables_bucketed_100s": 52,
"num_parameters_bucketed_100s": 898,
"num_variables_bucketed_100s": 53,
"region_registry_country": "us"
}
52 changes: 52 additions & 0 deletions tests/test_bundle_refresh.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,58 @@ def fake_urlopen(request, *args, **kwargs):
)


def test__bump_model_only_fetches_dataset_repo_release_manifest(sandbox) -> None:
    """Model-only refresh resolves the manifest via the HF ``datasets/`` URL.

    The sandboxed US bundle manifest is rewritten so its data package points
    at a Hugging Face repo with ``repo_type == "dataset"``.  The stubbed
    ``urlopen`` only answers PyPI lookups and the exact
    ``https://huggingface.co/datasets/...`` release-manifest URL; any other
    request raises, so the refresh can only succeed if the ``datasets/``
    prefix is used.
    """
    bundle_file = sandbox["manifest_dir"] / "us.json"
    bundle = json.loads(bundle_file.read_text())

    # Re-point the data package at a dataset-type repository.
    data_package = bundle["data_package"]
    data_package["repo_id"] = "policyengine/populace-us"
    data_package["repo_type"] = "dataset"
    data_package["release_manifest_path"] = "releases/1.70.0/release_manifest.json"
    bundle["certified_data_artifact"]["uri"] = (
        "hf://policyengine/populace-us/enhanced_cps_2024.h5@old-dataset-commit"
    )
    bundle_file.write_text(json.dumps(bundle, indent=2))

    # NOTE(review): "old-release-manifest-commit" is presumably the revision
    # pinned by the sandbox fixture -- confirm against the fixture.
    dataset_manifest_url = (
        "https://huggingface.co/datasets/policyengine/populace-us/resolve/"
        "old-release-manifest-commit/releases/1.70.0/release_manifest.json"
    )

    def fake_urlopen(request, *args, **kwargs):
        url = request.full_url
        if "pypi.org" in url:
            return _pypi_response("policyengine-us", "1.653.3")
        if url != dataset_manifest_url:
            # Anything else (e.g. the URL without "datasets/") is a failure.
            raise AssertionError(f"Unexpected URL fetched: {url}")
        microdata_artifact = {
            "kind": "microdata",
            "path": "enhanced_cps_2024.h5",
            "repo_id": "policyengine/policyengine-us-data",
            "revision": "old-dataset-commit",
            "sha256": "d" * 64,
        }
        return _data_release_manifest_response(
            data_version="1.70.0",
            dataset_sha256="d" * 64,
            compatible_model_packages=[
                {"name": "policyengine-us", "specifier": "==1.600.0"},
                {"name": "policyengine-us", "specifier": "==1.653.3"},
            ],
            extra_artifacts={"enhanced_cps_2024": microdata_artifact},
            headers={"x-repo-commit": "old-release-manifest-commit"},
        )

    with patch("policyengine.provenance.bundle.urlopen", side_effect=fake_urlopen):
        result = refresh_release_bundle(
            country="us",
            model_version="1.653.3",
            manifest_dir=sandbox["manifest_dir"],
            pyproject_path=sandbox["pyproject_path"],
        )

    assert result.new_model == "1.653.3"


def test__bump_model_only_requires_data_release_manifest_compatibility(
sandbox,
) -> None:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def test_has_release_manifest_metadata(self):
assert us_latest.release_manifest is not None
assert us_latest.release_manifest.country_id == "us"
assert us_latest.model_package.name == "policyengine-us"
assert us_latest.model_package.version == "1.723.0"
assert us_latest.model_package.version == "1.726.0"
assert us_latest.data_package.name == "populace-data"
assert us_latest.data_package.version == "0.1.0"
assert (
Expand Down
20 changes: 15 additions & 5 deletions tests/test_release_manifests.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,19 @@
PYPROJECT.read_text(),
re.MULTILINE,
).group(1)
US_MODEL_VERSION = "1.723.0"
US_MODEL_VERSION = "1.726.0"
US_BUILT_WITH_MODEL_VERSION = "1.723.0"
US_DATA_RELEASE_VERSION = "0.1.0"
US_DATA_RELEASE_PATH = (
"releases/populace-us-2024-5da5a95-20260611/release_manifest.json"
)
US_DATA_RELEASE_REVISION = "populace-us-2024-5da5a95-20260611"
US_CERTIFICATION_SOURCE = "policyengine.py certification"
US_DATA_RELEASE_REVISION = "afa2748b79a139404a1c477e3a7a9ca8729c22ab"
US_DATA_ARTIFACT_REVISION = "populace-us-2024-5da5a95-20260611"
US_CERTIFICATION_SOURCE = "populace-data release manifest"
US_MANAGED_DATASET_URI = (
f"hf://policyengine/populace-us/populace_us_2024.h5@{US_DATA_ARTIFACT_REVISION}"
)
US_RELEASE_MANIFEST_DATASET_URI = (
f"hf://policyengine/populace-us/populace_us_2024.h5@{US_DATA_RELEASE_REVISION}"
)

Expand Down Expand Up @@ -106,7 +110,10 @@ def test__given_us_manifest__then_has_pinned_model_and_data_packages(self):
assert (
manifest.certification.data_build_id == "populace-us-2024-5da5a95-20260611"
)
assert manifest.certification.compatibility_basis == "built_with_model_package"
assert (
manifest.certification.compatibility_basis
== "legacy_compatible_model_package"
)
assert (
manifest.certification.built_with_model_version
== US_BUILT_WITH_MODEL_VERSION
Expand Down Expand Up @@ -344,7 +351,10 @@ def test__given_release_manifest_artifact_uses_version_tag__then_rewrites_to_com
):
manifest = get_data_release_manifest("us")

assert manifest.artifacts["populace_us_2024"].uri == US_MANAGED_DATASET_URI
assert (
manifest.artifacts["populace_us_2024"].uri
== US_RELEASE_MANIFEST_DATASET_URI
)
assert (
manifest.source_sha256
== hashlib.sha256(json.dumps(payload).encode("utf-8")).hexdigest()
Expand Down
11 changes: 3 additions & 8 deletions tests/test_us_regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,8 @@
US_INHERITED_SUBNATIONAL_REVISION = "1.115.5"


def _current_us_data_revision() -> str:
    """Return the release-manifest revision recorded for the US data package.

    Asserts that the bundled US release manifest actually carries a revision.
    """
    us_data_package = get_release_manifest("us").data_package
    pinned_revision = us_data_package.release_manifest_revision
    assert pinned_revision is not None
    return pinned_revision
def _current_us_default_dataset_uri() -> str:
    """Return the ``default_dataset_uri`` of the bundled US release manifest."""
    us_manifest = get_release_manifest("us")
    return us_manifest.default_dataset_uri


class TestUSStates:
Expand Down Expand Up @@ -114,10 +112,7 @@ def test__given_us_registry__then_has_national_region(self):
assert national.code == "us"
assert national.label == "United States"
assert national.region_type == "national"
assert (
national.dataset_path == "hf://policyengine/populace-us/"
f"populace_us_2024.h5@{_current_us_data_revision()}"
)
assert national.dataset_path == _current_us_default_dataset_uri()

def test__given_us_registry__then_has_51_states(self):
"""Given: US region registry
Expand Down
12 changes: 6 additions & 6 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading