diff --git a/packages/populace-calibrate/src/populace/calibrate/__init__.py b/packages/populace-calibrate/src/populace/calibrate/__init__.py index 2d2e798..9ed8555 100644 --- a/packages/populace-calibrate/src/populace/calibrate/__init__.py +++ b/packages/populace-calibrate/src/populace/calibrate/__init__.py @@ -74,6 +74,11 @@ def _assert_frame_compatible(version: str, required: tuple[int, int]) -> None: _assert_frame_compatible(_frame_version, _REQUIRED_FRAME_SERIES) +from populace.calibrate.diagnostics import ( # noqa: E402 - after the compat gate + CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION, + diagnostics_payload, + write_calibration_diagnostics, +) from populace.calibrate.matrix import ( # noqa: E402 - after the compat gate CalibrationProblem, SkippedTarget, @@ -102,6 +107,7 @@ def _assert_frame_compatible(version: str, required: tuple[int, int]) -> None: __all__ = [ "AGGREGATIONS", + "CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION", "CONSERVE_MASS", "FREE_MASS", "CalibrationProblem", @@ -114,7 +120,9 @@ def _assert_frame_compatible(version: str, required: tuple[int, int]) -> None: "TargetSpec", "build_constraint_matrix", "calibrate", + "diagnostics_payload", "relative_error_loss", "specs_from_pe_surface", + "write_calibration_diagnostics", "__version__", ] diff --git a/packages/populace-calibrate/src/populace/calibrate/diagnostics.py b/packages/populace-calibrate/src/populace/calibrate/diagnostics.py new file mode 100644 index 0000000..13e0701 --- /dev/null +++ b/packages/populace-calibrate/src/populace/calibrate/diagnostics.py @@ -0,0 +1,116 @@ +"""Serialize a calibration's diagnostics so they travel with the artifact. + +A :class:`~populace.calibrate.solve.CalibrationResult` carries everything a +reviewer needs to audit what calibration did — per-target estimates before +and after, the per-epoch loss trajectory, the targets that failed to compile +*and why*, and the solver options actually used. Until now none of it left +the build machine: the build pushed the diagnostics to telemetry and dropped +them, and the published ``.npz`` kept only closing scalars. "Skipped and +reported, never dropped silently" is only true if the report ships. + +:func:`diagnostics_payload` renders the result as a JSON-stable dict, and +:func:`write_calibration_diagnostics` writes it as +``calibration_diagnostics.json`` — the artifact a release publishes next to +its manifests (charter rule: artifacts carry their environment; a published +dataset's calibration evidence belongs with the dataset, not in telemetry). +""" + +from __future__ import annotations + +import json +import math +from pathlib import Path + +from populace.calibrate.solve import CalibrationResult + +__all__ = [ + "CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION", + "diagnostics_payload", + "write_calibration_diagnostics", +] + +#: Version of the diagnostics payload. Consumers (dashboards, scorers) key +#: their readers on it; bump it with any shape change. +CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION = 1 + + +def _finite(value: float) -> float | None: + """JSON has no NaN/inf; a non-finite diagnostic serializes as null.""" + value = float(value) + return value if math.isfinite(value) else None + + +def _jsonable(value: object) -> object: + """An option value as strict JSON: non-finite floats become null.""" + if isinstance(value, float): + return _finite(value) + return value + + +def diagnostics_payload(result: CalibrationResult) -> dict: + """Render a calibration result as a JSON-stable diagnostics payload. + + The payload carries the full evidence, not summaries: every per-target + row, the whole loss trajectory, and every skipped target with its + reason. Summary scalars (``final_loss``, ``fraction_within_10pct``) are + included so a consumer need not recompute them, but they are derived + from the rows, never a substitute. + + Args: + result: The :func:`~populace.calibrate.solve.calibrate` output. + + Returns: + A dict that round-trips through ``json`` unchanged (non-finite + floats become ``null``). + """ + return { + "schema_version": CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION, + "weight_entity": result.weight_entity, + "options": {key: _jsonable(value) for key, value in result.options.items()}, + "l0_lambda": _finite(result.l0_lambda), + "n_nonzero": int(result.n_nonzero), + "n_records": int(result.weights.shape[0]), + "initial_loss": _finite(result.initial_loss), + "final_loss": _finite(result.final_loss), + "fraction_within_10pct": _finite(result.fraction_within_10pct), + "loss_trajectory": [_finite(loss) for loss in result.loss_trajectory], + "skipped": [ + {"name": skip.target.name, "reason": skip.reason} + for skip in result.skipped + ], + "targets": [ + { + "name": diagnostic.name, + "target": _finite(diagnostic.target), + "initial_estimate": _finite(diagnostic.initial_estimate), + "final_estimate": _finite(diagnostic.final_estimate), + "relative_error": _finite(diagnostic.relative_error), + "within_tolerance": diagnostic.within_tolerance, + } + for diagnostic in result.diagnostics + ], + } + + +def write_calibration_diagnostics( + result: CalibrationResult, path: Path | str +) -> Path: + """Write the diagnostics payload to ``path`` as JSON. + + The conventional filename is ``calibration_diagnostics.json`` inside a + release directory, alongside ``build_manifest.json``. + + Args: + result: The :func:`~populace.calibrate.solve.calibrate` output. + path: Destination file path; parent directories must exist. + + Returns: + The path written. + """ + path = Path(path) + # allow_nan=False is the guard: a non-finite value that escaped the + # scrub is a bug here, not something to smuggle out as invalid JSON. + path.write_text( + json.dumps(diagnostics_payload(result), indent=1, allow_nan=False) + ) + return path diff --git a/packages/populace-calibrate/tests/test_diagnostics.py b/packages/populace-calibrate/tests/test_diagnostics.py new file mode 100644 index 0000000..7e523ff --- /dev/null +++ b/packages/populace-calibrate/tests/test_diagnostics.py @@ -0,0 +1,106 @@ +"""The diagnostics artifact: a calibration's evidence ships with it. + +Behavioral contracts: the payload carries every per-target row, the whole +loss trajectory, and every skipped target with its reason; it round-trips +through strict JSON (no NaN/Infinity tokens); and the file writer produces +the artifact a release directory publishes. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +from populace.calibrate import ( + CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION, + Target, + TargetSet, + calibrate, + diagnostics_payload, + write_calibration_diagnostics, +) + + +def _result(feasible_frame, *, with_skip: bool = False, epochs: int = 120): + frame, truths = feasible_frame() + targets = [ + Target( + name="population", + entity="household", + aggregation="count", + value=truths["population"] * 1.2, + ), + Target( + name="income", + entity="household", + aggregation="sum", + value=truths["income"] * 1.2, + measure="income", + tolerance=truths["income"], + ), + ] + if with_skip: + targets.append( + Target( + name="ghost", + entity="household", + aggregation="sum", + value=1.0, + measure="no_such_column", + ) + ) + return calibrate(frame, TargetSet(tuple(targets)), epochs=epochs, seed=0) + + +def test_payload_carries_full_evidence(feasible_frame) -> None: + result = _result(feasible_frame, epochs=120) + payload = diagnostics_payload(result) + + assert payload["schema_version"] == CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION + assert payload["weight_entity"] == "household" + assert len(payload["loss_trajectory"]) == 120 + assert len(payload["targets"]) == len(result.diagnostics) + assert payload["n_records"] == result.weights.shape[0] + assert payload["final_loss"] == result.final_loss + assert payload["fraction_within_10pct"] == result.fraction_within_10pct + assert payload["options"]["epochs"] == 120 + assert payload["options"]["seed"] == 0 + + income = next( + row for row in payload["targets"] if row["name"].startswith("income") + ) + assert income["initial_estimate"] is not None + assert income["final_estimate"] is not None + assert income["within_tolerance"] is True # tolerance was a whole truth wide + population = next( + row for row in payload["targets"] if row["name"].startswith("population") + ) + assert population["within_tolerance"] is None # no tolerance declared + + +def test_skipped_targets_ship_with_their_reason(feasible_frame) -> None: + result = _result(feasible_frame, with_skip=True) + payload = diagnostics_payload(result) + assert len(payload["skipped"]) == 1 + skip = payload["skipped"][0] + assert skip["name"] == "ghost" + assert "no_such_column" in skip["reason"] + # The skip never leaks into the compiled target rows. + assert all(not row["name"].startswith("ghost") for row in payload["targets"]) + + +def test_payload_is_strict_json(feasible_frame) -> None: + result = _result(feasible_frame) + payload = diagnostics_payload(result) + encoded = json.dumps(payload, allow_nan=False) # raises on NaN/inf + assert json.loads(encoded) == payload + + +def test_writer_round_trips(feasible_frame, tmp_path: Path) -> None: + result = _result(feasible_frame) + path = write_calibration_diagnostics( + result, tmp_path / "calibration_diagnostics.json" + ) + loaded = json.loads(path.read_text()) + assert loaded == diagnostics_payload(result) + assert loaded["schema_version"] == CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION