From b842730c41cb85e6cc285ba26a7d3d721c2de64a Mon Sep 17 00:00:00 2001 From: Pavel Makarchuk Date: Fri, 12 Jun 2026 01:11:22 -0400 Subject: [PATCH] populace.calibrate.diagnostics: the calibration evidence ships with the artifact MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CalibrationResult already carries everything an auditor needs — per-target initial/final estimates and tolerance verdicts, the per-epoch loss trajectory, every skipped target with its reason, and the solver options actually used — but none of it left the build machine: the build pushed diagnostics to telemetry and dropped them, and the published .npz kept only closing scalars. "Skipped and reported, never dropped silently" is only true if the report ships. diagnostics_payload() renders the result as strict JSON (non-finite floats become null; the writer passes allow_nan=False so anything that escapes the scrub fails loudly), and write_calibration_diagnostics() writes the calibration_diagnostics.json a release directory publishes next to its manifests. Fixes #10. Co-Authored-By: Claude Fable 5 --- .../src/populace/calibrate/__init__.py | 8 ++ .../src/populace/calibrate/diagnostics.py | 116 ++++++++++++++++++ .../tests/test_diagnostics.py | 106 ++++++++++++++++ 3 files changed, 230 insertions(+) create mode 100644 packages/populace-calibrate/src/populace/calibrate/diagnostics.py create mode 100644 packages/populace-calibrate/tests/test_diagnostics.py diff --git a/packages/populace-calibrate/src/populace/calibrate/__init__.py b/packages/populace-calibrate/src/populace/calibrate/__init__.py index 2d2e798..9ed8555 100644 --- a/packages/populace-calibrate/src/populace/calibrate/__init__.py +++ b/packages/populace-calibrate/src/populace/calibrate/__init__.py @@ -74,6 +74,11 @@ def _assert_frame_compatible(version: str, required: tuple[int, int]) -> None: _assert_frame_compatible(_frame_version, _REQUIRED_FRAME_SERIES) +from populace.calibrate.diagnostics import ( # noqa: E402 - after the compat gate + CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION, + diagnostics_payload, + write_calibration_diagnostics, +) from populace.calibrate.matrix import ( # noqa: E402 - after the compat gate CalibrationProblem, SkippedTarget, @@ -102,6 +107,7 @@ def _assert_frame_compatible(version: str, required: tuple[int, int]) -> None: __all__ = [ "AGGREGATIONS", + "CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION", "CONSERVE_MASS", "FREE_MASS", "CalibrationProblem", @@ -114,7 +120,9 @@ def _assert_frame_compatible(version: str, required: tuple[int, int]) -> None: "TargetSpec", "build_constraint_matrix", "calibrate", + "diagnostics_payload", "relative_error_loss", "specs_from_pe_surface", + "write_calibration_diagnostics", "__version__", ] diff --git a/packages/populace-calibrate/src/populace/calibrate/diagnostics.py b/packages/populace-calibrate/src/populace/calibrate/diagnostics.py new file mode 100644 index 0000000..13e0701 --- /dev/null +++ b/packages/populace-calibrate/src/populace/calibrate/diagnostics.py @@ -0,0 +1,116 @@ +"""Serialize a calibration's diagnostics so they travel with the artifact. + +A :class:`~populace.calibrate.solve.CalibrationResult` carries everything a +reviewer needs to audit what calibration did — per-target estimates before +and after, the per-epoch loss trajectory, the targets that failed to compile +*and why*, and the solver options actually used. Until now none of it left +the build machine: the build pushed the diagnostics to telemetry and dropped +them, and the published ``.npz`` kept only closing scalars. "Skipped and +reported, never dropped silently" is only true if the report ships. + +:func:`diagnostics_payload` renders the result as a JSON-stable dict, and +:func:`write_calibration_diagnostics` writes it as +``calibration_diagnostics.json`` — the artifact a release publishes next to +its manifests (charter rule: artifacts carry their environment; a published +dataset's calibration evidence belongs with the dataset, not in telemetry). +""" + +from __future__ import annotations + +import json +import math +from pathlib import Path + +from populace.calibrate.solve import CalibrationResult + +__all__ = [ + "CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION", + "diagnostics_payload", + "write_calibration_diagnostics", +] + +#: Version of the diagnostics payload. Consumers (dashboards, scorers) key +#: their readers on it; bump it with any shape change. +CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION = 1 + + +def _finite(value: float) -> float | None: + """JSON has no NaN/inf; a non-finite diagnostic serializes as null.""" + value = float(value) + return value if math.isfinite(value) else None + + +def _jsonable(value: object) -> object: + """An option value as strict JSON: non-finite floats become null.""" + if isinstance(value, float): + return _finite(value) + return value + + +def diagnostics_payload(result: CalibrationResult) -> dict: + """Render a calibration result as a JSON-stable diagnostics payload. + + The payload carries the full evidence, not summaries: every per-target + row, the whole loss trajectory, and every skipped target with its + reason. Summary scalars (``final_loss``, ``fraction_within_10pct``) are + included so a consumer need not recompute them, but they are derived + from the rows, never a substitute. + + Args: + result: The :func:`~populace.calibrate.solve.calibrate` output. + + Returns: + A dict that round-trips through ``json`` unchanged (non-finite + floats become ``null``). + """ + return { + "schema_version": CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION, + "weight_entity": result.weight_entity, + "options": {key: _jsonable(value) for key, value in result.options.items()}, + "l0_lambda": _finite(result.l0_lambda), + "n_nonzero": int(result.n_nonzero), + "n_records": int(result.weights.shape[0]), + "initial_loss": _finite(result.initial_loss), + "final_loss": _finite(result.final_loss), + "fraction_within_10pct": _finite(result.fraction_within_10pct), + "loss_trajectory": [_finite(loss) for loss in result.loss_trajectory], + "skipped": [ + {"name": skip.target.name, "reason": skip.reason} + for skip in result.skipped + ], + "targets": [ + { + "name": diagnostic.name, + "target": _finite(diagnostic.target), + "initial_estimate": _finite(diagnostic.initial_estimate), + "final_estimate": _finite(diagnostic.final_estimate), + "relative_error": _finite(diagnostic.relative_error), + "within_tolerance": diagnostic.within_tolerance, + } + for diagnostic in result.diagnostics + ], + } + + +def write_calibration_diagnostics( + result: CalibrationResult, path: Path | str +) -> Path: + """Write the diagnostics payload to ``path`` as JSON. + + The conventional filename is ``calibration_diagnostics.json`` inside a + release directory, alongside ``build_manifest.json``. + + Args: + result: The :func:`~populace.calibrate.solve.calibrate` output. + path: Destination file path; parent directories must exist. + + Returns: + The path written. + """ + path = Path(path) + # allow_nan=False is the guard: a non-finite value that escaped the + # scrub is a bug here, not something to smuggle out as invalid JSON. + path.write_text( + json.dumps(diagnostics_payload(result), indent=1, allow_nan=False) + ) + return path diff --git a/packages/populace-calibrate/tests/test_diagnostics.py b/packages/populace-calibrate/tests/test_diagnostics.py new file mode 100644 index 0000000..7e523ff --- /dev/null +++ b/packages/populace-calibrate/tests/test_diagnostics.py @@ -0,0 +1,106 @@ +"""The diagnostics artifact: a calibration's evidence ships with it. + +Behavioral contracts: the payload carries every per-target row, the whole +loss trajectory, and every skipped target with its reason; it round-trips +through strict JSON (no NaN/Infinity tokens); and the file writer produces +the artifact a release directory publishes. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +from populace.calibrate import ( + CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION, + Target, + TargetSet, + calibrate, + diagnostics_payload, + write_calibration_diagnostics, +) + + +def _result(feasible_frame, *, with_skip: bool = False, epochs: int = 120): + frame, truths = feasible_frame() + targets = [ + Target( + name="population", + entity="household", + aggregation="count", + value=truths["population"] * 1.2, + ), + Target( + name="income", + entity="household", + aggregation="sum", + value=truths["income"] * 1.2, + measure="income", + tolerance=truths["income"], + ), + ] + if with_skip: + targets.append( + Target( + name="ghost", + entity="household", + aggregation="sum", + value=1.0, + measure="no_such_column", + ) + ) + return calibrate(frame, TargetSet(tuple(targets)), epochs=epochs, seed=0) + + +def test_payload_carries_full_evidence(feasible_frame) -> None: + result = _result(feasible_frame, epochs=120) + payload = diagnostics_payload(result) + + assert payload["schema_version"] == CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION + assert payload["weight_entity"] == "household" + assert len(payload["loss_trajectory"]) == 120 + assert len(payload["targets"]) == len(result.diagnostics) + assert payload["n_records"] == result.weights.shape[0] + assert payload["final_loss"] == result.final_loss + assert payload["fraction_within_10pct"] == result.fraction_within_10pct + assert payload["options"]["epochs"] == 120 + assert payload["options"]["seed"] == 0 + + income = next( + row for row in payload["targets"] if row["name"].startswith("income") + ) + assert income["initial_estimate"] is not None + assert income["final_estimate"] is not None + assert income["within_tolerance"] is True # tolerance was a whole truth wide + population = next( + row for row in payload["targets"] if row["name"].startswith("population") + ) + assert population["within_tolerance"] is None # no tolerance declared + + +def test_skipped_targets_ship_with_their_reason(feasible_frame) -> None: + result = _result(feasible_frame, with_skip=True) + payload = diagnostics_payload(result) + assert len(payload["skipped"]) == 1 + skip = payload["skipped"][0] + assert skip["name"] == "ghost" + assert "no_such_column" in skip["reason"] + # The skip never leaks into the compiled target rows. + assert all(not row["name"].startswith("ghost") for row in payload["targets"]) + + +def test_payload_is_strict_json(feasible_frame) -> None: + result = _result(feasible_frame) + payload = diagnostics_payload(result) + encoded = json.dumps(payload, allow_nan=False) # raises on NaN/inf + assert json.loads(encoded) == payload + + +def test_writer_round_trips(feasible_frame, tmp_path: Path) -> None: + result = _result(feasible_frame) + path = write_calibration_diagnostics( + result, tmp_path / "calibration_diagnostics.json" + ) + loaded = json.loads(path.read_text()) + assert loaded == diagnostics_payload(result) + assert loaded["schema_version"] == CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION