Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ def _assert_frame_compatible(version: str, required: tuple[int, int]) -> None:

_assert_frame_compatible(_frame_version, _REQUIRED_FRAME_SERIES)

from populace.calibrate.diagnostics import ( # noqa: E402 - after the compat gate
CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION,
diagnostics_payload,
write_calibration_diagnostics,
)
from populace.calibrate.matrix import ( # noqa: E402 - after the compat gate
CalibrationProblem,
SkippedTarget,
Expand Down Expand Up @@ -102,6 +107,7 @@ def _assert_frame_compatible(version: str, required: tuple[int, int]) -> None:

__all__ = [
"AGGREGATIONS",
"CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION",
"CONSERVE_MASS",
"FREE_MASS",
"CalibrationProblem",
Expand All @@ -114,7 +120,9 @@ def _assert_frame_compatible(version: str, required: tuple[int, int]) -> None:
"TargetSpec",
"build_constraint_matrix",
"calibrate",
"diagnostics_payload",
"relative_error_loss",
"specs_from_pe_surface",
"write_calibration_diagnostics",
"__version__",
]
116 changes: 116 additions & 0 deletions packages/populace-calibrate/src/populace/calibrate/diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""Serialize a calibration's diagnostics so they travel with the artifact.

A :class:`~populace.calibrate.solve.CalibrationResult` carries everything a
reviewer needs to audit what calibration did — per-target estimates before
and after, the per-epoch loss trajectory, the targets that failed to compile
*and why*, and the solver options actually used. Until now none of it left
the build machine: the build pushed the diagnostics to telemetry and dropped
them, and the published ``.npz`` kept only closing scalars. "Skipped and
reported, never dropped silently" is only true if the report ships.

:func:`diagnostics_payload` renders the result as a JSON-stable dict, and
:func:`write_calibration_diagnostics` writes it as
``calibration_diagnostics.json`` — the artifact a release publishes next to
its manifests (charter rule: artifacts carry their environment; a published
dataset's calibration evidence belongs with the dataset, not in telemetry).
"""

from __future__ import annotations

import json
import math
from pathlib import Path

from populace.calibrate.solve import CalibrationResult

# Public surface of this module: the schema version constant plus the two
# entry points (render the payload, write it to disk).
__all__ = [
"CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION",
"diagnostics_payload",
"write_calibration_diagnostics",
]

#: Version of the diagnostics payload. Consumers (dashboards, scorers) key
#: their readers on it; bump it with any shape change. It is emitted in
#: every payload under the ``"schema_version"`` key.
CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION = 1


def _finite(value: float) -> float | None:
"""JSON has no NaN/inf; a non-finite diagnostic serializes as null."""
value = float(value)
return value if math.isfinite(value) else None


def _jsonable(value: object) -> object:
    """Render an option value as strict JSON, scrubbing non-finite floats.

    A float that is NaN or infinite becomes ``None``. Dicts, lists and
    tuples are walked recursively, so a non-finite float nested inside a
    container option is scrubbed too rather than tripping a later
    ``json.dumps(..., allow_nan=False)``. Tuples are returned as lists,
    which is what JSON would turn them into anyway; normalizing here keeps
    the rendered value equal to its own JSON round-trip.

    Args:
        value: One solver option value (scalar or nested container).

    Returns:
        The value with every reachable non-finite float replaced by
        ``None``; anything that is not a float, dict, list or tuple is
        returned untouched.
    """
    if isinstance(value, float):
        # Same scrub the module applies to scalar diagnostics; float() also
        # flattens float subclasses to a plain Python float.
        number = float(value)
        return number if math.isfinite(number) else None
    if isinstance(value, dict):
        # Keys are left alone: only values can carry non-finite floats that
        # need scrubbing.
        return {key: _jsonable(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_jsonable(item) for item in value]
    return value


def diagnostics_payload(result: CalibrationResult) -> dict:
    """Build the JSON-ready diagnostics dict for one calibration result.

    Nothing is summarized away: each entry of ``result.diagnostics`` becomes
    a row under ``"targets"``, each entry of ``result.skipped`` becomes a
    ``{"name", "reason"}`` row under ``"skipped"``, and the loss trajectory
    is copied in full. The closing scalars (``final_loss``,
    ``fraction_within_10pct``) ride along as a convenience for consumers;
    the rows remain the evidence.

    Args:
        result: The :func:`~populace.calibrate.solve.calibrate` output.

    Returns:
        A plain dict whose float fields have been passed through the
        module's non-finite scrub, so NaN and infinities appear as ``None``
        (``null`` once encoded).
    """
    payload: dict = {
        "schema_version": CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION,
        "weight_entity": result.weight_entity,
    }
    payload["options"] = dict(
        (name, _jsonable(setting)) for name, setting in result.options.items()
    )
    payload["l0_lambda"] = _finite(result.l0_lambda)
    payload["n_nonzero"] = int(result.n_nonzero)
    payload["n_records"] = int(result.weights.shape[0])
    # The three loss/fit scalars get identical treatment; the tuple pins
    # the order in which their keys are inserted.
    for scalar in ("initial_loss", "final_loss", "fraction_within_10pct"):
        payload[scalar] = _finite(getattr(result, scalar))
    payload["loss_trajectory"] = list(map(_finite, result.loss_trajectory))

    skipped_rows = []
    for entry in result.skipped:
        skipped_rows.append({"name": entry.target.name, "reason": entry.reason})
    payload["skipped"] = skipped_rows

    target_rows = []
    for row in result.diagnostics:
        target_rows.append(
            {
                "name": row.name,
                "target": _finite(row.target),
                "initial_estimate": _finite(row.initial_estimate),
                "final_estimate": _finite(row.final_estimate),
                "relative_error": _finite(row.relative_error),
                # Passed through as-is (not scrubbed like the floats).
                "within_tolerance": row.within_tolerance,
            }
        )
    payload["targets"] = target_rows
    return payload


def write_calibration_diagnostics(
    result: CalibrationResult, path: Path | str
) -> Path:
    """Write the diagnostics payload to ``path`` as UTF-8 encoded JSON.

    The conventional filename is ``calibration_diagnostics.json`` inside a
    release directory, alongside ``build_manifest.json``.

    Args:
        result: The :func:`~populace.calibrate.solve.calibrate` output.
        path: Destination file path; parent directories must exist.

    Returns:
        The path written, as a :class:`~pathlib.Path`.

    Raises:
        ValueError: If a non-finite float survived into the payload
            (``json.dumps`` is called with ``allow_nan=False``).
    """
    path = Path(path)
    # allow_nan=False is the guard: a non-finite value that escaped the
    # scrub is a bug here, not something to smuggle out as invalid JSON.
    # Encoding happens before the file is touched, so a failure here leaves
    # any existing file at ``path`` intact.
    encoded = json.dumps(diagnostics_payload(result), indent=1, allow_nan=False)
    # Pin the encoding: a published artifact must not depend on the build
    # machine's locale default.
    path.write_text(encoded, encoding="utf-8")
    return path
106 changes: 106 additions & 0 deletions packages/populace-calibrate/tests/test_diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""The diagnostics artifact: a calibration's evidence ships with it.

Behavioral contracts: the payload carries every per-target row, the whole
loss trajectory, and every skipped target with its reason; it round-trips
through strict JSON (no NaN/Infinity tokens); and the file writer produces
the artifact a release directory publishes.
"""

from __future__ import annotations

import json
from pathlib import Path

from populace.calibrate import (
CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION,
Target,
TargetSet,
calibrate,
diagnostics_payload,
write_calibration_diagnostics,
)


def _result(feasible_frame, *, with_skip: bool = False, epochs: int = 120):
    """Calibrate the fixture frame against targets set 20% above its truths.

    Two targets are always present: a household ``count`` named
    ``population`` and a household ``sum`` of ``income`` whose tolerance is
    one whole truth wide. With ``with_skip`` a third target, ``ghost``, is
    added; it measures ``no_such_column``.

    Args:
        feasible_frame: Fixture callable returning ``(frame, truths)``.
        with_skip: Whether to append the ``ghost`` target.
        epochs: Epoch count forwarded to ``calibrate``.

    Returns:
        Whatever ``calibrate`` returns for this setup (seed fixed at 0).
    """
    frame, truths = feasible_frame()
    population = Target(
        name="population",
        entity="household",
        aggregation="count",
        value=truths["population"] * 1.2,
    )
    income = Target(
        name="income",
        entity="household",
        aggregation="sum",
        value=truths["income"] * 1.2,
        measure="income",
        tolerance=truths["income"],
    )
    targets = (population, income)
    if with_skip:
        ghost = Target(
            name="ghost",
            entity="household",
            aggregation="sum",
            value=1.0,
            measure="no_such_column",
        )
        targets = (*targets, ghost)
    return calibrate(frame, TargetSet(targets), epochs=epochs, seed=0)


def test_payload_carries_full_evidence(feasible_frame) -> None:
    """The payload mirrors the result: scalars, options, trajectory, rows."""
    result = _result(feasible_frame, epochs=120)
    payload = diagnostics_payload(result)

    def first_row(prefix: str) -> dict:
        # Target rows are located by name prefix.
        return next(
            row for row in payload["targets"] if row["name"].startswith(prefix)
        )

    assert payload["schema_version"] == CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION
    assert payload["weight_entity"] == "household"
    assert len(payload["loss_trajectory"]) == 120
    assert len(payload["targets"]) == len(result.diagnostics)
    assert payload["n_records"] == result.weights.shape[0]
    assert payload["final_loss"] == result.final_loss
    assert payload["fraction_within_10pct"] == result.fraction_within_10pct

    options = payload["options"]
    assert options["epochs"] == 120
    assert options["seed"] == 0

    income = first_row("income")
    assert income["initial_estimate"] is not None
    assert income["final_estimate"] is not None
    assert income["within_tolerance"] is True  # tolerance was a whole truth wide

    population = first_row("population")
    assert population["within_tolerance"] is None  # no tolerance declared


def test_skipped_targets_ship_with_their_reason(feasible_frame) -> None:
    """A target that could not compile is reported by name and reason."""
    payload = diagnostics_payload(_result(feasible_frame, with_skip=True))
    skipped = payload["skipped"]
    assert len(skipped) == 1
    ghost = skipped[0]
    assert ghost["name"] == "ghost"
    assert "no_such_column" in ghost["reason"]
    # The skip never leaks into the compiled target rows.
    assert not any(row["name"].startswith("ghost") for row in payload["targets"])


def test_payload_is_strict_json(feasible_frame) -> None:
    """The payload encodes without NaN/Infinity and decodes back unchanged."""
    payload = diagnostics_payload(_result(feasible_frame))
    # allow_nan=False makes the encoder raise on NaN/inf.
    text = json.dumps(payload, allow_nan=False)
    decoded = json.loads(text)
    assert decoded == payload


def test_writer_round_trips(feasible_frame, tmp_path: Path) -> None:
    """The file the writer produces parses back to the in-memory payload."""
    result = _result(feasible_frame)
    destination = tmp_path / "calibration_diagnostics.json"
    written = write_calibration_diagnostics(result, destination)
    on_disk = json.loads(written.read_text())
    assert on_disk == diagnostics_payload(result)
    assert on_disk["schema_version"] == CALIBRATION_DIAGNOSTICS_SCHEMA_VERSION
Loading