Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions packages/populace-build/tests/test_gates.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,31 @@ def test_matches_the_calibrator_formula(self) -> None:
expected = (((est - tgt) / (tgt + 1.0)) ** 2).mean()
assert relative_error_loss(est, tgt) == pytest.approx(expected)

def test_accepts_target_loss_weights(self) -> None:
    """Weighted loss equals the weight-averaged squared bounded relative error."""
    estimates = np.asarray([110.0, 90.0])
    targets = np.asarray([100.0, 100.0])
    row_weights = np.asarray([10.0, 1.0])

    # Reference value computed directly from the documented formula.
    bounded_relative_error = (estimates - targets) / (targets + 1.0)
    expected = np.average(bounded_relative_error**2, weights=row_weights)

    observed = relative_error_loss(
        estimates,
        targets,
        target_loss_weights=row_weights,
    )
    assert observed == pytest.approx(expected)

def test_shape_mismatch_is_refused(self) -> None:
    """Estimates and targets of different lengths raise ``ValueError``."""
    two_estimates = np.zeros(2)
    three_targets = np.zeros(3)
    with pytest.raises(ValueError, match="must align"):
        relative_error_loss(two_estimates, three_targets)

def test_weight_shape_mismatch_is_refused(self) -> None:
    """Loss weights whose length differs from the targets raise ``ValueError``."""
    aligned_pair = (np.zeros(2), np.zeros(2))
    misaligned_weights = np.zeros(3)
    with pytest.raises(ValueError, match="target_loss_weights"):
        relative_error_loss(*aligned_pair, target_loss_weights=misaligned_weights)


class TestZeroValuedAnchor:
def test_zero_anchor_gates_on_absolute_scale(self) -> None:
Expand Down
29 changes: 28 additions & 1 deletion packages/populace-build/tests/test_us_fiscal_refresh_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
import pandas as pd

from populace.calibrate import TargetSpec
from populace.calibrate import TargetRegistry, TargetSpec
from populace.frame import Frame, WeightKind


Expand Down Expand Up @@ -392,6 +392,33 @@ def test_release_gate_failures_reject_missing_critical_targets() -> None:
]


def test_target_value_loss_weights_prioritize_large_targets() -> None:
    """Value-based loss weights favor the large target over the small one.

    Builds a two-target registry (values 10 and 1,000,000) and checks that
    the builder returns one weight per target, that the weights average to
    one, and that the large target outweighs the small one by more than
    10,000x.
    """
    builder = _load_builder_module()
    registry = TargetRegistry(
        (
            TargetSpec(
                name="small_count",
                entity="household",
                value=10.0,
                source="fixture",
            ),
            TargetSpec(
                name="large_amount",
                entity="household",
                value=1_000_000.0,
                source="fixture",
            ),
        ),
        country="us",
    )

    weights = builder._target_value_loss_weights(registry)

    assert weights.shape == (2,)
    # Tolerance-based comparison: a mean produced by floating-point
    # normalization can differ from 1.0 by an ulp, so exact equality is fragile.
    assert np.isclose(weights.mean(), 1.0)
    assert weights[1] > weights[0] * 10_000


def test_health_input_signal_gate_rejects_degenerate_aca_inputs() -> None:
builder = _load_builder_module()

Expand Down
110 changes: 102 additions & 8 deletions packages/populace-calibrate/src/populace/calibrate/solve.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,13 +237,22 @@ def _apply_constraint(matrix: torch.Tensor, weights: torch.Tensor) -> torch.Tens
return matrix @ weights


def relative_error_loss(estimates: np.ndarray, targets: np.ndarray) -> float:
"""THE loss, in numpy: ``mean(((est - tgt)/(tgt + 1))**2)``.
def relative_error_loss(
estimates: np.ndarray,
targets: np.ndarray,
*,
target_loss_weights: np.ndarray | None = None,
) -> float:
"""THE loss, in numpy: weighted ``((est - tgt)/(tgt + 1))**2``.

The single canonical definition every measurement imports — the solver's
closing loss, the acceptance gates, and scorers all call this function
(the torch twin below is the autograd path of the same formula). Refuses
non-finite inputs: a NaN estimate is a harness bug, not a large miss.
When ``target_loss_weights`` is omitted, this is the historical unweighted
mean. When supplied, weights must align to targets and are normalized by
their own sum, so multiplying all weights by a constant does not change the
objective.
"""
estimates = np.asarray(estimates, dtype=np.float64)
targets = np.asarray(targets, dtype=np.float64)
Expand All @@ -258,11 +267,19 @@ def relative_error_loss(estimates: np.ndarray, targets: np.ndarray) -> float:
"estimate or target values."
)
rel = (estimates - targets) / (targets + 1.0)
return float((rel**2).mean())
loss = rel**2
weights = _validate_target_loss_weights(target_loss_weights, targets.shape)
if weights is None:
return float(loss.mean())
return float(np.average(loss, weights=weights))


def _relative_error_loss(estimate: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
"""The relative-error loss ``mean(((est - tgt)/(tgt + 1))**2)``.
def _relative_error_loss(
estimate: torch.Tensor,
targets: torch.Tensor,
target_loss_weights: torch.Tensor | None,
) -> torch.Tensor:
"""The relative-error loss, optionally averaged with target row weights.

The ``+1`` in the *denominator* is the regularizer: it keeps the loss finite
and well-scaled for targets near zero (a zero-valued count target then
Expand All @@ -275,7 +292,46 @@ def _relative_error_loss(estimate: torch.Tensor, targets: torch.Tensor) -> torch
also the loss this docstring has always described.
"""
rel_error = (estimate - targets) / (targets + 1.0)
return (rel_error**2).mean()
loss = rel_error**2
if target_loss_weights is None:
return loss.mean()
return (loss * target_loss_weights).sum() / target_loss_weights.sum()


def _validate_target_loss_weights(
target_loss_weights: np.ndarray | None,
shape: tuple[int, ...],
) -> np.ndarray | None:
if target_loss_weights is None:
return None
weights = np.asarray(target_loss_weights, dtype=np.float64)
if weights.shape != shape:
raise ValueError(
"target_loss_weights must align with targets, got shapes "
f"{weights.shape} vs {shape}."
)
if not np.isfinite(weights).all():
raise ValueError("target_loss_weights must be finite.")
if (weights < 0).any():
raise ValueError("target_loss_weights must be non-negative.")
if float(weights.sum()) <= 0.0:
raise ValueError("target_loss_weights must have positive total weight.")
return weights


def _target_loss_weight_options(
target_loss_weights: np.ndarray | None,
) -> Mapping[str, object]:
if target_loss_weights is None:
return {"kind": "uniform"}
weights = np.asarray(target_loss_weights, dtype=np.float64)
return {
"kind": "provided",
"n": int(weights.shape[0]),
"sum": float(weights.sum()),
"min": float(weights.min()),
"max": float(weights.max()),
}


def _build_diagnostics(
Expand Down Expand Up @@ -341,6 +397,7 @@ def _build_diagnostics(
def _optimize(
matrix: torch.Tensor,
targets: torch.Tensor,
target_loss_weights: torch.Tensor | None,
initial_weights: np.ndarray,
*,
epochs: int,
Expand Down Expand Up @@ -387,7 +444,7 @@ def _optimize(
if gates is not None:
weights = weights * gates()
estimate = _apply_constraint(matrix, weights)
loss = _relative_error_loss(estimate, targets)
loss = _relative_error_loss(estimate, targets, target_loss_weights)
penalty = (
l0_lambda * gates.get_penalty()
if (gates is not None and l0_lambda > 0.0)
Expand Down Expand Up @@ -451,6 +508,7 @@ def _optimize(
def _search_l0_lambda_for_budget(
matrix: torch.Tensor,
targets: torch.Tensor,
target_loss_weights: torch.Tensor | None,
initial_weights: np.ndarray,
*,
target_records: int,
Expand Down Expand Up @@ -508,6 +566,7 @@ def evaluate(lam: float) -> tuple[np.ndarray, np.ndarray, int]:
weights, trajectory = _optimize(
matrix,
targets,
target_loss_weights,
initial_weights,
epochs=epochs,
learning_rate=learning_rate,
Expand Down Expand Up @@ -676,6 +735,7 @@ def calibrate(
temperature: float = 0.25,
budget_iters: int = _DEFAULT_BUDGET_ITERS,
seed: int = 0,
target_loss_weights: np.ndarray | None = None,
) -> CalibrationResult:
"""Calibrate ``weight_entity``'s weights to ``targets`` over ``frame``.

Expand Down Expand Up @@ -721,6 +781,12 @@ def calibrate(
may spend bisecting ``l0_lambda`` (only used when ``target_records``
is set). Higher resolves the budget finer at a proportional cost.
seed: Seed for torch's RNG (the gate sampling), for reproducibility.
target_loss_weights: Optional non-negative row weights aligned to the
supplied :class:`TargetSet`. When omitted, every compiled target row
contributes equally (historical behavior). When supplied, the weights
for skipped targets are dropped with those targets, and the squared
bounded relative errors for compiled rows are averaged with the
remaining weights, normalized by their sum.

Returns:
A :class:`CalibrationResult` with the calibrated frame, per-target
Expand Down Expand Up @@ -768,6 +834,10 @@ def calibrate(
if budget_iters <= 0:
raise ValueError(f"budget_iters must be positive, got {budget_iters!r}.")

target_loss_weights_input = _validate_target_loss_weights(
target_loss_weights,
(len(targets),),
)
problem = build_constraint_matrix(frame, targets, weight_entity)
initial = problem.initial_weights
w0 = initial.values
Expand All @@ -776,6 +846,25 @@ def calibrate(
torch.manual_seed(seed)
matrix_t = _torch_constraint_matrix(problem.matrix)
targets_t = torch.tensor(problem.target_vector, dtype=torch.float32)
target_loss_weights_np: np.ndarray | None = None
if target_loss_weights_input is not None:
weights_by_key = {
target.key: weight
for target, weight in zip(targets, target_loss_weights_input, strict=True)
}
target_loss_weights_np = np.asarray(
[weights_by_key[target.key] for target in problem.targets],
dtype=np.float64,
)
target_loss_weights_np = _validate_target_loss_weights(
target_loss_weights_np,
problem.target_vector.shape,
)
target_loss_weights_t = (
torch.tensor(target_loss_weights_np, dtype=torch.float32)
if target_loss_weights_np is not None
else None
)

if target_records is not None:
# Budget control (Finding 3): search l0_lambda so the achieved non-zero
Expand All @@ -785,6 +874,7 @@ def calibrate(
_search_l0_lambda_for_budget(
matrix_t,
targets_t,
target_loss_weights_t,
w0,
target_records=target_records,
epochs=epochs,
Expand All @@ -804,6 +894,7 @@ def calibrate(
final_weights, trajectory = _optimize(
matrix_t,
targets_t,
target_loss_weights_t,
w0,
epochs=epochs,
learning_rate=learning_rate,
Expand Down Expand Up @@ -838,7 +929,9 @@ def calibrate(
# evaluated after the closing mass/cap projections — so final_loss describes
# what calibrate returns, not the trajectory's pre-projection tail.
closing_loss = relative_error_loss(
problem.estimates(final_weights), problem.target_vector
problem.estimates(final_weights),
problem.target_vector,
target_loss_weights=target_loss_weights_np,
)

return CalibrationResult(
Expand All @@ -861,6 +954,7 @@ def calibrate(
"max_weight_ratio": max_weight_ratio,
"target_records": target_records,
"seed": seed,
"target_loss_weights": _target_loss_weight_options(target_loss_weights_np),
"matrix_format": (
"sparse_csr" if matrix_t.layout == torch.sparse_csr else "dense"
),
Expand Down
89 changes: 89 additions & 0 deletions packages/populace-calibrate/tests/test_solve.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from __future__ import annotations

import numpy as np
import pytest

from populace.calibrate import Target, TargetSet, calibrate
Expand Down Expand Up @@ -71,6 +72,94 @@ def test_conserve_mass_holds_total_free_mass_moves(feasible_frame) -> None:
assert abs(conserved_total - initial_total) / initial_total < 1e-6


def test_target_loss_weights_prioritize_conflicting_targets(feasible_frame) -> None:
    """Up-weighting one of two contradictory count targets moves the fit toward it."""
    frame, truths = feasible_frame()
    population = truths["population"]
    low_value, high_value = population * 0.5, population * 1.5

    # Two count targets on the same entity that cannot both be satisfied.
    conflicting = TargetSet(
        tuple(
            Target(
                name=name,
                entity="household",
                aggregation="count",
                value=value,
            )
            for name, value in (
                ("population_low", low_value),
                ("population_high", high_value),
            )
        )
    )

    shared_options = {"epochs": 300, "seed": 0}
    baseline = calibrate(frame, conflicting, **shared_options)
    favoring_high = calibrate(
        frame,
        conflicting,
        target_loss_weights=np.asarray([1.0, 1_000.0]),
        **shared_options,
    )

    baseline_gap = abs(baseline.weights.sum() - high_value)
    favored_gap = abs(favoring_high.weights.sum() - high_value)
    assert favored_gap < baseline_gap
    assert favoring_high.options["target_loss_weights"]["kind"] == "provided"


def test_target_loss_weights_follow_skipped_targets(feasible_frame) -> None:
    """A skipped target's weight is dropped along with the target itself."""
    frame, truths = feasible_frame()

    # References a measure named "missing_measure"; the test expects this
    # target to be reported as skipped.
    unmeasurable = Target(
        name="missing_measure",
        entity="household",
        aggregation="sum",
        value=1.0,
        measure="missing_measure",
    )
    population = _population_target(truths["population"], 1.0)

    result = calibrate(
        frame,
        TargetSet((unmeasurable, population)),
        epochs=50,
        seed=0,
        target_loss_weights=np.asarray([1_000.0, 1.0]),
    )

    skipped_names = [skip.target.name for skip in result.skipped]
    assert skipped_names == ["missing_measure"]
    assert result.options["target_loss_weights"]["n"] == 1


def test_target_loss_weights_must_survive_skipped_targets(feasible_frame) -> None:
    """Calibration is refused when only the skipped target carries weight."""
    frame, truths = feasible_frame()

    unmeasurable = Target(
        name="missing_measure",
        entity="household",
        aggregation="sum",
        value=1.0,
        measure="missing_measure",
    )
    population = _population_target(truths["population"], 1.0)
    target_set = TargetSet((unmeasurable, population))

    # All of the weight sits on the first target; the remaining one has zero.
    weight_only_on_first = np.asarray([1.0, 0.0])

    with pytest.raises(ValueError, match="positive total weight"):
        calibrate(
            frame,
            target_set,
            epochs=50,
            seed=0,
            target_loss_weights=weight_only_on_first,
        )


def test_max_weight_ratio_is_respected(feasible_frame) -> None:
frame, truths = feasible_frame()
w0 = frame.resolve_weights("household").values
Expand Down
Loading
Loading