From b5d3b572c9c69efcb04a654246269c0617694b8e Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Wed, 17 Jun 2026 12:09:46 -0400 Subject: [PATCH] Weight fiscal calibration loss by target value basis --- .../tests/test_us_fiscal_refresh_builder.py | 76 ++++++++++++++++++- packages/populace-calibrate/README.md | 2 +- .../src/populace/calibrate/solve.py | 63 +++++++-------- .../populace-calibrate/tests/test_solve.py | 23 +++++- tools/build_us_fiscal_refresh_release.py | 56 +++++++++++++- 5 files changed, 176 insertions(+), 44 deletions(-) diff --git a/packages/populace-build/tests/test_us_fiscal_refresh_builder.py b/packages/populace-build/tests/test_us_fiscal_refresh_builder.py index d91d765..f6b1f91 100644 --- a/packages/populace-build/tests/test_us_fiscal_refresh_builder.py +++ b/packages/populace-build/tests/test_us_fiscal_refresh_builder.py @@ -417,7 +417,7 @@ def test_fiscal_target_loss_weights_prioritize_national_totals() -> None: TargetSpec( name="distribution_row", entity="household", - value=1_000_000.0, + value=10.0, source="fixture", ), ), @@ -458,6 +458,80 @@ def test_fiscal_target_loss_weights_downweight_state_rows() -> None: assert weights[0] == weights[1] * builder.US_STATE_TARGET_LOSS_MULTIPLIER +def test_fiscal_target_loss_weights_scale_by_value_within_basis() -> None: + builder = _load_builder_module() + registry = TargetRegistry( + ( + TargetSpec( + name="amount_small", + entity="household", + value=100.0, + source="fixture", + metadata={"source_measure_id": "payment_amount"}, + ), + TargetSpec( + name="amount_large", + entity="household", + value=300.0, + source="fixture", + metadata={"source_measure_id": "payment_amount"}, + ), + TargetSpec( + name="returns_small", + entity="household", + value=10.0, + source="fixture", + metadata={ + "source_measure_id": "income_tax_liability_returns", + "count": "true", + }, + ), + TargetSpec( + name="returns_large", + entity="household", + value=30.0, + source="fixture", + metadata={"source_measure_id": "ctc_claims", "count": "true"}, + ), + ), + country="us", + ) + + weights = builder._fiscal_target_loss_weights(registry) + + assert weights.mean() == 1.0 + assert weights[1] / weights[0] == 3.0 + assert weights[3] / weights[2] == 3.0 + assert weights[0] == weights[2] + assert weights[1] == weights[3] + + +def test_fiscal_target_value_basis_keeps_person_counts_separate_from_amounts() -> None: + builder = _load_builder_module() + amount = TargetSpec( + name="amount", + entity="household", + value=100.0, + source="fixture", + metadata={"source_measure_id": "payment_amount"}, + ) + person_count = TargetSpec( + name="person_count", + entity="household", + value=100.0, + source="fixture", + metadata={ + "measure_mode": "positive_count", + "source_measure_id": "aptc_recipients", + "target_role": "aca_ptc_recipients", + "count_map_to": "person", + }, + ) + + assert builder._fiscal_target_value_basis(amount) == "amount" + assert builder._fiscal_target_value_basis(person_count) == "person_count" + + def test_release_gate_failures_reject_bad_ctc_fit() -> None: builder = _load_builder_module() ctc_target = 82_863_353_000.0 diff --git a/packages/populace-calibrate/README.md b/packages/populace-calibrate/README.md index 08ce278..9c18c0a 100644 --- a/packages/populace-calibrate/README.md +++ b/packages/populace-calibrate/README.md @@ -23,7 +23,7 @@ reproduces them. 3. **Solve for calibrated weights.** `calibrate(frame, targets, ...)` optimizes the log-weights with torch Adam to minimize **capped weighted MAPE**: `weighted_mean(min(abs((A @ w - b) / scale), cap))`. By default - `scale = max(abs(target), abs(initial_estimate), 1)` and `cap = 10` + `scale = max(abs(target), 1)` and `cap = 10` (1000%). Weights stay strictly positive by construction (`w = exp(log_w)`). The result carries a new `Frame` with `CALIBRATED` weights, per-target diagnostics, and the loss trajectory. diff --git a/packages/populace-calibrate/src/populace/calibrate/solve.py b/packages/populace-calibrate/src/populace/calibrate/solve.py index 0c8581e..f3f7c59 100644 --- a/packages/populace-calibrate/src/populace/calibrate/solve.py +++ b/packages/populace-calibrate/src/populace/calibrate/solve.py @@ -8,9 +8,8 @@ ``weighted_mean(abs((A @ w - b) / s))`` where ``s`` is a per-target scale fixed before optimization. By default, -``s = max(abs(b), abs(A @ w0), 1)``: zero-valued or tiny targets can no longer -dominate the objective merely because their denominator is near zero, while -large national aggregates remain measured as proportional misses. +``s = max(abs(b), 1)``: the loss is measured against the administrative target +value, while zero-valued targets use one unit in their measure basis. with torch's Adam over the **log-weights** (so weights stay strictly positive by construction). It returns a @@ -263,12 +262,12 @@ def relative_error_loss( non-finite inputs: a NaN estimate is a harness bug, not a large miss. ``target_loss_scales`` is the row denominator ``s`` in - ``abs((estimate - target) / s)``. If omitted, the diagnostic helper uses - ``max(abs(target), 1)``; :func:`calibrate` supplies the stronger production - default ``max(abs(target), abs(initial_estimate), 1)`` once it has compiled - the target matrix. Target weights are normalized by their own sum, so - multiplying all weights by a constant does not change the objective. Each - row's scaled absolute miss is capped by ``target_loss_cap``. + ``abs((estimate - target) / s)``. If omitted, rows use + ``max(abs(target), 1)`` so the target surface, not the starting estimate, + defines the permanent scale. Target weights are normalized by their own + sum, so multiplying all weights by a constant does not change the + objective. Each row's scaled absolute miss is capped by + ``target_loss_cap``. """ estimates = np.asarray(estimates, dtype=np.float64) targets = np.asarray(targets, dtype=np.float64) @@ -324,34 +323,29 @@ def _validate_target_loss_cap(target_loss_cap: float) -> float: def default_target_loss_scales( targets: np.ndarray, - initial_estimates: np.ndarray, + initial_estimates: np.ndarray | None = None, ) -> np.ndarray: """Default fixed row scales for calibration. The old objective used ``target + 1`` as the denominator. That makes a zero-valued target with a large starting estimate dominate the loss by many - orders of magnitude. The production scale is instead the largest meaningful - size already known before optimization: the absolute target, the absolute - starting estimate, or one unit. + orders of magnitude. The production scale is instead target-defined: + the absolute target or one unit in the row's measure basis. Starting + estimates are deliberately excluded; they describe the baseline, not the + administrative fact we are trying to hit. ``initial_estimates`` is accepted + only for compatibility with callers of the earlier helper signature. """ targets = np.asarray(targets, dtype=np.float64) - initial_estimates = np.asarray(initial_estimates, dtype=np.float64) - if targets.shape != initial_estimates.shape: - raise ValueError( - "targets and initial_estimates must align, got shapes " - f"{targets.shape} vs {initial_estimates.shape}." - ) - if not (np.isfinite(targets).all() and np.isfinite(initial_estimates).all()): - raise ValueError( - "default_target_loss_scales requires finite targets and estimates." - ) - return np.maximum.reduce( - [ - np.abs(targets), - np.abs(initial_estimates), - np.ones_like(targets, dtype=np.float64), - ] - ) + if initial_estimates is not None: + initial_estimates = np.asarray(initial_estimates, dtype=np.float64) + if targets.shape != initial_estimates.shape: + raise ValueError( + "targets and initial_estimates must align, got shapes " + f"{targets.shape} vs {initial_estimates.shape}." + ) + if not np.isfinite(targets).all(): + raise ValueError("default_target_loss_scales requires finite targets.") + return np.maximum(np.abs(targets), np.ones_like(targets, dtype=np.float64)) def _validate_target_loss_weights( @@ -900,7 +894,7 @@ def calibrate( their sum. target_loss_scales: Optional positive row scales aligned to the supplied :class:`TargetSet`. When omitted, compiled rows use - :func:`default_target_loss_scales`, fixed from the input weights. + :func:`default_target_loss_scales`, fixed from target values only. Supplying scales is mainly for harnesses and specialized releases. target_loss_cap: Positive per-row cap on scaled absolute misses. The default caps each target's objective contribution at 1000%. @@ -975,7 +969,7 @@ def calibrate( targets_t = torch.tensor(problem.target_vector, dtype=torch.float32) target_loss_weights_np: np.ndarray | None = None target_loss_scales_np: np.ndarray - target_loss_scale_kind = "default_initial_or_target" + target_loss_scale_kind = "default_target" if target_loss_weights_input is not None: weights_by_key = { target.key: weight @@ -1005,10 +999,7 @@ def calibrate( ) target_loss_scale_kind = "provided" else: - target_loss_scales_np = default_target_loss_scales( - problem.target_vector, - problem.estimates(w0), - ) + target_loss_scales_np = default_target_loss_scales(problem.target_vector) target_loss_weights_t = ( torch.tensor(target_loss_weights_np, dtype=torch.float32) if target_loss_weights_np is not None diff --git a/packages/populace-calibrate/tests/test_solve.py b/packages/populace-calibrate/tests/test_solve.py index c879bed..2bb3446 100644 --- a/packages/populace-calibrate/tests/test_solve.py +++ b/packages/populace-calibrate/tests/test_solve.py @@ -527,10 +527,7 @@ def test_final_loss_describes_the_returned_weights(feasible_frame) -> None: # Recompute the capped weighted-MAPE loss on the returned weights directly. # final_loss is a float64 closing eval, so it matches to machine epsilon. b = result.problem.target_vector - scales = default_target_loss_scales( - b, - result.problem.estimates(result.initial_weights), - ) + scales = default_target_loss_scales(b) est = result.problem.estimates(result.weights) true_loss = relative_error_loss(est, b, target_loss_scales=scales) assert abs(result.final_loss - true_loss) < 1e-9 @@ -544,6 +541,24 @@ def test_final_loss_describes_the_returned_weights(feasible_frame) -> None: assert abs(result.initial_loss - true_initial) < 1e-5 +def test_default_target_loss_scales_ignore_initial_estimates() -> None: + targets = np.asarray([0.0, 10.0, 1_000.0]) + initial_estimates = np.asarray([1_000_000.0, 20_000.0, 1.0]) + + scales = default_target_loss_scales(targets, initial_estimates) + + np.testing.assert_allclose(scales, np.asarray([1.0, 10.0, 1_000.0])) + + +def test_default_target_loss_scales_ignore_nonfinite_initial_estimates() -> None: + targets = np.asarray([0.0, 10.0, 1_000.0]) + initial_estimates = np.asarray([np.inf, np.nan, -np.inf]) + + scales = default_target_loss_scales(targets, initial_estimates) + + np.testing.assert_allclose(scales, np.asarray([1.0, 10.0, 1_000.0])) + + def test_mean_diagnostics_report_the_true_achieved_ratio(feasible_frame) -> None: """``mean`` diagnostics describe the true ratio, not the linearized row value. diff --git a/tools/build_us_fiscal_refresh_release.py b/tools/build_us_fiscal_refresh_release.py index 6aadcbc..f108888 100644 --- a/tools/build_us_fiscal_refresh_release.py +++ b/tools/build_us_fiscal_refresh_release.py @@ -69,7 +69,7 @@ POST_EXPORT_ABSOLUTE_TOLERANCE = 1_000_000.0 POST_EXPORT_RELATIVE_TOLERANCE = 5e-4 US_FISCAL_TARGET_LOSS_WEIGHTING = ( - "semantic_weighted_mape_initial_or_target_scale_cap_1000pct" + "semantic_value_weighted_mape_by_measure_basis_target_scale_cap_1000pct" ) US_FISCAL_TARGET_LOSS_CAP = 10.0 US_NATIONAL_TOTAL_TARGET_LOSS_MULTIPLIER = 25.0 @@ -1301,7 +1301,7 @@ def _write_npz(path: Path, *, result, registry: TargetRegistry) -> None: def _fiscal_target_loss_weights(registry: TargetRegistry) -> np.ndarray: - weights = np.ones(len(registry.specs), dtype=np.float64) + basis_weights = _fiscal_target_value_basis_weights(registry) state_multipliers = np.asarray( [ US_STATE_TARGET_LOSS_MULTIPLIER if spec.metadata.get("state_fips") else 1.0 @@ -1323,11 +1323,63 @@ def _fiscal_target_loss_weights(registry: TargetRegistry) -> np.ndarray: ], dtype=np.float64, ) + weights = basis_weights weights *= state_multipliers weights *= multipliers return weights / weights.mean() +def _fiscal_target_value_basis_weights(registry: TargetRegistry) -> np.ndarray: + weights = np.ones(len(registry.specs), dtype=np.float64) + bases = np.asarray( + [_fiscal_target_value_basis(spec) for spec in registry.specs], + dtype=object, + ) + values = np.asarray( + [max(abs(float(spec.value)), 1.0) for spec in registry.specs], + dtype=np.float64, + ) + for basis in sorted(set(bases.tolist())): + mask = bases == basis + mean_value = values[mask].mean() + if mean_value > 0: + weights[mask] = values[mask] / mean_value + return weights + + +def _fiscal_target_value_basis(spec) -> str: + metadata = spec.metadata + measure_mode = metadata.get("measure_mode", "") + source_measure_id = metadata.get("source_measure_id", "") + target_role = metadata.get("target_role", "") + if metadata.get("count") == "true": + return ( + "return_count" + if _fiscal_target_is_return_count_measure(source_measure_id) + else "count" + ) + if measure_mode in {"count", "positive_count"}: + if metadata.get("count_map_to") == "person" or target_role in { + "aca_enrollment", + "aca_ptc_recipients", + "medicaid_enrollment", + "medicaid_chip_enrollment", + }: + return "person_count" + return "count" + if "enrollment" in source_measure_id or "recipients" in source_measure_id: + return "person_count" + if "return" in source_measure_id and "count" in source_measure_id: + return "return_count" + return "amount" + + +def _fiscal_target_is_return_count_measure(source_measure_id: str) -> bool: + return source_measure_id == "return_count" or source_measure_id.endswith( + ("_returns", "_claims") + ) + + def _release_gate_failures( result, compilation: Mapping[str, object],