PolicyEngine · MaxGhenis · Jun 6, 2026 · Jun 10, 2026
diff --git a/changelog.d/imputer-canonical.breaking.md b/changelog.d/imputer-canonical.breaking.md
@@ -0,0 +1,8 @@
+**Renamed `ZeroInflatedImputer` to the canonical `microimpute.Imputer`** and made it the opinionated default. The previous abstract base class `Imputer` is now `BaseImputer` (still exported). `microimpute.Imputer` is the regime-gated, QRF-based, sequentially-chained imputer:
+
+- **Sign-regime gating** (`{neg, 0, pos}`) on by default (`signregime=True`); pass `signregime=False` to impute each numeric target with the base model directly (no gate, the `REGIME_NO_GATE` path).
+- **QRF base model** by default (`base_imputer_class=QRF`); swap for experiments.
+- **Sequential chained-equations imputation is always on** — imputing a list of targets conditions each on the previously-imputed ones, preserving cross-variable joint structure. The old per-variable-independent path and its `sequential` flag are removed.
+- The fitted result exposes its state sklearn-style — `regimes_`, `predictors_` (the chained predictor list per target), and `models_` (sub-estimators by role: single/gate/positive/negative). QRF base sub-estimators carry standard `feature_importances_`/`feature_names_in_`: `feature_importances_` is a `{fitted_feature: importance}` dict keyed by the forest's actual fitted columns (so names and values always align, even when a categorical predictor expands into dummy columns), and `feature_names_in_` reports the original input predictor names.
+
+Migration: replace `from microimpute.models.zero_inflated import ZeroInflatedImputer` with `from microimpute import Imputer`; references to the old base class `Imputer` become `BaseImputer`.
diff --git a/changelog.d/imputer-canonical.fixed.md b/changelog.d/imputer-canonical.fixed.md
@@ -0,0 +1 @@
+**`Imputer.fit(weight_col=...)` now actually weights numeric-target imputations.** Previously the regime-gated fit forwarded `weight_col` only to the auxiliary non-numeric base imputer; the gate classifiers and per-regime base imputers for numeric targets all trained unweighted, so sampling weights were silently ignored. Weights are now resolved once (column name, array, or Series) and threaded through every nested fit: gate classifiers receive them as `sample_weight` and per-regime base imputers as `weight_col`, sliced with the same row mask as the training slice. Separately, the QRF learner now honors `sample_weight` by weighted bootstrap resampling of its training rows: `quantile_forest` only uses native `sample_weight` as a zero-weight leaf filter (and fully-grown forest leaves are single-sample), so the previous native pass-through left leaf quantile distributions — and every value imputed from them — unweighted.
diff --git a/microimpute/__init__.py b/microimpute/__init__.py
@@ -46,12 +46,7 @@
 )
 
 # Main configuration
-from microimpute.config import (
-    PLOT_CONFIG,
-    QUANTILES,
-    RANDOM_STATE,
-    VALIDATE_CONFIG,
-)
+from microimpute.config import PLOT_CONFIG, QUANTILES, RANDOM_STATE, VALIDATE_CONFIG
 
 # Import evaluation modules
 from microimpute.evaluations.cross_validation import cross_validate_model
@@ -61,8 +56,10 @@
     progressive_predictor_inclusion,
 )
 
-# Import main models and utilities
-from microimpute.models import OLS, QRF, Imputer, ImputerResults, QuantReg
+# Import main models and utilities. ``Imputer`` is the canonical
+# regime-gated, QRF-based, sequentially-chained imputer; ``BaseImputer``
+# is the abstract base class all models extend.
+from microimpute.models import OLS, QRF, BaseImputer, Imputer, ImputerResults, QuantReg
 
 # Import data handling functions
 from microimpute.utils.data import preprocess_data, unnormalize_predictions

diff --git a/microimpute/comparisons/autoimpute.py b/microimpute/comparisons/autoimpute.py
@@ -26,7 +26,7 @@
     TRAIN_SIZE,
     VALIDATE_CONFIG,
 )
-from microimpute.models import OLS, QRF, Imputer, QuantReg
+from microimpute.models import OLS, QRF, BaseImputer, QuantReg
 from microimpute.utils.data import (
     un_asinh_transform_predictions,
     unlog_transform_predictions,
@@ -121,7 +121,7 @@ class AutoImputeResult(BaseModel):
     receiver_data : pd.DataFrame
         Copy of the receiver data with the median-quantile imputations of the best performing model attached.
     fitted_models : Dict[str, Any]
-        Mapping model name → fitted Imputer instance.
+        Mapping model name → fitted BaseImputer instance.
     cv_results : Dict[str, Dict[str, Any]]
         Cross-validation results with separate quantile_loss and log_loss metrics for each model.
     """
@@ -203,7 +203,7 @@ def _setup_logging(log_level: str) -> int:
 
 
 def _evaluate_models_parallel(
-    model_classes: List[Type[Imputer]],
+    model_classes: List[Type[BaseImputer]],
     training_data: pd.DataFrame,
     predictors: List[str],
     imputed_variables: List[str],
@@ -279,7 +279,7 @@ def _evaluate_models_parallel(
 
 
 def _generate_imputations_for_all_models(
-    model_classes: List[Type[Imputer]],
+    model_classes: List[Type[BaseImputer]],
     best_method: str,
     donor_data: pd.DataFrame,
     receiver_data: pd.DataFrame,
@@ -500,7 +500,7 @@ def autoimpute(
 
         # Get model classes
         if not models:
-            model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg]
+            model_classes: List[Type[BaseImputer]] = [QRF, OLS, QuantReg]
             if HAS_MATCHING:
                 model_classes.append(Matching)
             if HAS_MDN:

diff --git a/microimpute/comparisons/autoimpute_helpers.py b/microimpute/comparisons/autoimpute_helpers.py
@@ -24,7 +24,7 @@
     validate_quantiles,
 )
 from microimpute.evaluations import cross_validate_model
-from microimpute.models import Imputer
+from microimpute.models import BaseImputer
 from microimpute.utils.data import preprocess_data
 
 log = logging.getLogger(__name__)
@@ -237,7 +237,7 @@ def prepare_data_for_imputation(
 
 
 def evaluate_model(
-    model: Type[Imputer],
+    model: Type[BaseImputer],
     data: pd.DataFrame,
     predictors: List[str],
     imputed_variables: List[str],
@@ -290,7 +290,7 @@ def evaluate_model(
 
 
 def fit_and_predict_model(
-    model_class: Type[Imputer],
+    model_class: Type[BaseImputer],
     training_data: pd.DataFrame,
     imputing_data: pd.DataFrame,
     predictors: List[str],

diff --git a/microimpute/evaluations/predictor_analysis.py b/microimpute/evaluations/predictor_analysis.py
@@ -30,7 +30,7 @@
     TRAIN_SIZE,
     VALIDATE_CONFIG,
 )
-from microimpute.models import Imputer, ImputerResults
+from microimpute.models import BaseImputer, ImputerResults
 from microimpute.utils.type_handling import (
     DummyVariableProcessor,
     VariableTypeDetector,
@@ -238,7 +238,7 @@ def leave_one_out_analysis(
     data: pd.DataFrame,
     predictors: List[str],
     imputed_variables: List[str],
-    model_class: Type[Imputer],
+    model_class: Type[BaseImputer],
     weight_col: Optional[Union[str, np.ndarray, pd.Series]] = None,
     quantiles: List[float] = QUANTILES,
     train_size: float = TRAIN_SIZE,
@@ -254,7 +254,7 @@ def leave_one_out_analysis(
         data: DataFrame containing the data.
         predictors: List of predictor column names.
         imputed_variables: List of variables to impute.
-        model_class: The Imputer class to use for evaluation (e.g., OLS, QRF, QuantReg).
+        model_class: The BaseImputer subclass to use for evaluation (e.g., OLS, QRF, QuantReg).
         weight_col: Optional column name or array of sampling weights.
         quantiles: List of quantiles for evaluation (default: [0.1, 0.5, 0.9]).
         train_size: Proportion of data to use for training (default: 0.8).
@@ -375,7 +375,7 @@ def progressive_predictor_inclusion(
     data: pd.DataFrame,
     predictors: List[str],
     imputed_variables: List[str],
-    model_class: Type[Imputer],
+    model_class: Type[BaseImputer],
     weight_col: Optional[Union[str, np.ndarray, pd.Series]] = None,
     quantiles: Optional[List[float]] = QUANTILES,
     train_size: Optional[float] = TRAIN_SIZE,
@@ -391,7 +391,7 @@ def progressive_predictor_inclusion(
         data: DataFrame containing the data.
         predictors: List of candidate predictor column names.
         imputed_variables: List of variables to impute.
-        model_class: The Imputer class to use for evaluation (e.g., OLS, QRF, QuantReg).
+        model_class: The BaseImputer subclass to use for evaluation (e.g., OLS, QRF, QuantReg).
         weight_col: Optional column name or array of sampling weights.
         quantiles: List of quantiles for evaluation.
         train_size: Proportion of data to use for training.
@@ -573,7 +573,7 @@ def _evaluate_model_performance(
     test_data: pd.DataFrame,
     predictors: List[str],
     imputed_variables: List[str],
-    model_class: Type[Imputer],
+    model_class: Type[BaseImputer],
     weight_col: Optional[Union[str, np.ndarray, pd.Series]],
     quantiles: List[float],
     random_state: int,

diff --git a/microimpute/models/__init__.py b/microimpute/models/__init__.py
@@ -2,9 +2,11 @@
 
 This module provides a collection of statistical models for data imputation,
 including both parametric and non-parametric approaches. Each model extends
-the base Imputer class and provides quantile-based predictions.
+the base imputer class and provides quantile-based predictions.
 
 Available models:
+    - Imputer: canonical regime-gated, QRF-based, sequentially-chained
+        imputer (the opinionated default)
     - OLS: ordinary least squares regression with bootstrapped quantiles
     - QRF: quantile regression forest for non-parametric quantile regression
     - QuantReg: linear quantile regression model
@@ -13,12 +15,12 @@
         (optional, requires pytorch-tabular)
 
 Base classes:
-    - Imputer: abstract base class for all imputation models
+    - BaseImputer: abstract base class for all imputation models
     - ImputerResults: container for fitted model and prediction methods
 """
 
 # Import base classes
-from microimpute.models.imputer import Imputer, ImputerResults
+from microimpute.models.imputer import BaseImputer, ImputerResults
 
 try:
     from microimpute.models.matching import Matching
@@ -34,3 +36,7 @@
 from microimpute.models.ols import OLS
 from microimpute.models.qrf import QRF
 from microimpute.models.quantreg import QuantReg
+
+# Canonical opinionated imputer: sign-regime gating + QRF base + sequential
+# chained-equations imputation, all on by default.
+from microimpute.models.regime_gated import Imputer
diff --git a/microimpute/models/imputer.py b/microimpute/models/imputer.py
@@ -2,7 +2,7 @@
 
 This module defines the core architecture for imputation models in MicroImpute.
 It provides two abstract base classes:
-1. Imputer - For model initialization and fitting
+1. BaseImputer - For model initialization and fitting
 2. ImputerResults - For storing fitted models and making predictions
 
 All model implementations should extend these classes to ensure a consistent interface.
@@ -35,7 +35,7 @@ def predict(self, X: pd.DataFrame, **kwargs) -> pd.Series:
         return pd.Series(self.constant_value, index=X.index, name=self.variable_name)
 
 
-class Imputer(ABC):
+class BaseImputer(ABC):
     """
     Abstract base class for fitting imputation models.
 
@@ -238,6 +238,47 @@ def preprocess_data_types(
             self.logger.error(f"Error during data preprocessing: {str(e)}")
             raise RuntimeError("Failed to preprocess data types") from e
 
+    @staticmethod
+    def _resolve_sample_weights(
+        X_train: pd.DataFrame,
+        weight_col: Optional[Union[str, np.ndarray, pd.Series]],
+    ) -> Optional[np.ndarray]:
+        """Resolve a weight specification to a validated per-row array.
+
+        Accepts a column name in ``X_train``, a positional array, or a
+        Series aligned by index, and returns a float array positionally
+        aligned with ``X_train`` rows (``None`` when no weights given).
+
+        Raises:
+            ValueError: If a named weight column is missing from
+                ``X_train``, or any resolved weight is non-positive,
+                NaN, or infinite.
+        """
+        if weight_col is None:
+            return None
+        if isinstance(weight_col, str):
+            if weight_col not in X_train.columns:
+                raise ValueError(
+                    f"Weight column '{weight_col}' not found in training data"
+                )
+            weights = X_train[weight_col]
+        elif isinstance(weight_col, np.ndarray):
+            weights = pd.Series(weight_col, index=X_train.index)
+        else:
+            weights = weight_col.reindex(X_train.index)
+
+        # Reject NaN, +/-inf AND non-positive values together. NaN weights
+        # (e.g. from a Series reindex miss) and infinite weights would
+        # otherwise propagate into sample_weight passed to learners.
+        weights_arr = np.asarray(weights, dtype=float)
+        invalid_mask = ~np.isfinite(weights_arr) | (weights_arr <= 0)
+        if invalid_mask.any():
+            raise ValueError(
+                "Weights must be positive and finite; found "
+                f"{int(invalid_mask.sum())} non-positive or non-finite weight(s)"
+            )
+        return weights_arr
+
     @validate_call(config=VALIDATE_CONFIG)
     def fit(
         self,
@@ -261,7 +302,7 @@ def fit(
             X_train: DataFrame containing the training data.
             predictors: List of column names to use as predictors.
             imputed_variables: List of column names to impute.
-            weight_col: Optional name of the column or column array/series containing sampling weights. When provided, `X_train` will be sampled with replacement using this column as selection probabilities before fitting the model.
+            weight_col: Optional name of the column or column array/series containing sampling weights. When provided, the resolved per-row weights are passed to the model subclass as `sample_weight`; the subclass honors them natively (e.g. OLS->WLS) or by weighted bootstrap resampling (QRF).
             skip_missing: If True, skip variables missing from training data with warning. If False, raise error for missing variables.
             not_numeric_categorical: Optional list of variable names that should
                 be treated as numeric even if they would normally be detected as
@@ -348,30 +389,7 @@ def fit(
         except Exception as e:
             raise ValueError(f"Invalid input data for model: {str(e)}") from e
 
-        weights = None
-        if weight_col is not None and isinstance(weight_col, str):
-            if weight_col not in X_train.columns:
-                raise ValueError(
-                    f"Weight column '{weight_col}' not found in training data"
-                )
-            weights = X_train[weight_col]
-        elif weight_col is not None and isinstance(weight_col, np.ndarray):
-            weights = pd.Series(weight_col, index=X_train.index)
-        elif weight_col is not None and isinstance(weight_col, pd.Series):
-            weights = weight_col.reindex(X_train.index)
-
-        if weights is not None:
-            # Check for NaN AND non-positive values together. Previously only
-            # (weights <= 0).any() was checked, which returns False for NaN
-            # weights — those then propagated into .sample() as NaN
-            # probabilities or corrupted sample_weight passed to learners.
-            weights_arr = np.asarray(weights, dtype=float)
-            invalid_mask = np.isnan(weights_arr) | (weights_arr <= 0)
-            if invalid_mask.any():
-                raise ValueError(
-                    "Weights must be positive and finite; found "
-                    f"{int(invalid_mask.sum())} non-positive or NaN weight(s)"
-                )
+        weights_arr = self._resolve_sample_weights(X_train, weight_col)
 
         # Identify target types BEFORE preprocessing
         self.identify_target_types(
@@ -393,21 +411,20 @@ def fit(
         self.imputed_vars_dummy_info = imputed_vars_dummy_info
         self.original_predictors = original_predictors
 
-        # Pass sample_weight through to the subclass so it can use each
-        # learner's native weighted-fit API (QRF, OLS→WLS, logistic, RFC all
-        # support sample_weight). This replaces the previous bootstrap
-        # resample, which silently discarded weights for the underlying
-        # estimator and inflated variance / shrank effective sample size.
-        sample_weight = None
-        if weights is not None:
-            sample_weight = np.asarray(weights_arr, dtype=float)
-            # Reindex if preprocess_data_types changed the row ordering
-            # (it currently does not, but guard against future drift).
-            if len(sample_weight) != len(X_train):
-                raise RuntimeError(
-                    "Internal error: sample_weight length no longer matches "
-                    "X_train after preprocessing"
-                )
+        # Pass sample_weight through to the subclass so it can honor the
+        # weights with whichever mechanism its learner supports: an exact
+        # native weighted-fit API where available (OLS→WLS), or weighted
+        # bootstrap resampling where the native API cannot weight the
+        # predictive distribution (the forest-backed QRF — see
+        # qrf._weighted_resample).
+        sample_weight = weights_arr
+        if sample_weight is not None and len(sample_weight) != len(X_train):
+            # preprocess_data_types currently preserves row order/count,
+            # but guard against future drift.
+            raise RuntimeError(
+                "Internal error: sample_weight length no longer matches "
+                "X_train after preprocessing"
+            )
 
         # Defer actual training to subclass with all parameters
         fit_kwargs = {

diff --git a/microimpute/models/matching.py b/microimpute/models/matching.py
@@ -7,7 +7,7 @@
 from pydantic import validate_call
 
 from microimpute.config import RANDOM_STATE, VALIDATE_CONFIG
-from microimpute.models.imputer import Imputer, ImputerResults
+from microimpute.models.imputer import BaseImputer, ImputerResults
 from microimpute.utils.statmatch_hotdeck import nnd_hotdeck_using_rpy2
 
 MatchingHotdeckFn = Callable[
@@ -402,7 +402,7 @@ def _process_matching_results(
             raise RuntimeError("Failed to create output imputations") from output_error
 
 
-class Matching(Imputer):
+class Matching(BaseImputer):
     """
     Statistical matching model for imputation using nearest neighbor distance
     hot deck method.

diff --git a/microimpute/models/mdn.py b/microimpute/models/mdn.py
@@ -14,7 +14,7 @@
 
 from microimpute.config import RANDOM_STATE, VALIDATE_CONFIG
 from microimpute.models.imputer import (
-    Imputer,
+    BaseImputer,
     ImputerResults,
     _ConstantValueModel,
 )
@@ -800,7 +800,7 @@ def _predict(
             raise RuntimeError(f"Failed to predict with MDN model: {str(e)}") from e
 
 
-class MDN(Imputer):
+class MDN(BaseImputer):
     """
     Mixture Density Network imputer using PyTorch Tabular.
 

diff --git a/microimpute/models/ols.py b/microimpute/models/ols.py
@@ -10,7 +10,7 @@
 from sklearn.linear_model import LogisticRegression
 
 from microimpute.config import VALIDATE_CONFIG
-from microimpute.models.imputer import Imputer, ImputerResults
+from microimpute.models.imputer import BaseImputer, ImputerResults
 
 
 class _LogisticRegressionModel:
@@ -462,7 +462,7 @@ def _predict_quantile(
             ) from e
 
 
-class OLS(Imputer):
+class OLS(BaseImputer):
     """
     Ordinary Least Squares regression model for imputation.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		`Imputer.fit(weight_col=...)` now actually weights numeric-target imputations. Previously the regime-gated fit forwarded `weight_col` only to the auxiliary non-numeric base imputer; the gate classifiers and per-regime base imputers for numeric targets all trained unweighted, so sampling weights were silently ignored. Weights are now resolved once (column name, array, or Series) and threaded through every nested fit: gate classifiers receive them as `sample_weight` and per-regime base imputers as `weight_col`, sliced with the same row mask as the training slice. Separately, the QRF learner now honors `sample_weight` by weighted bootstrap resampling of its training rows: `quantile_forest` only uses native `sample_weight` as a zero-weight leaf filter (and fully-grown forest leaves are single-sample), so the previous native pass-through left leaf quantile distributions — and every value imputed from them — unweighted.