breimanntools · breimanntools · Jun 26, 2026 · Jun 26, 2026 · Jun 26, 2026
diff --git a/.github/scripts/seqopt_deap_comparison.py b/.github/scripts/seqopt_deap_comparison.py
@@ -16,7 +16,7 @@
 import tracemalloc
 import numpy as np
 
-from aaanalysis.protein_design_pro._backend.seqopt.nsga2 import (
+from aaanalysis.protein_design._backend.seqopt.nsga2 import (
     fast_non_dominated_sort, crowding_distance, select_nsga2)
 
 try:

diff --git a/CONTEXT.md b/CONTEXT.md
@@ -128,7 +128,7 @@ _Avoid_: interaction (overloaded with the relational/PPI [[relational / interact
 ### SeqOpt directed-evolution vocabulary
 
 **SeqOpt**:
-The **search/optimization** layer of [[design / engineering]] (**[pro]**, `aaanalysis/protein_design_pro/`), the counterpart to [[SeqMut]]'s scoring: it *searches* over sequence variants of **one wild-type** for those that best satisfy several objectives at once. This is **protein engineering** — machine-learning-guided **directed evolution** of an *existing* sequence (Yang et al. 2019; Wittmann et al. 2021) — explicitly **not** *de novo protein design* (building new proteins from the ground up, e.g. RFdiffusion→ProteinMPNN→AlphaFold; Yang et al. 2026), which is out of scope. A `Tool` (`run` → [[Pareto front]] `df_pareto`, `eval` → Pareto-quality metrics), paired with `SeqOptPlot`. Reuses model-bound [[SeqMut]] as the fitness engine and `ShapModel` for residue guidance, so it is `pro` (imports SHAP) even though [[SeqMut]] stays core. Two modes — `"impact"` (SHAP-guided, adaptive) and `"importance"` (feature-importance-guided, greedy) — see [[guidance mode (impact / importance)]]. It realizes the search that [[SeqMut]] and [[combined variant]] defer to.
+The **search/optimization** layer of [[design / engineering]] (a **core** class in `aaanalysis/protein_design/`), the counterpart to [[SeqMut]]'s scoring: it *searches* over sequence variants of **one wild-type** for those that best satisfy several objectives at once. This is **protein engineering** — machine-learning-guided **directed evolution** of an *existing* sequence (Yang et al. 2019; Wittmann et al. 2021) — explicitly **not** *de novo protein design* (building new proteins from the ground up, e.g. RFdiffusion→ProteinMPNN→AlphaFold; Yang et al. 2026), which is out of scope. A `Tool` (`run` → [[Pareto front]] `df_pareto`, `eval` → Pareto-quality metrics), paired with `SeqOptPlot`. Reuses model-bound [[SeqMut]] as the fitness engine. Two modes — `"impact"` (SHAP-guided, adaptive) and `"importance"` (feature-importance-guided, greedy) — see [[guidance mode (impact / importance)]]. **Only `mode="impact"` needs `aaanalysis[pro]`** (it imports `ShapModel`/SHAP lazily; if SHAP is absent, it raises a friendly install hint); everything else (importance mode, the NSGA-II/EA layer, all plots) runs in a base install. It realizes the search that [[SeqMut]] and [[combined variant]] defer to.
 _Avoid_: optimizer (overloaded with numerical/perf optimization — this is evolutionary *search*); generator, sampler; **de novo design / protein design** (SeqOpt does protein *engineering* / directed evolution, not generation of new proteins).
 
 **population** / **generation**:

diff --git a/aaanalysis/__init__.py b/aaanalysis/__init__.py
@@ -7,7 +7,7 @@
 from .feature_engineering import AAclust, AAclustPlot, SequenceFeature, NumericalFeature, CPP, CPPGrid, CPPPlot
 from .pu_learning import dPULearn, dPULearnPlot
 from .explainable_ai import TreeModel
-from .protein_design import AAMut, AAMutPlot, SeqMut, SeqMutPlot
+from .protein_design import AAMut, AAMutPlot, SeqMut, SeqMutPlot, SeqOpt, SeqOptPlot
 from .plotting import (plot_get_clist, plot_get_cmap, plot_get_cdict,
                        plot_settings, plot_legend, plot_gcfs, plot_rank)
 from .metrics import (comp_auc_adjusted, comp_bic_score, comp_kld,
@@ -54,6 +54,8 @@
     "AAMutPlot",
     "SeqMut",
     "SeqMutPlot",
+    "SeqOpt",
+    "SeqOptPlot",
     "TreeModel",
     # "ShapModel"       # SHAP
     "plot_get_clist",
@@ -118,16 +120,6 @@ def missing_feature_stub(feature_name, error, mode="pro"):
     globals()["ShapModel"] = missing_feature_stub("ShapModel", e, mode="pro")
 
 
-try:
-    from .protein_design_pro import SeqOpt, SeqOptPlot
-    __all__.extend(["SeqOpt", "SeqOptPlot"])
-except ImportError as e:
-    SeqOpt = None
-    SeqOptPlot = None
-    globals()["SeqOpt"] = missing_feature_stub("SeqOpt", e, mode="pro")
-    globals()["SeqOptPlot"] = missing_feature_stub("SeqOptPlot", e, mode="pro")
-
-
 try:
     from .seq_analysis_pro import comp_seq_sim
     __all__.append("comp_seq_sim")

diff --git a/aaanalysis/protein_design/__init__.py b/aaanalysis/protein_design/__init__.py
@@ -1,22 +1,28 @@
 """
-Protein design: per-mutation and per-sequence ΔCPP feature-impact analysis.
+Protein design / engineering: per-mutation and per-sequence ΔCPP feature-impact analysis
+and multi-objective directed-evolution optimization.
 
-Public objects: AAMut(+Plot), SeqMut(+Plot).
+Public objects: AAMut(+Plot), SeqMut(+Plot), SeqOpt(+Plot).
 Consumes CPP feature impact from ``feature_engineering`` to score amino-acid mutations
-(``AAMut``) and whole-sequence variants (``SeqMut``); each is paired with a plot class
-for visualization.
+(``AAMut``) and whole-sequence variants (``SeqMut``); ``SeqOpt`` searches the variant space
+(NSGA-II) for the multi-objective Pareto front. Each is paired with a plot class. ``SeqOpt`` is
+a core class — only its SHAP-guided ``mode="impact"`` needs ``aaanalysis[pro]`` (imported lazily).
 
 See ``.claude/rules/code-conventions.md`` for conventions, ``CONTEXT.md`` for domain
-terms (protein-design (mutation / ΔCPP) vocabulary).
+terms (protein-design (mutation / ΔCPP) + SeqOpt directed-evolution vocabulary).
 """
 from ._aamut import AAMut
 from ._aamut_plot import AAMutPlot
 from ._seqmut import SeqMut
 from ._seqmut_plot import SeqMutPlot
+from ._seqopt import SeqOpt
+from ._seqopt_plot import SeqOptPlot
 
 __all__ = [
     "AAMut",
     "AAMutPlot",
     "SeqMut",
     "SeqMutPlot",
+    "SeqOpt",
+    "SeqOptPlot",
 ]
diff --git a/...s/protein_design_pro/_backend/__init__.py → ...rotein_design/_backend/seqopt/__init__.py b/...s/protein_design_pro/_backend/__init__.py → ...rotein_design/_backend/seqopt/__init__.py
diff --git a/...tein_design_pro/_backend/seqopt/genome.py → .../protein_design/_backend/seqopt/genome.py b/...tein_design_pro/_backend/seqopt/genome.py → .../protein_design/_backend/seqopt/genome.py
diff --git a/...ein_design_pro/_backend/seqopt/metrics.py → ...protein_design/_backend/seqopt/metrics.py b/...ein_design_pro/_backend/seqopt/metrics.py → ...protein_design/_backend/seqopt/metrics.py
diff --git a/...otein_design_pro/_backend/seqopt/nsga2.py → ...s/protein_design/_backend/seqopt/nsga2.py b/...otein_design_pro/_backend/seqopt/nsga2.py → ...s/protein_design/_backend/seqopt/nsga2.py
diff --git a/...ein_design_pro/_backend/seqopt/penalty.py → ...protein_design/_backend/seqopt/penalty.py b/...ein_design_pro/_backend/seqopt/penalty.py → ...protein_design/_backend/seqopt/penalty.py
diff --git a/...protein_design_pro/_backend/seqopt/run.py → ...sis/protein_design/_backend/seqopt/run.py b/...protein_design_pro/_backend/seqopt/run.py → ...sis/protein_design/_backend/seqopt/run.py
diff --git a/aaanalysis/protein_design_pro/_seqopt.py → aaanalysis/protein_design/_seqopt.py b/aaanalysis/protein_design_pro/_seqopt.py → aaanalysis/protein_design/_seqopt.py
@@ -1,8 +1,8 @@
 """
-This is a script for the frontend of the SeqOpt class (**[pro]**): SHAP-guided, fuzzy-labeled
-multi-objective directed-evolution optimization over sequence variants of one wild-type. SeqOpt
-reuses a model-bound SeqMut as its fitness engine and ShapModel for per-generation residue
-guidance, and is therefore gated behind the ``pro`` extra.
+This is a script for the frontend of the SeqOpt class: multi-objective directed-evolution
+optimization over sequence variants of one wild-type, reusing a model-bound SeqMut as its
+fitness engine. SeqOpt is a core class; only its SHAP-guided ``mode="impact"`` (per-generation
+ShapModel refit) needs the optional ``aaanalysis[pro]`` dependency, imported lazily.
 """
 from typing import Optional, List, Any, Callable, Tuple, Dict
 import numpy as np
@@ -11,8 +11,9 @@
 import aaanalysis.utils as ut
 from aaanalysis.template_classes import Tool
 from aaanalysis.feature_engineering._sequence_feature import SequenceFeature
-from aaanalysis.protein_design import SeqMut
-from aaanalysis.explainable_ai_pro import ShapModel
+from ._seqmut import SeqMut
+# ShapModel (and its heavy SHAP dependency) is imported lazily inside mode="impact" only, so
+# SeqOpt stays importable in a base install; mode="impact" then needs aaanalysis[pro].
 from ._backend.seqopt.genome import canonical, apply_genome, variant_label
 from ._backend.seqopt.run import evolve_nsga2, evolve_greedy
 from ._backend.seqopt.nsga2 import normalize_objectives_, rank_and_crowding
@@ -108,8 +109,9 @@ def residue_weights_(df_feat, col, base):
 # II Main Functions
 class SeqOpt(Tool):
     """
-    Sequence Optimizer (SeqOpt) class (**[pro]**, requires ``aaanalysis[pro]``) for SHAP-guided,
-    multi-objective directed evolution over sequence variants [Breimann24a]_.
+    Sequence Optimizer (SeqOpt) class for multi-objective directed evolution over sequence
+    variants [Breimann24a]_. Core class; only the SHAP-guided ``mode="impact"`` needs
+    ``aaanalysis[pro]`` (``mode="importance"`` and everything else run in a base install).
 
     ``SeqOpt`` performs **protein engineering**, not **de novo protein design**. The two are
     distinct paradigms: *de novo design* builds **new proteins from the ground up** rather than
@@ -186,9 +188,10 @@ def __init__(self,
         mode : str, default='impact'
             Residue-guidance mode. ``'impact'`` refits :class:`ShapModel` every generation under
             fuzzy labeling (the new variant's prediction score as a soft label vs. the balanced
-            reference) and mutates the strongest-``feat_impact`` residues. ``'importance'`` uses
-            the static ``feat_importance`` ranking from ``df_feat`` (no SHAP, no refit) and walks
-            positions highest-first.
+            reference) and mutates the strongest-``feat_impact`` residues — this is the only
+            SeqOpt feature that needs ``aaanalysis[pro]`` (SHAP; imported lazily). ``'importance'``
+            uses the static ``feat_importance`` ranking from ``df_feat`` (no SHAP, no refit) and
+            walks positions highest-first (no pro dependency).
         model : object, optional
             A fitted classifier exposing ``predict_proba`` used as the fitness engine (the
             ``delta_pred`` objective) and, in ``mode='impact'``, as the model whose attribution
@@ -227,12 +230,23 @@ def __init__(self,
                               target_class=target_class)
         self._model = model
         self._target_class = target_class
-        # mode='impact' needs the labeled reference set for the per-generation ShapModel refit.
+        # mode='impact' needs the labeled reference set + the (pro) ShapModel for the
+        # per-generation refit. ShapModel is imported lazily so SeqOpt stays core-importable.
+        self._ShapModel = None
         if self._mode == "impact":
             if model is None or df_seq_ref is None or labels is None:
                 raise ValueError("mode='impact' requires 'model', 'df_seq_ref' and 'labels' "
                                  "(the balanced reference the per-generation ShapModel refit is "
                                  "anchored on); use mode='importance' for a SHAP-free run.")
+            try:
+                from aaanalysis.explainable_ai_pro import ShapModel
+            except ImportError as e:
+                raise ImportError(
+                    "SeqOpt mode='impact' needs the SHAP-based ShapModel. Install via:\n"
+                    "\n\tpip install 'aaanalysis[pro]'\n\n"
+                    "or use mode='importance' (feature-importance-guided; no pro dependency)."
+                ) from e
+            self._ShapModel = ShapModel
             ut.check_df_seq(df_seq=df_seq_ref)
             labels = ut.check_labels(labels=labels, len_required=len(df_seq_ref))
         self._df_seq_ref = df_seq_ref
@@ -335,7 +349,7 @@ def _impact_weights(self, df_seq, df_feat, best_genome, wt_seq, base, jmd_n_len,
         p_var = float(proba[-1]) if proba.ndim == 1 and len(proba) else float(np.ravel(proba)[-1])
         p_var = min(max(p_var, 0.0), 1.0)
         labels_fuzzy = list(np.asarray(self._labels, dtype=float)) + [p_var]
-        sm = ShapModel(random_state=self._random_state, verbose=False)
+        sm = self._ShapModel(random_state=self._random_state, verbose=False)
         sm.fit(X, labels_fuzzy, fuzzy_labeling=True)
         df_imp = sm.add_feat_impact(df_feat.copy(), samples=int(len(X) - 1), names="var", drop=True)
         impact_cols = [c for c in df_imp.columns if c.startswith(ut.COL_FEAT_IMPACT)
@@ -401,8 +415,10 @@ def run(self,
         Parameters
         ----------
         df_seq : pd.DataFrame, shape (1, n_seq_info)
-            The single wild-type, in the **position-based** format (``sequence``, ``tmd_start``,
-            ``tmd_stop``). See :meth:`SequenceFeature.get_df_parts` for the full specification.
+            DataFrame containing an ``entry`` column with unique protein identifiers, in the
+            **position-based** format (``sequence``, ``tmd_start``, ``tmd_stop``). See
+            :meth:`SequenceFeature.get_df_parts` for the full ``df_seq`` format specification.
+            Must hold **exactly one** wild-type sequence (SeqOpt optimizes one sequence per run).
         df_feat : pd.DataFrame
             CPP feature set (output of :meth:`CPP.run`) defining the features and the residue
             attribution (``feat_importance`` / ``feat_impact``, ``positions``) the search reads.

diff --git a/...alysis/protein_design_pro/_seqopt_plot.py → aaanalysis/protein_design/_seqopt_plot.py b/...alysis/protein_design_pro/_seqopt_plot.py → aaanalysis/protein_design/_seqopt_plot.py
@@ -1,5 +1,5 @@
 """
-This is a script for the frontend of the SeqOptPlot class (**[pro]**) for visualizing SeqOpt
+This is a script for the frontend of the SeqOptPlot class for visualizing SeqOpt
 multi-objective directed-evolution results: the Pareto-front objective scatter and the
 per-generation hypervolume convergence trace.
 """
@@ -38,8 +38,7 @@ def _objective_cols(df_pareto):
 # II Main Functions
 class SeqOptPlot:
     """
-    Plotting class for :class:`SeqOpt` (Sequence Optimizer) results (**[pro]**, requires
-    ``aaanalysis[pro]``) [Breimann24a]_.
+    Plotting class for :class:`SeqOpt` (Sequence Optimizer) results [Breimann24a]_.
 
     Visualizes the Pareto front produced by :meth:`SeqOpt.run`: a 2-D objective scatter colored
     by non-dominated rank, and the per-generation hypervolume convergence trace.

diff --git a/aaanalysis/protein_design_pro/__init__.py b/aaanalysis/protein_design_pro/__init__.py
diff --git a/aaanalysis/protein_design_pro/_backend/seqopt/__init__.py b/aaanalysis/protein_design_pro/_backend/seqopt/__init__.py
diff --git a/docs/adr/0043-seqopt-optimization-layer.md b/docs/adr/0043-seqopt-optimization-layer.md
@@ -38,6 +38,15 @@ dependency forced": that decision keeps `SeqMut` and the `protein_design` **core
 the new optimizer lives in a separate `*_pro` module. `SeqMut` (core) remains the fitness engine and
 is imported by `SeqOpt`.
 
+> **Amended (2026-06-26): D1 reversed — `SeqOpt`/`SeqOptPlot` are now CORE.** Only the SHAP-guided
+> `mode="impact"` actually needs SHAP, and it is the minority path (`mode="importance"` + the whole
+> NSGA-II/EA layer + plots are SHAP-free). `ShapModel` is therefore imported **lazily** inside the
+> impact path, so SeqOpt is importable in a base install; constructing `SeqOpt(mode="impact")`
+> without `shap` raises a friendly `aaanalysis[pro]` install hint. `SeqOpt`/`SeqOptPlot` + the `_backend/seqopt/`
+> code moved from `protein_design_pro/` into the core `protein_design/` subpackage, and the
+> now-empty `protein_design_pro` package was removed. The pro-gating in `aaanalysis/__init__.py` is
+> replaced by core exports.
+
 **D2 — Two guidance modes, named after their `df_feat` attribution column.**
 - `mode="impact"` (default, headline): per-round `ShapModel` refit under **fuzzy labeling** →
   fresh per-residue `|feat_impact|` → an **adaptive NSGA-II** population evolves the Pareto front.

diff --git a/docs/source/index/docstring_guide.rst b/docs/source/index/docstring_guide.rst
@@ -346,7 +346,7 @@ Rules:
      -
    * - ``SeqOpt`` / ``SeqOptPlot``
      - ``seqopt`` / ``seqopt_plot``
-     - ``pro``
+     - core (``mode="impact"`` needs ``pro``)
    * - ``TreeModel``
      - ``tm``
      -

diff --git a/docs/source/index/release_notes.rst b/docs/source/index/release_notes.rst
@@ -106,7 +106,8 @@ Added
 
 **Protein Design**
 
-- **SeqOpt — multi-objective protein engineering** (``[pro]``): A new ``SeqOpt`` optimizer
+- **SeqOpt — multi-objective protein engineering** (**core**; only ``mode="impact"`` needs
+  ``aaanalysis[pro]``): A new ``SeqOpt`` optimizer
   (with ``SeqOptPlot``) performs **machine-learning-guided directed evolution** of one
   wild-type — searching the Pareto front across several objectives at once, with a
   model-bound ``SeqMut`` as the fitness engine and a re-implementation of NSGA-II for

diff --git a/tests/unit/api_tests/test_backend_import_hygiene.py b/tests/unit/api_tests/test_backend_import_hygiene.py
@@ -30,7 +30,7 @@
         "num_feat": {"_numerical_feature"},
         "aaclust": {"_aaclust", "_aaclust_plot"},
     },
-    "protein_design_pro": {
+    "protein_design": {
         "seqopt": {"_seqopt"},
     },
 }

diff --git a/...t/protein_design_pro_tests/test_seqopt.py → tests/unit/seqopt_tests/test_seqopt.py b/...t/protein_design_pro_tests/test_seqopt.py → tests/unit/seqopt_tests/test_seqopt.py
@@ -1,7 +1,8 @@
-"""This is a script to test SeqOpt.run / SeqOpt.eval and SeqOptPlot (**[pro]**).
+"""This is a script to test SeqOpt.run / SeqOpt.eval and SeqOptPlot.
 
-Guarded by ``shap`` (the whole protein_design_pro subpackage imports ShapModel); skipped in a
-core-only environment. Tiny deterministic wild-type + real-scale df_feat so the genuine ΔCPP /
+SeqOpt is a core class — these tests run in a base install. Only the SHAP-guided
+``mode="impact"`` tests skip without ``shap`` (they call ``pytest.importorskip("shap")``
+locally). Tiny deterministic wild-type + real-scale df_feat so the genuine ΔCPP /
 SequenceFeature engine runs while staying fast.
 """
 import numpy as np
@@ -13,8 +14,7 @@
 import aaanalysis as aa
 import aaanalysis.utils as ut
 
-pytest.importorskip("shap")
-from aaanalysis.protein_design_pro import SeqOpt, SeqOptPlot  # noqa: E402
+from aaanalysis.protein_design import SeqOpt, SeqOptPlot
 
 settings.register_profile("ci", deadline=None)
 settings.load_profile("ci")
@@ -201,6 +201,19 @@ def test_delta_pred_without_model_raises(self, wt, df_feat):
 
 
 class TestSeqOptInit:
+    def test_seqopt_is_core(self):
+        # SeqOpt is a core class (not pro-gated): the real class, exported, not a stub.
+        assert "SeqOpt" in aa.__all__ and "SeqOptPlot" in aa.__all__
+        assert aa.SeqOpt.__module__ == "aaanalysis.protein_design._seqopt"
+
+    def test_impact_without_shap_raises_pro_hint(self, model, wt, monkeypatch):
+        # mode="impact" imports ShapModel lazily; without the pro dependency it raises a
+        # friendly install hint (SeqOpt itself stays importable in a base install).
+        import sys
+        monkeypatch.setitem(sys.modules, "aaanalysis.explainable_ai_pro", None)
+        with pytest.raises(ImportError, match=r"aaanalysis\[pro\]"):
+            SeqOpt(mode="impact", model=model, df_seq_ref=wt, labels=[1], random_state=0)
+
     def test_impact_requires_reference(self, model):
         with pytest.raises(ValueError, match="mode='impact'"):
             SeqOpt(mode="impact", model=model, df_seq_ref=None, labels=None)
@@ -215,6 +228,7 @@ def test_df_scales_accepted(self, model):
         assert so is not None
 
     def test_impact_mode_runs(self, wt, df_feat):
+        pytest.importorskip("shap")
         pytest.importorskip("sklearn")
         from sklearn.ensemble import RandomForestClassifier
         ref = pd.DataFrame({ut.COL_ENTRY: [f"R{i}" for i in range(8)],
@@ -234,6 +248,7 @@ def test_impact_mode_runs(self, wt, df_feat):
         assert _non_dominated(df)
 
     def test_impact_mode_df_seq_ref_with_extra_columns(self, wt, df_feat):
+        pytest.importorskip("shap")
         # Regression: a reference from load_dataset carries jmd_n/tmd/jmd_c/label columns; the
         # per-generation ShapModel refit must keep only the position-based columns (else the
         # appended variant row NaN-trips check_df_seq).

diff --git a/...n_design_pro_tests/test_seqopt_backend.py → .../unit/seqopt_tests/test_seqopt_backend.py b/...n_design_pro_tests/test_seqopt_backend.py → .../unit/seqopt_tests/test_seqopt_backend.py
@@ -9,11 +9,11 @@
 from hypothesis import given, settings
 import hypothesis.strategies as some
 
-from aaanalysis.protein_design_pro._backend.seqopt.nsga2 import (
+from aaanalysis.protein_design._backend.seqopt.nsga2 import (
     normalize_objectives_, fast_non_dominated_sort, crowding_distance, crowded_better,
     dcd_tournament, select_nsga2)
-from aaanalysis.protein_design_pro._backend.seqopt.metrics import hypervolume, spread
-from aaanalysis.protein_design_pro._backend.seqopt.genome import (
+from aaanalysis.protein_design._backend.seqopt.metrics import hypervolume, spread
+from aaanalysis.protein_design._backend.seqopt.genome import (
     canonical, apply_genome, variant_label, random_genome, init_population, repair,
     crossover_uniform, crossover_npoint, mutate)
 

diff --git a/...sign_pro_tests/test_seqopt_deap_parity.py → ...t/seqopt_tests/test_seqopt_deap_parity.py b/...sign_pro_tests/test_seqopt_deap_parity.py → ...t/seqopt_tests/test_seqopt_deap_parity.py
@@ -18,7 +18,7 @@
 deap = pytest.importorskip("deap")
 from deap import base, creator, tools  # noqa: E402
 
-from aaanalysis.protein_design_pro._backend.seqopt.nsga2 import (  # noqa: E402
+from aaanalysis.protein_design._backend.seqopt.nsga2 import (  # noqa: E402
     normalize_objectives_, fast_non_dominated_sort, crowding_distance, select_nsga2)
 
 settings.register_profile("ci", deadline=None)