From 99b5be4aec798f68533d9c8a1d350533d548e798 Mon Sep 17 00:00:00 2001 From: Stephan Breimann Date: Fri, 26 Jun 2026 04:51:45 +0200 Subject: [PATCH] refactor(seqopt): make SeqOpt core (lazy ShapModel); only mode='impact' needs pro SeqOpt's EA machinery, plots, and mode='importance' are all pure-Python and need nothing pro; only the SHAP-guided mode='impact' needs shap. So: - Import ShapModel lazily inside the impact path; SeqOpt is now importable in a base install. Constructing mode='impact' without shap raises a friendly aaanalysis[pro] hint. - Move SeqOpt/SeqOptPlot + _backend/seqopt/ from protein_design_pro/ into the core protein_design/ subpackage; remove the now-empty protein_design_pro package. - __init__.py: replace the pro try/except gate with core exports of SeqOpt/SeqOptPlot. - DEDICATED_OWNERS, test imports (+ rename tests dir -> seqopt_tests, shap-guard only the impact tests), comparison script, [pro] markers, ADR-0043 (D1 amended), CONTEXT, release notes and the docstring-guide table all updated to 'core; impact needs pro'. Verified: SeqOpt imports + mode='importance' runs with shap blocked; mode='impact' raises the pro hint. 96 SeqOpt tests + 0 docstring defects. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/scripts/seqopt_deap_comparison.py | 2 +- CONTEXT.md | 2 +- aaanalysis/__init__.py | 14 ++---- aaanalysis/protein_design/__init__.py | 16 +++++-- .../_backend/seqopt}/__init__.py | 0 .../_backend/seqopt/genome.py | 0 .../_backend/seqopt/metrics.py | 0 .../_backend/seqopt/nsga2.py | 0 .../_backend/seqopt/penalty.py | 0 .../_backend/seqopt/run.py | 0 .../_seqopt.py | 46 +++++++++++++------ .../_seqopt_plot.py | 5 +- aaanalysis/protein_design_pro/__init__.py | 19 -------- .../_backend/seqopt/__init__.py | 0 docs/adr/0043-seqopt-optimization-layer.md | 9 ++++ docs/source/index/docstring_guide.rst | 2 +- docs/source/index/release_notes.rst | 3 +- .../api_tests/test_backend_import_hygiene.py | 2 +- .../test_seqopt.py | 25 ++++++++-- .../test_seqopt_backend.py | 6 +-- .../test_seqopt_deap_parity.py | 2 +- 21 files changed, 86 insertions(+), 67 deletions(-) rename aaanalysis/{protein_design_pro/_backend => protein_design/_backend/seqopt}/__init__.py (100%) rename aaanalysis/{protein_design_pro => protein_design}/_backend/seqopt/genome.py (100%) rename aaanalysis/{protein_design_pro => protein_design}/_backend/seqopt/metrics.py (100%) rename aaanalysis/{protein_design_pro => protein_design}/_backend/seqopt/nsga2.py (100%) rename aaanalysis/{protein_design_pro => protein_design}/_backend/seqopt/penalty.py (100%) rename aaanalysis/{protein_design_pro => protein_design}/_backend/seqopt/run.py (100%) rename aaanalysis/{protein_design_pro => protein_design}/_seqopt.py (93%) rename aaanalysis/{protein_design_pro => protein_design}/_seqopt_plot.py (99%) delete mode 100644 aaanalysis/protein_design_pro/__init__.py delete mode 100644 aaanalysis/protein_design_pro/_backend/seqopt/__init__.py rename tests/unit/{protein_design_pro_tests => seqopt_tests}/test_seqopt.py (94%) rename tests/unit/{protein_design_pro_tests => seqopt_tests}/test_seqopt_backend.py (97%) rename tests/unit/{protein_design_pro_tests => seqopt_tests}/test_seqopt_deap_parity.py (98%) diff --git a/.github/scripts/seqopt_deap_comparison.py b/.github/scripts/seqopt_deap_comparison.py index de83b800..e3f76c1a 100644 --- a/.github/scripts/seqopt_deap_comparison.py +++ b/.github/scripts/seqopt_deap_comparison.py @@ -16,7 +16,7 @@ import tracemalloc import numpy as np -from aaanalysis.protein_design_pro._backend.seqopt.nsga2 import ( +from aaanalysis.protein_design._backend.seqopt.nsga2 import ( fast_non_dominated_sort, crowding_distance, select_nsga2) try: diff --git a/CONTEXT.md b/CONTEXT.md index 50d3e9f9..800eeab3 100644 --- a/CONTEXT.md +++ b/CONTEXT.md @@ -128,7 +128,7 @@ _Avoid_: interaction (overloaded with the relational/PPI [[relational / interact ### SeqOpt directed-evolution vocabulary **SeqOpt**: -The **search/optimization** layer of [[design / engineering]] (**[pro]**, `aaanalysis/protein_design_pro/`), the counterpart to [[SeqMut]]'s scoring: it *searches* over sequence variants of **one wild-type** for those that best satisfy several objectives at once. This is **protein engineering** — machine-learning-guided **directed evolution** of an *existing* sequence (Yang et al. 2019; Wittmann et al. 2021) — explicitly **not** *de novo protein design* (building new proteins from the ground up, e.g. RFdiffusion→ProteinMPNN→AlphaFold; Yang et al. 2026), which is out of scope. A `Tool` (`run` → [[Pareto front]] `df_pareto`, `eval` → Pareto-quality metrics), paired with `SeqOptPlot`. Reuses model-bound [[SeqMut]] as the fitness engine and `ShapModel` for residue guidance, so it is `pro` (imports SHAP) even though [[SeqMut]] stays core. Two modes — `"impact"` (SHAP-guided, adaptive) and `"importance"` (feature-importance-guided, greedy) — see [[guidance mode (impact / importance)]]. It realizes the search that [[SeqMut]] and [[combined variant]] defer to. +The **search/optimization** layer of [[design / engineering]] (a **core** class in `aaanalysis/protein_design/`), the counterpart to [[SeqMut]]'s scoring: it *searches* over sequence variants of **one wild-type** for those that best satisfy several objectives at once. This is **protein engineering** — machine-learning-guided **directed evolution** of an *existing* sequence (Yang et al. 2019; Wittmann et al. 2021) — explicitly **not** *de novo protein design* (building new proteins from the ground up, e.g. RFdiffusion→ProteinMPNN→AlphaFold; Yang et al. 2026), which is out of scope. A `Tool` (`run` → [[Pareto front]] `df_pareto`, `eval` → Pareto-quality metrics), paired with `SeqOptPlot`. Reuses model-bound [[SeqMut]] as the fitness engine. Two modes — `"impact"` (SHAP-guided, adaptive) and `"importance"` (feature-importance-guided, greedy) — see [[guidance mode (impact / importance)]]. **Only `mode="impact"` needs `aaanalysis[pro]`** (it imports `ShapModel`/SHAP lazily; absent, it raises a friendly install hint); everything else (importance mode, the NSGA-II/EA layer, all plots) runs in a base install. It realizes the search that [[SeqMut]] and [[combined variant]] defer to. _Avoid_: optimizer (overloaded with numerical/perf optimization — this is evolutionary *search*); generator, sampler; **de novo design / protein design** (SeqOpt does protein *engineering* / directed evolution, not generation of new proteins). **population** / **generation**: diff --git a/aaanalysis/__init__.py b/aaanalysis/__init__.py index 41e71a99..29248171 100644 --- a/aaanalysis/__init__.py +++ b/aaanalysis/__init__.py @@ -7,7 +7,7 @@ from .feature_engineering import AAclust, AAclustPlot, SequenceFeature, NumericalFeature, CPP, CPPGrid, CPPPlot from .pu_learning import dPULearn, dPULearnPlot from .explainable_ai import TreeModel -from .protein_design import AAMut, AAMutPlot, SeqMut, SeqMutPlot +from .protein_design import AAMut, AAMutPlot, SeqMut, SeqMutPlot, SeqOpt, SeqOptPlot from .plotting import (plot_get_clist, plot_get_cmap, plot_get_cdict, plot_settings, plot_legend, plot_gcfs, plot_rank) from .metrics import (comp_auc_adjusted, comp_bic_score, comp_kld, @@ -54,6 +54,8 @@ "AAMutPlot", "SeqMut", "SeqMutPlot", + "SeqOpt", + "SeqOptPlot", "TreeModel", # "ShapModel" # SHAP "plot_get_clist", @@ -118,16 +120,6 @@ def missing_feature_stub(feature_name, error, mode="pro"): globals()["ShapModel"] = missing_feature_stub("ShapModel", e, mode="pro") -try: - from .protein_design_pro import SeqOpt, SeqOptPlot - __all__.extend(["SeqOpt", "SeqOptPlot"]) -except ImportError as e: - SeqOpt = None - SeqOptPlot = None - globals()["SeqOpt"] = missing_feature_stub("SeqOpt", e, mode="pro") - globals()["SeqOptPlot"] = missing_feature_stub("SeqOptPlot", e, mode="pro") - - try: from .seq_analysis_pro import comp_seq_sim __all__.append("comp_seq_sim") diff --git a/aaanalysis/protein_design/__init__.py b/aaanalysis/protein_design/__init__.py index 700619b5..920f52b7 100644 --- a/aaanalysis/protein_design/__init__.py +++ b/aaanalysis/protein_design/__init__.py @@ -1,22 +1,28 @@ """ -Protein design: per-mutation and per-sequence ΔCPP feature-impact analysis. +Protein design / engineering: per-mutation and per-sequence ΔCPP feature-impact analysis +and multi-objective directed-evolution optimization. -Public objects: AAMut(+Plot), SeqMut(+Plot). +Public objects: AAMut(+Plot), SeqMut(+Plot), SeqOpt(+Plot). Consumes CPP feature impact from ``feature_engineering`` to score amino-acid mutations -(``AAMut``) and whole-sequence variants (``SeqMut``); each is paired with a plot class -for visualization. +(``AAMut``) and whole-sequence variants (``SeqMut``); ``SeqOpt`` searches the variant space +(NSGA-II) for the multi-objective Pareto front. Each is paired with a plot class. ``SeqOpt`` is +a core class — only its SHAP-guided ``mode="impact"`` needs ``aaanalysis[pro]`` (imported lazily). See ``.claude/rules/code-conventions.md`` for conventions, ``CONTEXT.md`` for domain -terms (protein-design (mutation / ΔCPP) vocabulary). +terms (protein-design (mutation / ΔCPP) + SeqOpt directed-evolution vocabulary). """ from ._aamut import AAMut from ._aamut_plot import AAMutPlot from ._seqmut import SeqMut from ._seqmut_plot import SeqMutPlot +from ._seqopt import SeqOpt +from ._seqopt_plot import SeqOptPlot __all__ = [ "AAMut", "AAMutPlot", "SeqMut", "SeqMutPlot", + "SeqOpt", + "SeqOptPlot", ] \ No newline at end of file diff --git a/aaanalysis/protein_design_pro/_backend/__init__.py b/aaanalysis/protein_design/_backend/seqopt/__init__.py similarity index 100% rename from aaanalysis/protein_design_pro/_backend/__init__.py rename to aaanalysis/protein_design/_backend/seqopt/__init__.py diff --git a/aaanalysis/protein_design_pro/_backend/seqopt/genome.py b/aaanalysis/protein_design/_backend/seqopt/genome.py similarity index 100% rename from aaanalysis/protein_design_pro/_backend/seqopt/genome.py rename to aaanalysis/protein_design/_backend/seqopt/genome.py diff --git a/aaanalysis/protein_design_pro/_backend/seqopt/metrics.py b/aaanalysis/protein_design/_backend/seqopt/metrics.py similarity index 100% rename from aaanalysis/protein_design_pro/_backend/seqopt/metrics.py rename to aaanalysis/protein_design/_backend/seqopt/metrics.py diff --git a/aaanalysis/protein_design_pro/_backend/seqopt/nsga2.py b/aaanalysis/protein_design/_backend/seqopt/nsga2.py similarity index 100% rename from aaanalysis/protein_design_pro/_backend/seqopt/nsga2.py rename to aaanalysis/protein_design/_backend/seqopt/nsga2.py diff --git a/aaanalysis/protein_design_pro/_backend/seqopt/penalty.py b/aaanalysis/protein_design/_backend/seqopt/penalty.py similarity index 100% rename from aaanalysis/protein_design_pro/_backend/seqopt/penalty.py rename to aaanalysis/protein_design/_backend/seqopt/penalty.py diff --git a/aaanalysis/protein_design_pro/_backend/seqopt/run.py b/aaanalysis/protein_design/_backend/seqopt/run.py similarity index 100% rename from aaanalysis/protein_design_pro/_backend/seqopt/run.py rename to aaanalysis/protein_design/_backend/seqopt/run.py diff --git a/aaanalysis/protein_design_pro/_seqopt.py b/aaanalysis/protein_design/_seqopt.py similarity index 93% rename from aaanalysis/protein_design_pro/_seqopt.py rename to aaanalysis/protein_design/_seqopt.py index 7f06b7a5..ef5b41d4 100644 --- a/aaanalysis/protein_design_pro/_seqopt.py +++ b/aaanalysis/protein_design/_seqopt.py @@ -1,8 +1,8 @@ """ -This is a script for the frontend of the SeqOpt class (**[pro]**): SHAP-guided, fuzzy-labeled -multi-objective directed-evolution optimization over sequence variants of one wild-type. SeqOpt -reuses a model-bound SeqMut as its fitness engine and ShapModel for per-generation residue -guidance, and is therefore gated behind the ``pro`` extra. +This is a script for the frontend of the SeqOpt class: multi-objective directed-evolution +optimization over sequence variants of one wild-type, reusing a model-bound SeqMut as its +fitness engine. SeqOpt is a core class; only its SHAP-guided ``mode="impact"`` (per-generation +ShapModel refit) needs the optional ``aaanalysis[pro]`` dependency, imported lazily. """ from typing import Optional, List, Any, Callable, Tuple, Dict import numpy as np @@ -11,8 +11,9 @@ import aaanalysis.utils as ut from aaanalysis.template_classes import Tool from aaanalysis.feature_engineering._sequence_feature import SequenceFeature -from aaanalysis.protein_design import SeqMut -from aaanalysis.explainable_ai_pro import ShapModel +from ._seqmut import SeqMut +# ShapModel (and its heavy SHAP dependency) is imported lazily inside mode="impact" only, so +# SeqOpt stays importable in a base install; mode="impact" then needs aaanalysis[pro]. from ._backend.seqopt.genome import canonical, apply_genome, variant_label from ._backend.seqopt.run import evolve_nsga2, evolve_greedy from ._backend.seqopt.nsga2 import normalize_objectives_, rank_and_crowding @@ -108,8 +109,9 @@ def residue_weights_(df_feat, col, base): # II Main Functions class SeqOpt(Tool): """ - Sequence Optimizer (SeqOpt) class (**[pro]**, requires ``aaanalysis[pro]``) for SHAP-guided, - multi-objective directed evolution over sequence variants [Breimann24a]_. + Sequence Optimizer (SeqOpt) class for multi-objective directed evolution over sequence + variants [Breimann24a]_. Core class; only the SHAP-guided ``mode="impact"`` needs + ``aaanalysis[pro]`` (``mode="importance"`` and everything else run in a base install). ``SeqOpt`` performs **protein engineering**, not **de novo protein design**. The two are distinct paradigms: *de novo design* builds **new proteins from the ground up** rather than @@ -186,9 +188,10 @@ def __init__(self, mode : str, default='impact' Residue-guidance mode. ``'impact'`` refits :class:`ShapModel` every generation under fuzzy labeling (the new variant's prediction score as a soft label vs. the balanced - reference) and mutates the strongest-``feat_impact`` residues. ``'importance'`` uses - the static ``feat_importance`` ranking from ``df_feat`` (no SHAP, no refit) and walks - positions highest-first. + reference) and mutates the strongest-``feat_impact`` residues — this is the only + SeqOpt feature that needs ``aaanalysis[pro]`` (SHAP), imported lazily. ``'importance'`` + uses the static ``feat_importance`` ranking from ``df_feat`` (no SHAP, no refit) and + walks positions highest-first (no pro dependency). model : object, optional A fitted classifier exposing ``predict_proba`` used as the fitness engine (the ``delta_pred`` objective) and, in ``mode='impact'``, as the model whose attribution @@ -227,12 +230,23 @@ def __init__(self, target_class=target_class) self._model = model self._target_class = target_class - # mode='impact' needs the labeled reference set for the per-generation ShapModel refit. + # mode='impact' needs the labeled reference set + the (pro) ShapModel for the + # per-generation refit. ShapModel is imported lazily so SeqOpt stays core-importable. + self._ShapModel = None if self._mode == "impact": if model is None or df_seq_ref is None or labels is None: raise ValueError("mode='impact' requires 'model', 'df_seq_ref' and 'labels' " "(the balanced reference the per-generation ShapModel refit is " "anchored on); use mode='importance' for a SHAP-free run.") + try: + from aaanalysis.explainable_ai_pro import ShapModel + except ImportError as e: + raise ImportError( + "SeqOpt mode='impact' needs the SHAP-based ShapModel. Install via:\n" + "\n\tpip install 'aaanalysis[pro]'\n\n" + "or use mode='importance' (feature-importance-guided; no pro dependency)." + ) from e + self._ShapModel = ShapModel ut.check_df_seq(df_seq=df_seq_ref) labels = ut.check_labels(labels=labels, len_required=len(df_seq_ref)) self._df_seq_ref = df_seq_ref @@ -335,7 +349,7 @@ def _impact_weights(self, df_seq, df_feat, best_genome, wt_seq, base, jmd_n_len, p_var = float(proba[-1]) if proba.ndim == 1 and len(proba) else float(np.ravel(proba)[-1]) p_var = min(max(p_var, 0.0), 1.0) labels_fuzzy = list(np.asarray(self._labels, dtype=float)) + [p_var] - sm = ShapModel(random_state=self._random_state, verbose=False) + sm = self._ShapModel(random_state=self._random_state, verbose=False) sm.fit(X, labels_fuzzy, fuzzy_labeling=True) df_imp = sm.add_feat_impact(df_feat.copy(), samples=int(len(X) - 1), names="var", drop=True) impact_cols = [c for c in df_imp.columns if c.startswith(ut.COL_FEAT_IMPACT) @@ -401,8 +415,10 @@ def run(self, Parameters ---------- df_seq : pd.DataFrame, shape (1, n_seq_info) - The single wild-type, in the **position-based** format (``sequence``, ``tmd_start``, - ``tmd_stop``). See :meth:`SequenceFeature.get_df_parts` for the full specification. + DataFrame containing an ``entry`` column with unique protein identifiers, in the + **position-based** format (``sequence``, ``tmd_start``, ``tmd_stop``). See + :meth:`SequenceFeature.get_df_parts` for the full ``df_seq`` format specification. + Must hold **exactly one** wild-type sequence (SeqOpt optimizes one sequence per run). df_feat : pd.DataFrame CPP feature set (output of :meth:`CPP.run`) defining the features and the residue attribution (``feat_importance`` / ``feat_impact``, ``positions``) the search reads. diff --git a/aaanalysis/protein_design_pro/_seqopt_plot.py b/aaanalysis/protein_design/_seqopt_plot.py similarity index 99% rename from aaanalysis/protein_design_pro/_seqopt_plot.py rename to aaanalysis/protein_design/_seqopt_plot.py index 39560129..5ea4ca4d 100644 --- a/aaanalysis/protein_design_pro/_seqopt_plot.py +++ b/aaanalysis/protein_design/_seqopt_plot.py @@ -1,5 +1,5 @@ """ -This is a script for the frontend of the SeqOptPlot class (**[pro]**) for visualizing SeqOpt +This is a script for the frontend of the SeqOptPlot class for visualizing SeqOpt multi-objective directed-evolution results: the Pareto-front objective scatter and the per-generation hypervolume convergence trace. """ @@ -38,8 +38,7 @@ def _objective_cols(df_pareto): # II Main Functions class SeqOptPlot: """ - Plotting class for :class:`SeqOpt` (Sequence Optimizer) results (**[pro]**, requires - ``aaanalysis[pro]``) [Breimann24a]_. + Plotting class for :class:`SeqOpt` (Sequence Optimizer) results [Breimann24a]_. Visualizes the Pareto front produced by :meth:`SeqOpt.run`: a 2-D objective scatter colored by non-dominated rank, and the per-generation hypervolume convergence trace. diff --git a/aaanalysis/protein_design_pro/__init__.py b/aaanalysis/protein_design_pro/__init__.py deleted file mode 100644 index b9309417..00000000 --- a/aaanalysis/protein_design_pro/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -Protein design (pro): SHAP-guided multi-objective directed-evolution optimization. - -Public objects: SeqOpt(+Plot). -The search/optimization counterpart to ``protein_design``'s scoring classes: ``SeqOpt`` -evolves multi-mutation variants of one wild-type toward a multi-objective Pareto front, -reusing a model-bound ``SeqMut`` as the fitness engine and ``ShapModel`` for per-generation -residue guidance. Gated behind the ``pro`` extra (imports SHAP). - -See ``.claude/rules/pro-core-boundary.md`` for the pro gating, ``CONTEXT.md`` for domain -terms (SeqOpt directed-evolution vocabulary). -""" -from ._seqopt import SeqOpt -from ._seqopt_plot import SeqOptPlot - -__all__ = [ - "SeqOpt", - "SeqOptPlot", -] diff --git a/aaanalysis/protein_design_pro/_backend/seqopt/__init__.py b/aaanalysis/protein_design_pro/_backend/seqopt/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/adr/0043-seqopt-optimization-layer.md b/docs/adr/0043-seqopt-optimization-layer.md index 6f9c6fbe..8c356f80 100644 --- a/docs/adr/0043-seqopt-optimization-layer.md +++ b/docs/adr/0043-seqopt-optimization-layer.md @@ -38,6 +38,15 @@ dependency forced": that decision keeps `SeqMut` and the `protein_design` **core the new optimizer lives in a separate `*_pro` module. `SeqMut` (core) remains the fitness engine and is imported by `SeqOpt`. +> **Amended (2026-06-26): D1 reversed — `SeqOpt`/`SeqOptPlot` are now CORE.** Only the SHAP-guided +> `mode="impact"` actually needs SHAP, and it is the minority path (`mode="importance"` + the whole +> NSGA-II/EA layer + plots are SHAP-free). `ShapModel` is therefore imported **lazily** inside the +> impact path, so SeqOpt is importable in a base install; constructing `mode="impact"` without +> `shap` raises a friendly `aaanalysis[pro]` hint. `SeqOpt`/`SeqOptPlot` + the `_backend/seqopt/` +> code moved from `protein_design_pro/` into the core `protein_design/` subpackage, and the +> now-empty `protein_design_pro` package was removed. The pro-gating in `aaanalysis/__init__.py` is +> replaced by core exports. + **D2 — Two guidance modes, named after their `df_feat` attribution column.** - `mode="impact"` (default, headline): per-round `ShapModel` refit under **fuzzy labeling** → fresh per-residue `|feat_impact|` → an **adaptive NSGA-II** population evolves the Pareto front. diff --git a/docs/source/index/docstring_guide.rst b/docs/source/index/docstring_guide.rst index 0b18a0f2..bcb2c15e 100644 --- a/docs/source/index/docstring_guide.rst +++ b/docs/source/index/docstring_guide.rst @@ -346,7 +346,7 @@ Rules: - * - ``SeqOpt`` / ``SeqOptPlot`` - ``seqopt`` / ``seqopt_plot`` - - ``pro`` + - core (``mode="impact"`` needs ``pro``) * - ``TreeModel`` - ``tm`` - diff --git a/docs/source/index/release_notes.rst b/docs/source/index/release_notes.rst index 6999e187..9b57830d 100644 --- a/docs/source/index/release_notes.rst +++ b/docs/source/index/release_notes.rst @@ -106,7 +106,8 @@ Added **Protein Design** -- **SeqOpt — multi-objective protein engineering** (``[pro]``): A new ``SeqOpt`` optimizer +- **SeqOpt — multi-objective protein engineering** (**core**; only ``mode="impact"`` needs + ``aaanalysis[pro]``): A new ``SeqOpt`` optimizer (with ``SeqOptPlot``) performs **machine-learning-guided directed evolution** of one wild-type — searching the Pareto front across several objectives at once, with a model-bound ``SeqMut`` as the fitness engine and a re-implementation of NSGA-II for diff --git a/tests/unit/api_tests/test_backend_import_hygiene.py b/tests/unit/api_tests/test_backend_import_hygiene.py index e792bcea..6d8c1c79 100644 --- a/tests/unit/api_tests/test_backend_import_hygiene.py +++ b/tests/unit/api_tests/test_backend_import_hygiene.py @@ -30,7 +30,7 @@ "num_feat": {"_numerical_feature"}, "aaclust": {"_aaclust", "_aaclust_plot"}, }, - "protein_design_pro": { + "protein_design": { "seqopt": {"_seqopt"}, }, } diff --git a/tests/unit/protein_design_pro_tests/test_seqopt.py b/tests/unit/seqopt_tests/test_seqopt.py similarity index 94% rename from tests/unit/protein_design_pro_tests/test_seqopt.py rename to tests/unit/seqopt_tests/test_seqopt.py index fb467e12..ab1b7c6c 100644 --- a/tests/unit/protein_design_pro_tests/test_seqopt.py +++ b/tests/unit/seqopt_tests/test_seqopt.py @@ -1,7 +1,8 @@ -"""This is a script to test SeqOpt.run / SeqOpt.eval and SeqOptPlot (**[pro]**). +"""This is a script to test SeqOpt.run / SeqOpt.eval and SeqOptPlot. -Guarded by ``shap`` (the whole protein_design_pro subpackage imports ShapModel); skipped in a -core-only environment. Tiny deterministic wild-type + real-scale df_feat so the genuine ΔCPP / +SeqOpt is a core class — these tests run in a base install. Only the SHAP-guided +``mode="impact"`` tests skip without ``shap`` (they call ``pytest.importorskip("shap")`` +locally). Tiny deterministic wild-type + real-scale df_feat so the genuine ΔCPP / SequenceFeature engine runs while staying fast. """ import numpy as np @@ -13,8 +14,7 @@ import aaanalysis as aa import aaanalysis.utils as ut -pytest.importorskip("shap") -from aaanalysis.protein_design_pro import SeqOpt, SeqOptPlot # noqa: E402 +from aaanalysis.protein_design import SeqOpt, SeqOptPlot settings.register_profile("ci", deadline=None) settings.load_profile("ci") @@ -201,6 +201,19 @@ def test_delta_pred_without_model_raises(self, wt, df_feat): class TestSeqOptInit: + def test_seqopt_is_core(self): + # SeqOpt is a core class (not pro-gated): the real class, exported, not a stub. + assert "SeqOpt" in aa.__all__ and "SeqOptPlot" in aa.__all__ + assert aa.SeqOpt.__module__ == "aaanalysis.protein_design._seqopt" + + def test_impact_without_shap_raises_pro_hint(self, model, wt, monkeypatch): + # mode="impact" imports ShapModel lazily; without the pro dependency it raises a + # friendly install hint (SeqOpt itself stays importable in a base install). + import sys + monkeypatch.setitem(sys.modules, "aaanalysis.explainable_ai_pro", None) + with pytest.raises(ImportError, match=r"aaanalysis\[pro\]"): + SeqOpt(mode="impact", model=model, df_seq_ref=wt, labels=[1], random_state=0) + def test_impact_requires_reference(self, model): with pytest.raises(ValueError, match="mode='impact'"): SeqOpt(mode="impact", model=model, df_seq_ref=None, labels=None) @@ -215,6 +228,7 @@ def test_df_scales_accepted(self, model): assert so is not None def test_impact_mode_runs(self, wt, df_feat): + pytest.importorskip("shap") pytest.importorskip("sklearn") from sklearn.ensemble import RandomForestClassifier ref = pd.DataFrame({ut.COL_ENTRY: [f"R{i}" for i in range(8)], @@ -234,6 +248,7 @@ def test_impact_mode_runs(self, wt, df_feat): assert _non_dominated(df) def test_impact_mode_df_seq_ref_with_extra_columns(self, wt, df_feat): + pytest.importorskip("shap") # Regression: a reference from load_dataset carries jmd_n/tmd/jmd_c/label columns; the # per-generation ShapModel refit must keep only the position-based columns (else the # appended variant row NaN-trips check_df_seq). diff --git a/tests/unit/protein_design_pro_tests/test_seqopt_backend.py b/tests/unit/seqopt_tests/test_seqopt_backend.py similarity index 97% rename from tests/unit/protein_design_pro_tests/test_seqopt_backend.py rename to tests/unit/seqopt_tests/test_seqopt_backend.py index 72f319f4..22c6ceaa 100644 --- a/tests/unit/protein_design_pro_tests/test_seqopt_backend.py +++ b/tests/unit/seqopt_tests/test_seqopt_backend.py @@ -9,11 +9,11 @@ from hypothesis import given, settings import hypothesis.strategies as some -from aaanalysis.protein_design_pro._backend.seqopt.nsga2 import ( +from aaanalysis.protein_design._backend.seqopt.nsga2 import ( normalize_objectives_, fast_non_dominated_sort, crowding_distance, crowded_better, dcd_tournament, select_nsga2) -from aaanalysis.protein_design_pro._backend.seqopt.metrics import hypervolume, spread -from aaanalysis.protein_design_pro._backend.seqopt.genome import ( +from aaanalysis.protein_design._backend.seqopt.metrics import hypervolume, spread +from aaanalysis.protein_design._backend.seqopt.genome import ( canonical, apply_genome, variant_label, random_genome, init_population, repair, crossover_uniform, crossover_npoint, mutate) diff --git a/tests/unit/protein_design_pro_tests/test_seqopt_deap_parity.py b/tests/unit/seqopt_tests/test_seqopt_deap_parity.py similarity index 98% rename from tests/unit/protein_design_pro_tests/test_seqopt_deap_parity.py rename to tests/unit/seqopt_tests/test_seqopt_deap_parity.py index d593b007..b2fc4b34 100644 --- a/tests/unit/protein_design_pro_tests/test_seqopt_deap_parity.py +++ b/tests/unit/seqopt_tests/test_seqopt_deap_parity.py @@ -18,7 +18,7 @@ deap = pytest.importorskip("deap") from deap import base, creator, tools # noqa: E402 -from aaanalysis.protein_design_pro._backend.seqopt.nsga2 import ( # noqa: E402 +from aaanalysis.protein_design._backend.seqopt.nsga2 import ( # noqa: E402 normalize_objectives_, fast_non_dominated_sort, crowding_distance, select_nsga2) settings.register_profile("ci", deadline=None)