diff --git a/.github/scripts/seqopt_deap_comparison.py b/.github/scripts/seqopt_deap_comparison.py index fb9b5ab3..de83b800 100644 --- a/.github/scripts/seqopt_deap_comparison.py +++ b/.github/scripts/seqopt_deap_comparison.py @@ -1,11 +1,10 @@ """Phase-C comparison: SeqOpt's pure-Python NSGA-II selection core vs the DEAP reference. -Benchmarks the NSGA-II survival selection (fast non-dominated sort + crowding + selNSGA2) of -three implementations across a grid of population size x objective count: +Benchmarks the NSGA-II survival selection (chunked-vectorized non-dominated sort + crowding + +selNSGA2) across a grid of population size x objective count: - * ours (engine="exact") — pure-Python, RNG-matched to DEAP - * ours (engine="fast") — numpy-vectorized non-dominated sort (identical result) - * DEAP (tools.selNSGA2) — the reference oracle (dev/test-only dependency) + * ours (tools.selNSGA2 re-implementation) — pure-Python, DEAP-free at runtime + * DEAP (tools.selNSGA2) — the reference oracle (dev/test-only dependency) Reports correctness (survivor rank/crowding profile identical to DEAP) plus wall-clock and peak memory, so the maintainer can make the ship-ours-vs-depend-on-DEAP call from data. The @@ -18,7 +17,7 @@ import numpy as np from aaanalysis.protein_design_pro._backend.seqopt.nsga2 import ( - fast_non_dominated_sort, crowding_distance, select_nsga2, select_nsga2_engine) + fast_non_dominated_sort, crowding_distance, select_nsga2) try: from deap import base, creator, tools @@ -76,13 +75,11 @@ def main(): mu = n // 2 ref = profile(W, deap_select(W, mu)) runs = { - "ours-exact": lambda: select_nsga2(W, mu), - "ours-fast": lambda: select_nsga2_engine(W, mu, engine="fast"), + "ours": lambda: select_nsga2(W, mu), "deap": lambda: deap_select(W, mu), } sels = { - "ours-exact": set(select_nsga2(W, mu)[0]), - "ours-fast": set(select_nsga2_engine(W, mu, engine="fast")[0]), + "ours": set(select_nsga2(W, mu)[0]), "deap": deap_select(W, mu), } for name, fn in runs.items(): diff --git a/CONTEXT.md b/CONTEXT.md index 9db3fac2..a329e4f9 100644 --- a/CONTEXT.md +++ b/CONTEXT.md @@ -159,9 +159,9 @@ _Avoid_: pseudo-labeling (no hard threshold is assigned — the label *is* the c The DEAP-mapped algorithm families [[SeqOpt]] re-implements in **pure Python** (DEAP is a dev/test-only parity oracle, never a runtime dependency): **crossover** (uniform / one-point / two-point over mutation-sets), **mutation** (substitution / shift), **variation** (`varAnd` = crossover *and* mutation, `varOr` = one of crossover / mutation / reproduction), **survival** (`mu_plus_lambda` elitist / `mu_comma_lambda` / `ea_simple` generational), **constraints** (feasibility callables penalized DeltaPenalty- or ClosestValidPenalty-style), and the single-objective **Hall of Fame** (`SeqOpt.hall_of_fame_`) alongside the [[Pareto front]] archive. Counterpart to the multi-objective [[non-dominated rank]] / [[crowding distance]] selection core. _Avoid_: GA primitive (these are the EA layer); DEAP operator (ours is an independent re-implementation). -**engine (exact / fast)** (in [[SeqOpt]]): -The NSGA-II sort/selection kernel. `engine="exact"` is the pure-Python path whose RNG stream and crowding formula (`nobj·span` normalization) are matched to the **DEAP reference**, so on a fixed seed it reproduces DEAP's [[non-dominated rank]] + [[crowding distance]] ordering; `engine="fast"` vectorizes the O(n²) non-dominated sort with numpy and yields a **numerically identical** front (verified faster than DEAP). The orthogonal axis to [[guidance mode (impact / importance)]]. -_Avoid_: byte-exact (the bar is equivalence — identical front membership + ordering, values within tolerance — not byte-identical serialization). +**NSGA-II kernel / DEAP parity** (in [[SeqOpt]]): +The non-dominated sort + crowding formula (`nobj·span` normalization, matching DEAP's `assignCrowdingDist`) is a single pure-Python implementation with a **memory-bounded chunked-vectorized** dominance scan (an earlier `engine="exact"/"fast"` split was collapsed once both were shown to give byte-identical fronts). On a fixed seed it reproduces the **DEAP reference**'s [[non-dominated rank]] + [[crowding distance]] ordering (DEAP is a dev/test-only oracle; runtime stays DEAP-free), and the comparison benchmark shows it is faster than DEAP. +_Avoid_: byte-exact (the bar is equivalence — identical front membership + ordering, values within tolerance — not byte-identical serialization); engine (the `exact`/`fast` knob was removed). **convergence** (`SeqOpt.eval`): The optional generational-distance metric: the mean range-normalized distance from each [[Pareto front]] point to its nearest point on a user-supplied **reference front** (`ref_front`); lower = closer to the target. Joins `hypervolume` and `spread` in `df_eval` only when a reference is given. diff --git a/aaanalysis/_constants.py b/aaanalysis/_constants.py index 1fe3b5b0..0e109386 100644 --- a/aaanalysis/_constants.py +++ b/aaanalysis/_constants.py @@ -308,7 +308,6 @@ def _folder_path(super_folder, folder_name): LIST_SEQOPT_VARIATION = ["and", "or"] # varAnd (crossover AND mutation) | varOr (one of) LIST_SEQOPT_SURVIVAL = ["mu_plus_lambda", "mu_comma_lambda", "ea_simple"] LIST_SEQOPT_PENALTY = ["delta", "closest_valid"] # DeltaPenalty | ClosestValidPenalty semantics -LIST_SEQOPT_ENGINE = ["exact", "fast"] # pure-Python RNG-matched | numpy-vectorized LIST_SEQOPT_INIT = ["random", "suggest"] # random seeding | warm-start from SeqMut.suggest LIST_OBJECTIVE_GOALS = ["max", "min"] # Built-in objective sources (a callable(df_variant)->array is also accepted at run time). diff --git a/aaanalysis/protein_design_pro/_backend/seqopt/nsga2.py b/aaanalysis/protein_design_pro/_backend/seqopt/nsga2.py index 7dcf92e0..d95cda52 100644 --- a/aaanalysis/protein_design_pro/_backend/seqopt/nsga2.py +++ b/aaanalysis/protein_design_pro/_backend/seqopt/nsga2.py @@ -57,30 +57,21 @@ def fast_non_dominated_sort(W) -> Tuple[List[List[int]], np.ndarray]: """ W = np.asarray(W, dtype=float) n = W.shape[0] - dominated_by = [[] for _ in range(n)] # solutions that i dominates - n_dominating = np.zeros(n, dtype=int) # how many dominate i - for i in range(n): - for j in range(i + 1, n): - if _dominates(W[i], W[j]): - dominated_by[i].append(j) - n_dominating[j] += 1 - elif _dominates(W[j], W[i]): - dominated_by[j].append(i) - n_dominating[i] += 1 - fronts: List[List[int]] = [] - current = [i for i in range(n) if n_dominating[i] == 0] + if n == 0: + return [], np.zeros(0, dtype=int) + dom = _dominance_matrix(W) # dom[i, j] == True <=> i dominates j + remaining = dom.sum(axis=0) # how many points dominate j rank = np.zeros(n, dtype=int) + placed = np.zeros(n, dtype=bool) + fronts: List[List[int]] = [] + current = np.where(remaining == 0)[0] f = 0 - while current: - fronts.append(sorted(current)) - nxt = [] - for i in current: - for j in dominated_by[i]: - n_dominating[j] -= 1 - if n_dominating[j] == 0: - rank[j] = f + 1 - nxt.append(j) - current = nxt + while current.size: + rank[current] = f + placed[current] = True + fronts.append(sorted(current.tolist())) + remaining = remaining - dom[current].sum(axis=0) + current = np.where((remaining == 0) & (~placed))[0] f += 1 return fronts, rank @@ -210,7 +201,7 @@ def select_nsga2(W, mu) -> Tuple[List[int], np.ndarray, np.ndarray]: return survivors, rank, crowding -# III Fast (vectorized) engine — numerically identical fronts, numpy-vectorized for speed +# III Vectorized dominance helper + rank/crowding assignment def _dominance_matrix(W, max_cells=4_000_000): """Boolean ``dom[i, j] = i dominates j`` via row-chunks (caps the (block, n, m) transient). @@ -231,79 +222,14 @@ def _dominance_matrix(W, max_cells=4_000_000): return dom -def fast_non_dominated_sort_vec(W) -> Tuple[List[List[int]], np.ndarray]: - """Vectorized fast non-dominated sort (``engine='fast'``). - - Produces the **same** fronts and ranks as :func:`fast_non_dominated_sort` (same dominance - relation, same ascending-index order within a front), computing the pairwise dominance - matrix with numpy in **row-chunks** (memory-bounded) instead of a Python double loop. - """ +def rank_and_crowding(W) -> Tuple[np.ndarray, np.ndarray]: + """Assign the non-dominated rank + crowding distance to every row.""" W = np.asarray(W, dtype=float) n = W.shape[0] - if n == 0: - return [], np.zeros(0, dtype=int) - dom = _dominance_matrix(W) # dom[i, j] == True <=> i dominates j - remaining = dom.sum(axis=0) # how many points dominate j - rank = np.zeros(n, dtype=int) - placed = np.zeros(n, dtype=bool) - fronts: List[List[int]] = [] - current = np.where(remaining == 0)[0] - f = 0 - while current.size: - rank[current] = f - placed[current] = True - fronts.append(sorted(current.tolist())) - remaining = remaining - dom[current].sum(axis=0) - current = np.where((remaining == 0) & (~placed))[0] - f += 1 - return fronts, rank - - -def rank_and_crowding(W, engine="exact") -> Tuple[np.ndarray, np.ndarray]: - """Assign rank + crowding to every row, via the exact or the fast (vectorized) sort. - - Both engines return identical ``rank`` and ``crowding`` (the fast path only vectorizes the - O(n^2) dominance scan); ``engine='fast'`` is a speed option, not a different result. - """ - W = np.asarray(W, dtype=float) - n = W.shape[0] - if engine == ut.LIST_SEQOPT_ENGINE[1]: # "fast" - fronts, rank = fast_non_dominated_sort_vec(W) - else: # "exact" - fronts, rank = fast_non_dominated_sort(W) + fronts, rank = fast_non_dominated_sort(W) crowding = np.zeros(n, dtype=float) for front in fronts: d = crowding_distance(W, front) for idx, member in enumerate(front): crowding[member] = d[idx] return rank, crowding - - -def select_nsga2_engine(W, mu, engine="exact") -> Tuple[List[int], np.ndarray, np.ndarray]: - """Engine-aware NSGA-II survival selection (exact or vectorized sort, identical result). - - The fast path fills survivors **front-by-front in the same order** as :func:`select_nsga2` - (ascending index within a front, partial front by descending crowding) so the survivor - list -- not just its set -- is identical to the exact engine; population order drives the - downstream RNG, so any order difference would otherwise cascade into different offspring. - """ - if engine != ut.LIST_SEQOPT_ENGINE[1]: # "exact" - return select_nsga2(W, mu) - W = np.asarray(W, dtype=float) - n = W.shape[0] - fronts, rank = fast_non_dominated_sort_vec(W) - crowding = np.zeros(n, dtype=float) - for front in fronts: - d = crowding_distance(W, front) - for idx, member in enumerate(front): - crowding[member] = d[idx] - survivors: List[int] = [] - for front in fronts: - if len(survivors) + len(front) <= mu: - survivors.extend(front) - else: - remaining = mu - len(survivors) - order = sorted(front, key=lambda i: (-crowding[i], i)) - survivors.extend(order[:remaining]) - break - return survivors, rank, crowding diff --git a/aaanalysis/protein_design_pro/_backend/seqopt/run.py b/aaanalysis/protein_design_pro/_backend/seqopt/run.py index a21f1885..0691dd34 100644 --- a/aaanalysis/protein_design_pro/_backend/seqopt/run.py +++ b/aaanalysis/protein_design_pro/_backend/seqopt/run.py @@ -9,8 +9,8 @@ import numpy as np import aaanalysis.utils as ut -from .nsga2 import (normalize_objectives_, fast_non_dominated_sort, crowding_distance, - dcd_tournament, select_nsga2, rank_and_crowding, select_nsga2_engine) +from .nsga2 import (normalize_objectives_, fast_non_dominated_sort, + dcd_tournament, select_nsga2, rank_and_crowding) from .genome import (crossover_uniform, crossover_npoint, mutate, init_population, canonical) from .metrics import hypervolume, spread @@ -110,18 +110,9 @@ def _update_archive(archive, genomes, F, goals): return {keys[i]: archive[keys[i]] for i in range(len(keys)) if rank[i] == 0} -def _front_rank_crowding(W, engine="exact") -> Tuple[np.ndarray, np.ndarray]: - """Assign every row its non-dominated rank and crowding distance (engine-aware).""" - if engine == ut.LIST_SEQOPT_ENGINE[1]: # "fast" - return rank_and_crowding(W, engine="fast") - n = W.shape[0] - fronts, rank = fast_non_dominated_sort(W) - crowding = np.zeros(n, dtype=float) - for front in fronts: - d = crowding_distance(W, front) - for idx, member in enumerate(front): - crowding[member] = d[idx] - return rank, crowding +def _front_rank_crowding(W) -> Tuple[np.ndarray, np.ndarray]: + """Assign every row its non-dominated rank and crowding distance.""" + return rank_and_crowding(W) # II Main Functions @@ -141,7 +132,6 @@ def evolve_nsga2(wt_seq: str, mut_prob: float = 0.2, survival: str = "mu_plus_lambda", variation: str = "and", - engine: str = "exact", hof_size: int = 10, suggest_seeds: Optional[List[Dict[int, str]]] = None, ) -> dict: @@ -150,14 +140,14 @@ def evolve_nsga2(wt_seq: str, Returns a dict with ``genomes`` (final population), ``F`` (raw objective matrix), ``rank``, ``crowding``, ``trajectory`` (per-generation hypervolume) and ``hall_of_fame`` (best-k single-objective genomes). ``variation`` selects varAnd/varOr; ``survival`` selects - (mu+lambda) / (mu,lambda) / eaSimple; ``engine`` selects the exact or vectorized sort. + (mu+lambda) / (mu,lambda) / eaSimple. """ weights = guide_fn(None) pop = init_population(pop_size, wt_seq, positions, alphabet, n_mut_max, rng, weights=weights, suggest_seeds=suggest_seeds) F = np.asarray(fitness_fn(pop), dtype=float) W = normalize_objectives_(F, goals) - rank, crowding = _front_rank_crowding(W, engine=engine) + rank, crowding = _front_rank_crowding(W) hof = _update_hof({}, pop, F, goals, hof_size) # Fixed reference (initial nadir) so the per-generation hypervolume is comparable across # generations -- under (mu+lambda) elitism the first front never worsens, so the trace is @@ -186,11 +176,11 @@ def evolve_nsga2(wt_seq: str, else: # "mu_plus_lambda" (default) pool, F_pool = pop + offspring, np.vstack([F, F_off]) W_pool = normalize_objectives_(F_pool, goals) - survivors, _, _ = select_nsga2_engine(W_pool, pop_size, engine=engine) + survivors, _, _ = select_nsga2(W_pool, pop_size) pop = [pool[i] for i in survivors] F = F_pool[survivors] W = normalize_objectives_(F, goals) - rank, crowding = _front_rank_crowding(W, engine=engine) + rank, crowding = _front_rank_crowding(W) trajectory.append(hypervolume(W, ref=hv_ref)) spread_traj.append(spread(W)) best_traj.append(_per_obj_best(F, goals, rank)) diff --git a/aaanalysis/protein_design_pro/_seqopt.py b/aaanalysis/protein_design_pro/_seqopt.py index 58398a19..7f06b7a5 100644 --- a/aaanalysis/protein_design_pro/_seqopt.py +++ b/aaanalysis/protein_design_pro/_seqopt.py @@ -165,7 +165,7 @@ class SeqOpt(Tool): Rows with no DEAP analogue are the aaanalysis value-add: the SHAP / ``feat_importance`` residue guidance (``mode``), the sequence genome + domain constraints (``n_mut_max``, ``region``, ``to_aa``), the ``algorithm="greedy"`` baseline, and any ``callable(sequence)`` - objective. ``engine="exact"/"fast"`` is an implementation detail (identical fronts). + objective. .. versionadded:: 1.0.0 @@ -379,7 +379,6 @@ def run(self, mut_prob: float = 0.2, survival: str = "mu_plus_lambda", variation: str = "and", - engine: str = "exact", constraints: Optional[List[Callable]] = None, penalty: str = "delta", hof_size: int = 10, @@ -434,9 +433,6 @@ def run(self, variation : str, default='and' Variation scheme: ``'and'`` (varAnd — crossover *and* mutation) or ``'or'`` (varOr — each offspring is crossover *or* mutation *or* reproduction; needs cx_prob+mut_prob<=1). - engine : str, default='exact' - ``'exact'`` (pure-Python, RNG-matched to the DEAP reference) or ``'fast'`` (numpy- - vectorized non-dominated sort; numerically identical fronts, faster). constraints : list of callable, optional Feasibility predicates ``genome -> bool`` (``True`` = feasible). Infeasible variants are penalized so the search avoids them. @@ -490,7 +486,6 @@ def run(self, list_str_options=ut.LIST_SEQOPT_SURVIVAL) ut.check_str_options(name="variation", val=variation, list_str_options=ut.LIST_SEQOPT_VARIATION) - ut.check_str_options(name="engine", val=engine, list_str_options=ut.LIST_SEQOPT_ENGINE) ut.check_str_options(name="penalty", val=penalty, list_str_options=ut.LIST_SEQOPT_PENALTY) ut.check_str_options(name="init", val=init, list_str_options=ut.LIST_SEQOPT_INIT) ut.check_number_range(name="pop_size", val=pop_size, min_val=2, just_int=True) @@ -538,7 +533,7 @@ def run(self, pop_size=pop_size, n_gen=n_gen, n_mut_max=n_mut_max, crossover=crossover, mutation=mutation, cx_prob=cx_prob, mut_prob=mut_prob, survival=survival, variation=variation, - engine=engine, hof_size=hof_size, suggest_seeds=suggest_seeds) + hof_size=hof_size, suggest_seeds=suggest_seeds) self.trajectory_ = list(res["trajectory"]) # Hall of Fame: best-k single-objective variants (labels) across all generations. self.hall_of_fame_ = [variant_label(wt_seq, g) for g in res.get("hall_of_fame", [])] diff --git a/docs/adr/0045-seqopt-deap-parity-and-pure-python-operators.md b/docs/adr/0045-seqopt-deap-parity-and-pure-python-operators.md index 3240e314..338ebe97 100644 --- a/docs/adr/0045-seqopt-deap-parity-and-pure-python-operators.md +++ b/docs/adr/0045-seqopt-deap-parity-and-pure-python-operators.md @@ -30,10 +30,11 @@ Fame** (`hall_of_fame_`) beside the Pareto archive, and **hypervolume / spread / metrics. Exposed via `run` params `variation`, `survival`, `constraints`, `penalty`, `hof_size` and `eval`'s `ref_front`. -**D2 — Two engines, identical results.** `engine="exact"` is the pure-Python kernel whose crowding -formula matches DEAP's `assignCrowdingDist` (`nobj·span` normalization); `engine="fast"` vectorizes -the O(n²) non-dominated sort with numpy and returns a **numerically identical** front (same survivor -list, not just set). `fast` is purely a speed option. +**D2 — A single vectorized kernel.** The non-dominated sort uses a **memory-bounded chunked +numpy** dominance scan; its crowding formula matches DEAP's `assignCrowdingDist` (`nobj·span` +normalization). *(Amended: an initial `engine="exact"/"fast"` parameter exposed a pure-Python loop +vs. the vectorized path; once verified to give byte-identical fronts, the redundant knob was removed +and the vectorized kernel kept as the single implementation.)* **D3 — DEAP is a dev/test-only parity oracle.** `deap` is added to the **`[dev]`** extra; the shipped runtime never imports it. Parity is asserted at the **selection-primitive** level on @@ -44,10 +45,9 @@ synthetic fitness sets (the algorithm-agnostic, robust place to compare): our `f ties on the partial front). The bar is **equivalence, not byte-identity** (per ADR-0043). **D4 — Ship ours.** The Phase-C comparison (`.github/scripts/seqopt_deap_comparison.py`) benchmarks -ours-`exact` / ours-`fast` / DEAP across `pop_size × n_objectives`: all three are correctness- -identical, and `engine="fast"` is **faster than DEAP** (e.g. ~14 ms vs ~102 ms at 500×3) while -keeping the runtime dependency-free. Decision: **ship the pure-Python implementation** (`fast` for -speed, `exact` as the RNG-matched reference); DEAP stays dev-only. +ours vs. DEAP across `pop_size × n_objectives`: correctness-identical, and ours is **faster than +DEAP** (e.g. ~14 ms vs ~102 ms at 500×3) while keeping the runtime dependency-free. Decision: **ship +the pure-Python implementation** (the single vectorized kernel); DEAP stays dev-only. ## Rejected alternatives diff --git a/docs/source/index/release_notes.rst b/docs/source/index/release_notes.rst index 43160715..6cb4fbec 100644 --- a/docs/source/index/release_notes.rst +++ b/docs/source/index/release_notes.rst @@ -118,7 +118,7 @@ Added one- / two-point), ``mutation`` (substitution / shift), ``variation`` (varAnd / varOr), ``survival`` ((mu+lambda) / (mu,lambda) / eaSimple), ``constraints`` (delta / closest-valid penalty), a single-objective Hall of Fame (``hall_of_fame_``), and a memory-bounded - ``engine="exact"|"fast"`` switch (identical fronts). Objectives accept any + (chunked) vectorized non-dominated sort. Objectives accept any ``callable(sequence) -> float`` (an external scikit / torch model or sequence-level tool / web API), cached per variant. ``run`` returns ``df_pareto`` (objective columns + ``rank`` + ``crowding``) backed by a cumulative Pareto archive; ``eval`` reports diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst index 24c3c16b..517a2985 100755 --- a/docs/source/tutorials.rst +++ b/docs/source/tutorials.rst @@ -84,4 +84,4 @@ mutation map and lineage. .. toctree:: :maxdepth: 1 - generated/tutorial7_protein_design \ No newline at end of file + generated/tutorial7_protein_engineering \ No newline at end of file diff --git a/tests/unit/protein_design_pro_tests/test_seqopt.py b/tests/unit/protein_design_pro_tests/test_seqopt.py index 6084c038..fb467e12 100644 --- a/tests/unit/protein_design_pro_tests/test_seqopt.py +++ b/tests/unit/protein_design_pro_tests/test_seqopt.py @@ -419,17 +419,6 @@ def test_variation_or_prob_sum_gt_one_raises(self, seqopt, wt, df_feat): with pytest.raises(ValueError, match="cx_prob"): seqopt.run(variation="or", cx_prob=0.8, mut_prob=0.5, **self._base(wt, df_feat)) - def test_engine_fast(self, seqopt, wt, df_feat): - df = seqopt.run(engine="fast", **self._base(wt, df_feat)) - assert _non_dominated(df) - - def test_engine_exact_equals_fast(self, model, wt, df_feat): - kw = dict(df_seq=wt, df_feat=df_feat, objectives=OBJ, pop_size=10, n_gen=5, - n_mut_max=3, region="tmd", seed=11) - ex = SeqOpt(mode="importance", model=model).run(engine="exact", **kw) - fa = SeqOpt(mode="importance", model=model).run(engine="fast", **kw) - assert set(ex[ut.COL_VARIANT]) == set(fa[ut.COL_VARIANT]) - def test_survival_ea_simple(self, seqopt, wt, df_feat): df = seqopt.run(survival="ea_simple", **self._base(wt, df_feat)) assert len(df) >= 1 @@ -459,10 +448,6 @@ def test_eval_convergence_with_ref_front(self, seqopt, wt, df_feat): de = seqopt.eval(df_pareto=df, ref_front=[[5.0, 1.0], [3.0, 1.0]]) assert ut.COL_CONVERGENCE in de.columns and de.iloc[0][ut.COL_CONVERGENCE] >= 0 - def test_bad_engine_raises(self, seqopt, wt, df_feat): - with pytest.raises(ValueError): - seqopt.run(engine="cuda", **self._base(wt, df_feat)) - def test_bad_variation_raises(self, seqopt, wt, df_feat): with pytest.raises(ValueError): seqopt.run(variation="xor", **self._base(wt, df_feat)) diff --git a/tests/unit/protein_design_pro_tests/test_seqopt_deap_parity.py b/tests/unit/protein_design_pro_tests/test_seqopt_deap_parity.py index f6801af2..d593b007 100644 --- a/tests/unit/protein_design_pro_tests/test_seqopt_deap_parity.py +++ b/tests/unit/protein_design_pro_tests/test_seqopt_deap_parity.py @@ -6,8 +6,7 @@ * identical non-dominated **rank** (front membership) for every point, * identical within-front **crowding ordering** + crowding values within ``atol``, -* identical ``selNSGA2`` survivor **set**, -* both SeqOpt engines (``exact`` / ``fast``) agree with DEAP (and each other). +* identical ``selNSGA2`` survivor profile. Skipped when ``deap`` is not installed (core-only environments). """ @@ -20,8 +19,7 @@ from deap import base, creator, tools # noqa: E402 from aaanalysis.protein_design_pro._backend.seqopt.nsga2 import ( # noqa: E402 - normalize_objectives_, fast_non_dominated_sort, fast_non_dominated_sort_vec, - crowding_distance, select_nsga2, select_nsga2_engine) + normalize_objectives_, fast_non_dominated_sort, crowding_distance, select_nsga2) settings.register_profile("ci", deadline=None) settings.load_profile("ci") @@ -98,13 +96,6 @@ def test_rank_matches_deap(self, seed, n, m): _, ours = fast_non_dominated_sort(W) assert ours.tolist() == _deap_rank(W).tolist() - @settings(max_examples=10, deadline=None) - @given(seed=some.integers(0, 200), n=some.integers(3, 25), m=some.integers(2, 4)) - def test_fast_sort_matches_deap(self, seed, n, m): - W = _rand_W(seed, n, m) - _, ours = fast_non_dominated_sort_vec(W) - assert ours.tolist() == _deap_rank(W).tolist() - def test_golden_three_point_front(self): W = normalize_objectives_(np.array([[1., 0.], [0., 1.], [0.5, 0.5], [0., 0.]]), ["max", "max"]) @@ -166,7 +157,6 @@ def profile(idxs): theirs = profile(_deap_select(W, mu)) assert profile(select_nsga2(W, mu)[0]) == theirs - assert profile(select_nsga2_engine(W, mu, engine="fast")[0]) == theirs @settings(max_examples=15, deadline=None) @given(seed=some.integers(0, 200), n=some.integers(6, 25), m=some.integers(2, 3)) @@ -181,9 +171,3 @@ def test_survivor_rank_distribution_matches_deap_with_ties(self, seed, n, m): ours = sorted(int(rank[i]) for i in select_nsga2(W, mu)[0]) theirs = sorted(int(rank[i]) for i in _deap_select(W, mu)) assert ours == theirs - - def test_engines_agree_on_survivor_order(self): - W = _rand_W(7, 20, 2) - ex = select_nsga2(W, 10)[0] - fa = select_nsga2_engine(W, 10, engine="fast")[0] - assert ex == fa diff --git a/tutorials/tutorial7_protein_design.ipynb b/tutorials/tutorial7_protein_engineering.ipynb similarity index 100% rename from tutorials/tutorial7_protein_design.ipynb rename to tutorials/tutorial7_protein_engineering.ipynb