From 1f57f9ffc8942fad9e078c37fa0c3549ca520ac8 Mon Sep 17 00:00:00 2001 From: garnet Date: Tue, 12 May 2026 21:11:12 -0500 Subject: [PATCH] fix(orchestrator): lazy threadpool imports for pyodide consumers (sy-2wa) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move `import threading` + `from concurrent.futures import ThreadPoolExecutor, as_completed` from module top to function-local imports in orchestrator, synthesis, and perturbation. The `synth_panel.ensemble` load chain is now fully threadpool-free at module load time, unblocking Cloudflare Python Workers / pyodide consumers (boardroom DECISION-skill officers) where ThreadPoolExecutor exists as a stub but `.submit()` silently hangs. Adds tests/test_threadpool_lazy_import.py — subprocess-based load-chain hygiene assertions: (1) concurrent.futures stays out of sys.modules after a fresh ensemble import, (2) ensemble loads cleanly under a poisoned concurrent.futures module, (3) ThreadPoolExecutor/as_completed are never bound at the top of orchestrator/synthesis/perturbation. Bumps __version__ to 1.5.0 + refreshes server-card.json / site renders. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 31 ++++++ site/.well-known/mcp/server-card.json | 6 +- site/index.html | 8 +- site/index.md | 2 +- src/synth_panel/__version__.py | 2 +- src/synth_panel/orchestrator.py | 24 +++- src/synth_panel/perturbation.py | 5 +- src/synth_panel/synthesis.py | 7 +- tests/test_threadpool_lazy_import.py | 151 ++++++++++++++++++++++++++ 9 files changed, 222 insertions(+), 14 deletions(-) create mode 100644 tests/test_threadpool_lazy_import.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 51210bc..cb3fe19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,37 @@ For auto-generated release notes, see [GitHub Releases](https://github.com/DataV (Empty — next-cycle work lands here.) 
+## [1.5.0] - 2026-05-13 + +Pyodide / Cloudflare Python Workers consumers can now import +``synth_panel.ensemble`` without dragging ``ThreadPoolExecutor`` into +the load chain. Boardroom (and any other Workers-style consumer that +adopts ``synthesize_panel``) is unblocked: the ensemble surface is +fully threadpool-free at load time, so transitively-bound ``.submit()`` +calls can no longer deadlock the Worker runtime. + +### Changed + +- **Lazy threading imports across the ensemble load chain (sy-2wa).** + ``synth_panel.orchestrator``, ``synth_panel.synthesis``, and + ``synth_panel.perturbation`` no longer bind ``ThreadPoolExecutor`` / + ``as_completed`` / ``threading`` at module top. The imports are + moved into the threaded entry points (``run_panel_parallel``, + ``synthesize_panel_mapreduce``, ``generate_panel_variants_parallel``, + ``WorkerRegistry.__init__``) so ``from synth_panel.ensemble import + synthesize_panel`` never touches ``concurrent.futures``. Boardroom's + 22 s ``asyncio.wait_for`` fallback around ``synthesize_panel`` (PR #11) + can now be removed (or kept as defense-in-depth). + +### Added + +- **``tests/test_threadpool_lazy_import.py`` (sy-2wa).** CI test that + asserts ``concurrent.futures`` stays out of ``sys.modules`` after a + fresh ``synth_panel.ensemble`` load, runs the same import against a + poisoned ``concurrent.futures`` (any access raises), and pins the + ``orchestrator`` / ``synthesis`` / ``perturbation`` module namespaces + as ``ThreadPoolExecutor``-free. + ## [1.4.0] - 2026-05-12 OpenRouter cost actuals are now surfaced explicitly alongside the local diff --git a/site/.well-known/mcp/server-card.json b/site/.well-known/mcp/server-card.json index 5c68611..5660000 100644 --- a/site/.well-known/mcp/server-card.json +++ b/site/.well-known/mcp/server-card.json @@ -3,7 +3,7 @@ "name": "io.github.DataViking-Tech/synthpanel", "title": "SynthPanel", "description": "Run synthetic focus groups using AI personas. 
12 MCP tools for single prompts, full panel runs, and v3 branching (adaptive) instruments across any LLM provider (Claude, OpenAI, Gemini, xAI).", - "version": "1.4.0", + "version": "1.5.0", "websiteUrl": "https://synthpanel.dev", "repository": { "url": "https://github.com/DataViking-Tech/SynthPanel", @@ -11,7 +11,7 @@ }, "serverInfo": { "name": "synthpanel", - "version": "1.4.0" + "version": "1.5.0" }, "capabilities": { "tools": { "listChanged": false }, @@ -23,7 +23,7 @@ "registryType": "pypi", "registryBaseUrl": "https://pypi.org", "identifier": "synthpanel", - "version": "1.4.0", + "version": "1.5.0", "runtimeHint": "uvx", "runtimeArguments": [ { "type": "positional", "value": "synthpanel[mcp]" }, diff --git a/site/index.html b/site/index.html index 08722d0..803143c 100644 --- a/site/index.html +++ b/site/index.html @@ -45,8 +45,8 @@ "applicationCategory": "DeveloperApplication", "applicationSubCategory": "Research Tool", "operatingSystem": "Cross-platform", - "softwareVersion": "1.4.0", - "dateModified": "2026-05-12", + "softwareVersion": "1.5.0", + "dateModified": "2026-05-13", "license": "https://opensource.org/licenses/MIT", "codeRepository": "https://github.com/DataViking-Tech/SynthPanel", "downloadUrl": "https://pypi.org/project/synthpanel/", @@ -140,7 +140,7 @@ class="mb-4 inline-flex items-center gap-2 rounded-full border border-emerald-400/30 bg-emerald-400/5 px-3 py-1 text-xs font-medium text-emerald-300" > - v1.4.0 — public beta + v1.5.0 — public beta

class="mt-4 flex flex-wrap items-center justify-between gap-3 border-t border-slate-800 py-6 text-xs text-slate-500" > - © 2026 DataViking · MIT-licensed · v1.4.0 · + © 2026 DataViking · MIT-licensed · v1.5.0 · None: + # sy-2wa: lazy threading import keeps synth_panel.ensemble loadable + # under pyodide (CF Python Workers). Bare `threading` is a no-op + # stub there; we still avoid binding it at module level so the + # whole load chain stays threadpool-free until a real run. + import threading + self._lock = threading.RLock() self._workers: dict[str, Worker] = {} @@ -1484,6 +1495,13 @@ def run_panel_parallel( # always-on prefix caching still applies. min_stratum_pop = _min_stratum_population(personas, questions) + # sy-2wa: lazy threading + concurrent.futures imports. Bound here so + # `from synth_panel.ensemble import synthesize_panel` never pulls + # ThreadPoolExecutor into the module's namespace under pyodide + # (CF Python Workers), where `.submit()` silently hangs. + import threading + from concurrent.futures import ThreadPoolExecutor, as_completed + registry = WorkerRegistry() effective_workers = max_workers or len(personas) sentiment_cache: dict[str, str] = {} diff --git a/src/synth_panel/perturbation.py b/src/synth_panel/perturbation.py index 717125c..68e1eb9 100644 --- a/src/synth_panel/perturbation.py +++ b/src/synth_panel/perturbation.py @@ -8,7 +8,6 @@ from __future__ import annotations import logging -from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from enum import Enum from typing import Any @@ -328,6 +327,10 @@ def _gen(p: dict[str, Any]) -> VariantSet: if workers <= 1 or len(personas) <= 1: return [_gen(p) for p in personas] + # sy-2wa: lazy import keeps `synth_panel.ensemble` load chain + # threadpool-free for pyodide consumers (CF Python Workers). 
+ from concurrent.futures import ThreadPoolExecutor + with ThreadPoolExecutor(max_workers=workers) as pool: futures = [pool.submit(_gen, p) for p in personas] return [f.result() for f in futures] diff --git a/src/synth_panel/synthesis.py b/src/synth_panel/synthesis.py index f6f7944..ed19802 100644 --- a/src/synth_panel/synthesis.py +++ b/src/synth_panel/synthesis.py @@ -22,7 +22,6 @@ import re import sys from collections.abc import Coroutine -from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass, field from typing import Any, Protocol, runtime_checkable @@ -1361,6 +1360,12 @@ def _run_one_map(idx: int) -> tuple[int, SynthesisResult, dict[str, Any]]: ) return idx, res, meta + # sy-2wa: lazy concurrent.futures import. Keeps `from synth_panel.ensemble + # import synthesize_panel` ThreadPoolExecutor-free for pyodide consumers + # (CF Python Workers), where `.submit()` silently hangs. Map-reduce + # is opt-in via STRATEGY_MAP_REDUCE and never reached on pyodide. + from concurrent.futures import ThreadPoolExecutor, as_completed + map_results: list[SynthesisResult | None] = [None] * n map_meta: list[dict[str, Any] | None] = [None] * n with ThreadPoolExecutor(max_workers=workers) as executor: diff --git a/tests/test_threadpool_lazy_import.py b/tests/test_threadpool_lazy_import.py new file mode 100644 index 0000000..13545c3 --- /dev/null +++ b/tests/test_threadpool_lazy_import.py @@ -0,0 +1,151 @@ +"""sy-2wa: load-chain hygiene for pyodide consumers (CF Python Workers). + +Under pyodide, ``concurrent.futures.ThreadPoolExecutor`` exists as a stub +but ``.submit()`` silently hangs. Binding it in the load chain of +``synth_panel.ensemble`` means any downstream code path that touches the +synthpanel namespace can reach a submit() that will deadlock the Worker. + +These tests pin the contract: + +1. ``from synth_panel.ensemble import synthesize_panel`` must not import + ``concurrent.futures`` at any point in the load chain. +2. 
+ Under a pyodide-emulating sys.modules patch where any attribute + access on ``concurrent.futures`` raises, the same import still succeeds — the + ensemble surface is fully threadpool-free at load time. +3. Module namespaces of ``orchestrator``, ``synthesis``, and + ``perturbation`` must not bind ``ThreadPoolExecutor`` until the + threaded entry points are actually called. + +We use subprocesses for the load-chain assertions because pytest collects +every test in one interpreter — by the time these tests run, +``synth_panel.*`` (and transitively ``concurrent.futures``) are already +in ``sys.modules``. A subprocess gives us a fresh interpreter so the +"never imported" claim is observable rather than inferred. +""" + +from __future__ import annotations + +import subprocess +import sys +import textwrap + + +def _run_in_subprocess(script: str) -> subprocess.CompletedProcess[str]: + """Run ``script`` in a fresh interpreter and return the result. + + Uses the same Python executable as the test runner so the editable + ``synth_panel`` install is on the path. Stderr is captured so failures + surface in pytest output. + """ + return subprocess.run( + [sys.executable, "-c", script], + capture_output=True, + text=True, + check=False, + ) + + +def test_ensemble_load_does_not_import_concurrent_futures() -> None: + """Acceptance criterion 1 (sy-2wa): ``from synth_panel.ensemble import + synthesize_panel`` must not pull ``concurrent.futures`` into sys.modules. 
+ """ + script = textwrap.dedent( + """ + import sys + from synth_panel.ensemble import synthesize_panel # noqa: F401 + + if "concurrent.futures" in sys.modules: + print("LEAK: concurrent.futures imported during ensemble load") + sys.exit(1) + if "concurrent" in sys.modules: + print("LEAK: concurrent (parent) imported during ensemble load") + sys.exit(1) + print("OK") + """ + ) + result = _run_in_subprocess(script) + assert result.returncode == 0, ( + f"synth_panel.ensemble load chain pulled in concurrent.futures.\n" + f"stdout: {result.stdout}\nstderr: {result.stderr}" + ) + assert "OK" in result.stdout + + +def test_ensemble_loads_with_poisoned_concurrent_futures() -> None: + """Acceptance criterion 2 (sy-2wa): emulate pyodide by poisoning + ``concurrent.futures`` so any access raises. ``synth_panel.ensemble`` + must still load — proving the load chain truly never touches it. + """ + script = textwrap.dedent( + """ + import sys + import types + + # Poison: any attribute lookup raises. ``from concurrent.futures + # import ThreadPoolExecutor`` runs __getattr__ on this module after + # the import system resolves the submodule from sys.modules. 
+ poison = types.ModuleType("concurrent.futures") + + def _poisoned_attr(name): + raise AssertionError( + f"sy-2wa regression: concurrent.futures.{name} accessed " + "during synth_panel.ensemble load" + ) + + poison.__getattr__ = _poisoned_attr + parent = types.ModuleType("concurrent") + parent.futures = poison + sys.modules["concurrent"] = parent + sys.modules["concurrent.futures"] = poison + + from synth_panel.ensemble import synthesize_panel # noqa: F401 + print("OK") + """ + ) + result = _run_in_subprocess(script) + assert result.returncode == 0, ( + f"synth_panel.ensemble failed to load under poisoned concurrent.futures.\n" + f"stdout: {result.stdout}\nstderr: {result.stderr}" + ) + assert "OK" in result.stdout + + +def test_threadpoolexecutor_not_bound_in_module_namespaces() -> None: + """Acceptance criterion 3 (sy-2wa): the modules that *do* spawn + threadpools (orchestrator, synthesis, perturbation) must not bind + ``ThreadPoolExecutor`` / ``as_completed`` at module top, even after + they've been loaded. Catches a regression where someone re-adds the + top-level import "for convenience". + """ + script = textwrap.dedent( + """ + import sys + from synth_panel.ensemble import synthesize_panel # noqa: F401 + import synth_panel.orchestrator + import synth_panel.synthesis + import synth_panel.perturbation + + leaks = [] + for modname in ( + "synth_panel.orchestrator", + "synth_panel.synthesis", + "synth_panel.perturbation", + ): + mod = sys.modules[modname] + for sym in ("ThreadPoolExecutor", "as_completed"): + if hasattr(mod, sym): + leaks.append(f"{modname}.{sym}") + + if leaks: + print("LEAKS:", leaks) + sys.exit(1) + print("OK") + """ + ) + result = _run_in_subprocess(script) + assert result.returncode == 0, ( + f"Threadpool symbols bound at module top — move imports inside " + f"threaded functions (sy-2wa).\n" + f"stdout: {result.stdout}\nstderr: {result.stderr}" + ) + assert "OK" in result.stdout