diff --git a/lite_bootstrap/bootstrappers/fastapi_bootstrapper.py b/lite_bootstrap/bootstrappers/fastapi_bootstrapper.py index 5cba5e5..ba99adc 100644 --- a/lite_bootstrap/bootstrappers/fastapi_bootstrapper.py +++ b/lite_bootstrap/bootstrappers/fastapi_bootstrapper.py @@ -76,7 +76,9 @@ def __post_init__(self) -> None: def _narrow_app(config: "FastAPIConfig") -> "fastapi.FastAPI": - assert not isinstance(config.application, UnsetType) + if isinstance(config.application, UnsetType): + msg = "FastAPIConfig.application is UNSET; __post_init__ did not run" + raise TypeError(msg) return config.application @@ -198,6 +200,17 @@ def __init__(self, bootstrap_config: FastAPIConfig) -> None: super().__init__(bootstrap_config) application = _narrow_app(self.bootstrap_config) + # FastAPI's lifespan_context is opaque after wrap; tag the app instance directly + # rather than squatting in Starlette's user-facing application.state namespace. + if getattr(application, "_lite_bootstrap_lifespan_attached", False): + warnings.warn( + "FastAPI application already has a lite-bootstrap lifespan wrapper attached; " + "skipping re-wrap. This FastAPIBootstrapper's teardown will not be invoked on " + "ASGI shutdown — construct one FastAPIBootstrapper per application.", + stacklevel=2, + ) + return + application._lite_bootstrap_lifespan_attached = True # noqa: SLF001 # ty: ignore[unresolved-attribute] old_lifespan_manager = application.router.lifespan_context application.router.lifespan_context = _merge_lifespan_context( old_lifespan_manager, diff --git a/lite_bootstrap/bootstrappers/fastmcp_bootstrapper.py b/lite_bootstrap/bootstrappers/fastmcp_bootstrapper.py index 2fed48d..52135b0 100644 --- a/lite_bootstrap/bootstrappers/fastmcp_bootstrapper.py +++ b/lite_bootstrap/bootstrappers/fastmcp_bootstrapper.py @@ -2,6 +2,7 @@ import dataclasses import time import typing +import warnings from lite_bootstrap import import_checker from lite_bootstrap.bootstrappers.base import BaseBootstrapper @@ -151,6 +152,14 @@ def is_ready(self) -> bool: def __init__(self, bootstrap_config: FastMcpConfig) -> None: super().__init__(bootstrap_config) + if any(isinstance(p, _TeardownProvider) for p in self.bootstrap_config.application.providers): + warnings.warn( + "FastMCP application already has a _TeardownProvider attached; skipping re-attachment. " + "This FastMcpBootstrapper's teardown will not be invoked on ASGI shutdown — " + "construct one FastMcpBootstrapper per application.", + stacklevel=2, + ) + return self.bootstrap_config.application.add_provider(_TeardownProvider(self.teardown)) def _prepare_application(self) -> "FastMCP[typing.Any]": diff --git a/lite_bootstrap/bootstrappers/faststream_bootstrapper.py b/lite_bootstrap/bootstrappers/faststream_bootstrapper.py index 40974b9..6d78517 100644 --- a/lite_bootstrap/bootstrappers/faststream_bootstrapper.py +++ b/lite_bootstrap/bootstrappers/faststream_bootstrapper.py @@ -110,6 +110,8 @@ async def _define_health_status(self) -> bool: @dataclasses.dataclass(kw_only=True) class FastStreamLoggingInstrument(LoggingInstrument): bootstrap_config: FastStreamConfig + _prior_broker_params_storage: typing.Any = dataclasses.field(default=None, init=False, repr=False, compare=False) + _broker_logger_replaced: bool = dataclasses.field(default=False, init=False, repr=False, compare=False) def bootstrap(self) -> None: super().bootstrap() @@ -117,7 +119,18 @@ def bootstrap(self) -> None: if broker is not None and import_checker.is_structlog_installed and import_checker.is_faststream_installed: logger = structlog.get_logger("faststream") logger.setLevel(self.bootstrap_config.faststream_log_level) + self._prior_broker_params_storage = broker.config.logger.params_storage broker.config.logger.params_storage = ManualLoggerStorage(logger) + self._broker_logger_replaced = True + + def teardown(self) -> None: + if self._broker_logger_replaced: + broker = self.bootstrap_config.application.broker + if broker is not None: + broker.config.logger.params_storage = self._prior_broker_params_storage + self._broker_logger_replaced = False + self._prior_broker_params_storage = None + super().teardown() @dataclasses.dataclass(kw_only=True) diff --git a/lite_bootstrap/bootstrappers/litestar_bootstrapper.py b/lite_bootstrap/bootstrappers/litestar_bootstrapper.py index 901accc..a7411ad 100644 --- a/lite_bootstrap/bootstrappers/litestar_bootstrapper.py +++ b/lite_bootstrap/bootstrappers/litestar_bootstrapper.py @@ -1,6 +1,8 @@ +import contextlib import dataclasses import pathlib import typing +import weakref from lite_bootstrap import import_checker from lite_bootstrap.bootstrappers.base import BaseBootstrapper @@ -76,9 +78,10 @@ class LitestarOpenTelemetryInstrumentationMiddleware(ASGIMiddleware): def __init__(self, tracer_provider: "TracerProvider", excluded_urls: set[str]) -> None: self._tracer_provider = tracer_provider self._excluded_urls = ",".join(excluded_urls) - # Cache keyed by id(next_app); Litestar's ASGI app instances are stable for - # the middleware lifetime, so id-reuse-after-GC isn't a concern. - self._otel_apps: dict[int, ASGIApp] = {} + # WeakKeyDictionary so wrapper apps are evicted when Litestar drops the + # next_app reference (hot reload, plugin add/remove, AppConfig rebuild). + # Apps that don't support weak references are simply not cached. + self._otel_apps: weakref.WeakKeyDictionary[ASGIApp, ASGIApp] = weakref.WeakKeyDictionary() async def handle( self, @@ -87,16 +90,19 @@ async def handle( send: "Send", next_app: "ASGIApp", ) -> None: - otel_app = self._otel_apps.get(id(next_app)) + otel_app: ASGIApp | None = None + with contextlib.suppress(TypeError): + otel_app = self._otel_apps.get(next_app) if otel_app is None: - otel_app = OpenTelemetryMiddleware( + otel_app = OpenTelemetryMiddleware( # ty: ignore[invalid-assignment] app=next_app, default_span_details=build_litestar_route_details_from_scope, excluded_urls=self._excluded_urls, tracer_provider=self._tracer_provider, ) - self._otel_apps[id(next_app)] = otel_app # ty: ignore[invalid-assignment] - await otel_app(scope, receive, send) # ty: ignore[invalid-argument-type] + with contextlib.suppress(TypeError): + self._otel_apps[next_app] = otel_app # ty: ignore[invalid-assignment] + await otel_app(scope, receive, send) # ty: ignore[call-non-callable] @dataclasses.dataclass(kw_only=True, slots=True, frozen=True) diff --git a/lite_bootstrap/instruments/logging_instrument.py b/lite_bootstrap/instruments/logging_instrument.py index 37ef81e..bc7eb4d 100644 --- a/lite_bootstrap/instruments/logging_instrument.py +++ b/lite_bootstrap/instruments/logging_instrument.py @@ -4,6 +4,7 @@ import typing from lite_bootstrap import import_checker +from lite_bootstrap.exceptions import TeardownError from lite_bootstrap.instruments.base import BaseConfig, BaseInstrument from lite_bootstrap.instruments.logging_factory import ( AddressProtocol, @@ -164,15 +165,31 @@ def teardown(self) -> None: """Reset structlog and root logger. Root logger level is unconditionally set to WARNING; pre-existing user configuration is overwritten. + + Best-effort cleanup: errors from individual ``handler.close()`` calls and from the memory + logger factory's ``close_handlers()`` are collected and re-raised together via + :class:`~lite_bootstrap.exceptions.TeardownError` after the rest of teardown completes, + so no single misbehaving cleanup step prevents the instrument from releasing the + rest of its resources. """ structlog.reset_defaults() root_logger = logging.getLogger() - for h in root_logger.handlers[:]: - root_logger.removeHandler(h) - h.close() - root_logger.setLevel(logging.WARNING) - if self._logger_factory is not None: - try: - self._logger_factory.close_handlers() - finally: - self._logger_factory = None + errors: list[tuple[str, BaseException]] = [] + try: + for h in root_logger.handlers[:]: + root_logger.removeHandler(h) + try: + h.close() + except Exception as e: # noqa: BLE001 + errors.append((type(h).__name__, e)) + root_logger.setLevel(logging.WARNING) + finally: + if self._logger_factory is not None: + try: + self._logger_factory.close_handlers() + except Exception as e: # noqa: BLE001 + errors.append(("MemoryLoggerFactory", e)) + finally: + self._logger_factory = None + if errors: + raise TeardownError(errors) from errors[0][1] diff --git a/lite_bootstrap/instruments/opentelemetry_instrument.py b/lite_bootstrap/instruments/opentelemetry_instrument.py index 9c1c640..65ca701 100644 --- a/lite_bootstrap/instruments/opentelemetry_instrument.py +++ b/lite_bootstrap/instruments/opentelemetry_instrument.py @@ -79,11 +79,26 @@ def force_flush(self, timeout_millis: int = 30000) -> bool: # pragma: no cover @dataclasses.dataclass(kw_only=True, slots=True) class OpenTelemetryInstrument(BaseInstrument[OpenTelemetryConfig]): + """OpenTelemetry tracing instrument. + + Lifecycle note: ``bootstrap()`` calls ``opentelemetry.trace.set_tracer_provider``, + which the OTel SDK enforces as **set-once per process** (subsequent calls log + "Overriding of current TracerProvider is not allowed" and have no effect). + ``teardown()`` calls ``shutdown()`` on the provider, which flushes batched + spans and closes exporters, but it cannot reset the process-global pointer — + callers of ``opentelemetry.trace.get_tracer_provider()`` after teardown will + still receive the shut-down provider. The supported lifecycle is one + ``OpenTelemetryInstrument`` per process; do not bootstrap a second instance. + """ + not_ready_message = "opentelemetry_endpoint is empty and opentelemetry_log_traces is False" missing_dependency_message = "opentelemetry is not installed" _tracer_provider: "TracerProvider | None" = dataclasses.field( default_factory=lambda: None, init=False, repr=False, compare=False ) + _prior_logger_disabled: dict[str, bool] = dataclasses.field( + default_factory=dict, init=False, repr=False, compare=False + ) @classmethod def is_configured(cls, bootstrap_config: "OpenTelemetryConfig") -> bool: @@ -105,8 +120,10 @@ def _build_excluded_urls(self) -> set[str]: return excluded_urls def bootstrap(self) -> None: - logging.getLogger("opentelemetry.instrumentation.instrumentor").disabled = True - logging.getLogger("opentelemetry.trace").disabled = True + for logger_name in ("opentelemetry.instrumentation.instrumentor", "opentelemetry.trace"): + otel_logger = logging.getLogger(logger_name) + self._prior_logger_disabled[logger_name] = otel_logger.disabled + otel_logger.disabled = True attributes = { resources.SERVICE_NAME: self.bootstrap_config.opentelemetry_service_name or self.bootstrap_config.service_name, @@ -149,6 +166,9 @@ def teardown(self) -> None: one_instrumentor.instrumentor.uninstrument(**one_instrumentor.additional_params) else: one_instrumentor.uninstrument() + for logger_name, prior in self._prior_logger_disabled.items(): + logging.getLogger(logger_name).disabled = prior + self._prior_logger_disabled.clear() if self._tracer_provider is not None: try: self._tracer_provider.shutdown() diff --git a/lite_bootstrap/instruments/pyroscope_instrument.py b/lite_bootstrap/instruments/pyroscope_instrument.py index c3631b4..2113931 100644 --- a/lite_bootstrap/instruments/pyroscope_instrument.py +++ b/lite_bootstrap/instruments/pyroscope_instrument.py @@ -32,9 +32,11 @@ def check_dependencies() -> bool: return import_checker.is_pyroscope_installed def bootstrap(self) -> None: - # is_configured() guarantees pyroscope_endpoint is set; assert documents the precondition - # for type narrowing and for direct callers that bypass the bootstrapper. - assert self.bootstrap_config.pyroscope_endpoint is not None + # is_configured() guarantees pyroscope_endpoint is set when called via the + # bootstrapper. Direct callers bypassing is_configured see an explicit raise. + if self.bootstrap_config.pyroscope_endpoint is None: + msg = "pyroscope_endpoint is unset; PyroscopeInstrument.is_configured() should have returned False" + raise RuntimeError(msg) namespace = self.bootstrap_config.opentelemetry_namespace tags = ({"service_namespace": namespace} if namespace else {}) | self.bootstrap_config.pyroscope_tags pyroscope.configure( diff --git a/lite_bootstrap/instruments/sentry_instrument.py b/lite_bootstrap/instruments/sentry_instrument.py index 0d6f3f9..35e77b8 100644 --- a/lite_bootstrap/instruments/sentry_instrument.py +++ b/lite_bootstrap/instruments/sentry_instrument.py @@ -122,3 +122,13 @@ def bootstrap(self) -> None: ) tags: dict[str, str] = self.bootstrap_config.sentry_tags or {} sentry_sdk.set_tags(tags) + + def teardown(self) -> None: + """Flush pending events and reset the SDK to a no-op state. + + Calling ``sentry_sdk.init()`` with no DSN disables further event capture. This + cleans up after a bootstrap so the same process can be torn down and re-tested + without leaking the previous DSN/transport into subsequent code. + """ + sentry_sdk.flush(timeout=2) + sentry_sdk.init() diff --git a/planning/plans/2026-06-05-pr1-lifecycle.md b/planning/plans/2026-06-05-pr1-lifecycle.md new file mode 100644 index 0000000..be5591a --- /dev/null +++ b/planning/plans/2026-06-05-pr1-lifecycle.md @@ -0,0 +1,1083 @@ +# PR1 — Lifecycle & Teardown Correctness Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Land all 10 lifecycle/teardown fixes from the 2026-06-05 audit (LOG-1..9 + SEC-4) into one cohesive PR. + +**Architecture:** Each fix is a small TDD cycle (failing test → implement → verify → commit). The sub-fixes are independent code-wise but share the "bootstrap promised, teardown didn't deliver" theme. Tasks are ordered to land the simplest invariants first (assert → raise), then the OTel teardown completeness, then the broader teardown-robustness fixes, then the app-reuse safety guards. + +**Tech Stack:** Python 3.10+, `uv` workspace, `pytest`, `pytest-asyncio`, `ty` type checker, `ruff` formatter, `structlog`, `opentelemetry-sdk`, `sentry-sdk`, `fastmcp`, `litestar`, `faststream`, `fastapi`. + +**Branch:** `fix/bug-audit-v2-pr1-lifecycle` + +**Important deviation from sequencing doc:** During plan drafting, the OTel SDK's `set_tracer_provider` was found to be enforced as set-once via `_TRACER_PROVIDER_SET_ONCE.do_once(...)` (see `opentelemetry/trace/__init__.py:548-556` in the pinned version). A second `set_tracer_provider(NoOpTracerProvider())` would be logged-and-ignored, not applied. **LOG-1 therefore becomes a docstring-only change** that documents the OTel set-once constraint — the existing `shutdown()` call (added in CRIT-2) is the only practical teardown action. The audit's TEST-NEW-2 splits accordingly: the "shutdown called" half is already covered by `test_opentelemetry_instrument_teardown_shuts_down_tracer_provider`; the "global is reset" half is dropped because OTel doesn't support it. + +--- + +## File Structure + +Modifications to existing files only — no new files. + +| File | What changes | +|------|-------------| +| `lite_bootstrap/bootstrappers/fastapi_bootstrapper.py` | `_narrow_app` assert → raise (Task 1); LOG-8 re-wrap guard (Task 10) | +| `lite_bootstrap/bootstrappers/fastmcp_bootstrapper.py` | LOG-7 re-attach guard (Task 9) | +| `lite_bootstrap/bootstrappers/faststream_bootstrapper.py` | LOG-4 broker logger restore (Task 6) | +| `lite_bootstrap/bootstrappers/litestar_bootstrapper.py` | LOG-6 WeakKeyDictionary cache (Task 8) | +| `lite_bootstrap/instruments/logging_instrument.py` | LOG-3 teardown try/finally (Task 5) | +| `lite_bootstrap/instruments/opentelemetry_instrument.py` | LOG-1 docstring (Task 3); LOG-2 logger restore (Task 4) | +| `lite_bootstrap/instruments/pyroscope_instrument.py` | Pyroscope precondition assert → raise (Task 2) | +| `lite_bootstrap/instruments/sentry_instrument.py` | LOG-9 teardown override (Task 7) | +| `tests/test_fastapi_bootstrap.py` | Tests for Tasks 1, 10 | +| `tests/test_fastmcp_bootstrap.py` | Test for Task 9 | +| `tests/test_faststream_bootstrap.py` | Test for Task 6 | +| `tests/test_litestar_bootstrap.py` | Test for Task 8 | +| `tests/instruments/test_logging_instrument.py` | Test for Task 5 | +| `tests/instruments/test_opentelemetry_instrument.py` | Tests for Tasks 3, 4 | +| `tests/instruments/test_pyroscope_instrument.py` | Test for Task 2 | +| `tests/instruments/test_sentry_instrument.py` | Test for Task 7; remove `finally: sentry_sdk.init()` workarounds | + +--- + +## Task 1: LOG-5 part A — `_narrow_app` assert → explicit raise + +**Files:** +- Modify: `lite_bootstrap/bootstrappers/fastapi_bootstrapper.py:78-80` +- Test: `tests/test_fastapi_bootstrap.py` + +- [ ] **Step 1: Write the failing test** + +Add to `tests/test_fastapi_bootstrap.py`: + +```python +import dataclasses + +import pytest + +from lite_bootstrap.bootstrappers.fastapi_bootstrapper import _narrow_app +from lite_bootstrap.types import UNSET + + +def test_narrow_app_raises_when_application_unset() -> None: + # Build a config and forcibly reset application to UNSET to simulate the + # invariant violation `_narrow_app` was guarding with an assert. + config = FastAPIConfig() + object.__setattr__(config, "application", UNSET) + with pytest.raises(RuntimeError, match="application is UNSET"): + _narrow_app(config) +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +just test -- tests/test_fastapi_bootstrap.py::test_narrow_app_raises_when_application_unset +``` + +Expected: FAIL — current code uses `assert` which raises `AssertionError`, not `RuntimeError`. The `pytest.raises(RuntimeError, match=...)` will not match. + +- [ ] **Step 3: Replace the assert** + +In `lite_bootstrap/bootstrappers/fastapi_bootstrapper.py`, replace lines 78-80: + +```python +def _narrow_app(config: "FastAPIConfig") -> "fastapi.FastAPI": + if isinstance(config.application, UnsetType): + msg = "FastAPIConfig.application is UNSET; __post_init__ did not run" + raise RuntimeError(msg) + return config.application +``` + +- [ ] **Step 4: Run the test to verify it passes** + +```bash +just test -- tests/test_fastapi_bootstrap.py::test_narrow_app_raises_when_application_unset +``` + +Expected: PASS. + +- [ ] **Step 5: Verify nothing else broke** + +```bash +just test -- tests/test_fastapi_bootstrap.py tests/test_fastapi_offline_docs.py +``` + +Expected: all green. + +- [ ] **Step 6: Commit** + +```bash +git add lite_bootstrap/bootstrappers/fastapi_bootstrapper.py tests/test_fastapi_bootstrap.py +git commit -m "fix: replace _narrow_app assert with RuntimeError (LOG-5/SEC-4 part A) + +Bandit B101 flagged the assert as stripped under \`python -O\`. Replace with +explicit raise so the invariant holds under all Python optimization levels." +``` + +--- + +## Task 2: LOG-5 part B — Pyroscope precondition assert → explicit raise + +**Files:** +- Modify: `lite_bootstrap/instruments/pyroscope_instrument.py:34-37` +- Test: `tests/instruments/test_pyroscope_instrument.py` + +- [ ] **Step 1: Write the failing test** + +Add to `tests/instruments/test_pyroscope_instrument.py`: + +```python +def test_pyroscope_bootstrap_raises_when_endpoint_unset() -> None: + # Build a config that passes is_configured (endpoint set), then forcibly + # clear the endpoint to simulate a caller bypassing the bootstrapper's + # is_configured gate. + config = _make_config() + object.__setattr__(config, "pyroscope_endpoint", None) + instrument = PyroscopeInstrument(bootstrap_config=config) + with pytest.raises(RuntimeError, match="pyroscope_endpoint is unset"): + instrument.bootstrap() +``` + +Verify `pytest` is already imported in the file (it is — see line 4). + +- [ ] **Step 2: Run test to verify it fails** + +```bash +just test -- tests/instruments/test_pyroscope_instrument.py::test_pyroscope_bootstrap_raises_when_endpoint_unset +``` + +Expected: FAIL — the current `assert` raises `AssertionError`, not `RuntimeError`. + +- [ ] **Step 3: Replace the assert** + +In `lite_bootstrap/instruments/pyroscope_instrument.py`, replace lines 34-37: + +```python +def bootstrap(self) -> None: + # is_configured() guarantees pyroscope_endpoint is set when called via the + # bootstrapper. Direct callers bypassing is_configured see an explicit raise. + if self.bootstrap_config.pyroscope_endpoint is None: + msg = "pyroscope_endpoint is unset; PyroscopeInstrument.is_configured() should have returned False" + raise RuntimeError(msg) + namespace = self.bootstrap_config.opentelemetry_namespace + tags = ({"service_namespace": namespace} if namespace else {}) | self.bootstrap_config.pyroscope_tags + pyroscope.configure( + application_name=self.bootstrap_config.opentelemetry_service_name or self.bootstrap_config.service_name, + server_address=self.bootstrap_config.pyroscope_endpoint, + sample_rate=self.bootstrap_config.pyroscope_sample_rate, + tags=tags, + **self.bootstrap_config.pyroscope_additional_params, + ) +``` + +- [ ] **Step 4: Run the test to verify it passes** + +```bash +just test -- tests/instruments/test_pyroscope_instrument.py::test_pyroscope_bootstrap_raises_when_endpoint_unset +``` + +Expected: PASS. + +- [ ] **Step 5: Verify nothing else broke** + +```bash +just test -- tests/instruments/test_pyroscope_instrument.py +``` + +Expected: all green. + +- [ ] **Step 6: Commit** + +```bash +git add lite_bootstrap/instruments/pyroscope_instrument.py tests/instruments/test_pyroscope_instrument.py +git commit -m "fix: replace pyroscope precondition assert with RuntimeError (LOG-5/SEC-4 part B) + +Bandit B101 flagged the assert as stripped under \`python -O\`. Replace with +explicit raise. Resolves the second of the two bandit B101 findings in PR1." +``` + +--- + +## Task 3: LOG-1 — Document OTel `set_tracer_provider` constraint + +**Files:** +- Modify: `lite_bootstrap/instruments/opentelemetry_instrument.py` (class docstring on `OpenTelemetryInstrument`) + +**Context:** `opentelemetry.trace.set_tracer_provider` is enforced as set-once via `_TRACER_PROVIDER_SET_ONCE.do_once(...)` (verified against the pinned OTel SDK). A teardown that resets the global to a no-op would require touching private SDK internals (`_TRACER_PROVIDER` and `_TRACER_PROVIDER_SET_ONCE`). That hack is out of scope for this PR — we instead document the constraint so users understand the lifecycle. The audit's LOG-1 finding is downgraded to "documentation only" once the OTel API behavior is verified. + +- [ ] **Step 1: Add docstring to `OpenTelemetryInstrument`** + +In `lite_bootstrap/instruments/opentelemetry_instrument.py:80-86`, add a class docstring: + +```python +@dataclasses.dataclass(kw_only=True, slots=True) +class OpenTelemetryInstrument(BaseInstrument[OpenTelemetryConfig]): + """OpenTelemetry tracing instrument. + + Lifecycle note: ``bootstrap()`` calls ``opentelemetry.trace.set_tracer_provider``, + which the OTel SDK enforces as **set-once per process** (subsequent calls log + "Overriding of current TracerProvider is not allowed" and have no effect). + ``teardown()`` calls ``shutdown()`` on the provider, which flushes batched + spans and closes exporters, but it cannot reset the process-global pointer — + callers of ``opentelemetry.trace.get_tracer_provider()`` after teardown will + still receive the shut-down provider. The supported lifecycle is one + ``OpenTelemetryInstrument`` per process; do not bootstrap a second instance. + """ + + not_ready_message = "opentelemetry_endpoint is empty and opentelemetry_log_traces is False" + missing_dependency_message = "opentelemetry is not installed" + _tracer_provider: "TracerProvider | None" = dataclasses.field( + default_factory=lambda: None, init=False, repr=False, compare=False + ) +``` + +- [ ] **Step 2: Verify lint passes** + +```bash +just lint-ci +``` + +Expected: green. Docstring change only. + +- [ ] **Step 3: Commit** + +```bash +git add lite_bootstrap/instruments/opentelemetry_instrument.py +git commit -m "docs: document OTel set_tracer_provider set-once constraint (LOG-1) + +The OTel SDK enforces set_tracer_provider as set-once per process. teardown() +calls shutdown() (added in CRIT-2) but cannot reset the process-global pointer. +Document the supported lifecycle: one OpenTelemetryInstrument per process." +``` + +--- + +## Task 4: LOG-2 — Restore disabled OTel loggers on teardown + +**Files:** +- Modify: `lite_bootstrap/instruments/opentelemetry_instrument.py` +- Test: `tests/instruments/test_opentelemetry_instrument.py` + +- [ ] **Step 1: Write the failing test** + +Add to `tests/instruments/test_opentelemetry_instrument.py`: + +```python +import logging + + +def test_opentelemetry_teardown_restores_disabled_loggers() -> None: + instrumentor_logger = logging.getLogger("opentelemetry.instrumentation.instrumentor") + trace_logger = logging.getLogger("opentelemetry.trace") + # Capture pre-bootstrap state so the assertion is independent of test order. + prior_instrumentor_disabled = instrumentor_logger.disabled + prior_trace_disabled = trace_logger.disabled + + instrument = OpenTelemetryInstrument( + bootstrap_config=OpenTelemetryConfig(opentelemetry_log_traces=True), + ) + instrument.bootstrap() + assert instrumentor_logger.disabled is True + assert trace_logger.disabled is True + + instrument.teardown() + + assert instrumentor_logger.disabled is prior_instrumentor_disabled + assert trace_logger.disabled is prior_trace_disabled +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +just test -- tests/instruments/test_opentelemetry_instrument.py::test_opentelemetry_teardown_restores_disabled_loggers +``` + +Expected: FAIL — current `teardown()` does not restore the `disabled` flags. + +- [ ] **Step 3: Add capture-and-restore logic** + +In `lite_bootstrap/instruments/opentelemetry_instrument.py`, modify the `OpenTelemetryInstrument` dataclass (around line 80-86 — keep the docstring from Task 3) to add a new init=False field: + +```python +@dataclasses.dataclass(kw_only=True, slots=True) +class OpenTelemetryInstrument(BaseInstrument[OpenTelemetryConfig]): + """OpenTelemetry tracing instrument. + + Lifecycle note: ``bootstrap()`` calls ``opentelemetry.trace.set_tracer_provider``, + which the OTel SDK enforces as **set-once per process** (subsequent calls log + "Overriding of current TracerProvider is not allowed" and have no effect). + ``teardown()`` calls ``shutdown()`` on the provider, which flushes batched + spans and closes exporters, but it cannot reset the process-global pointer — + callers of ``opentelemetry.trace.get_tracer_provider()`` after teardown will + still receive the shut-down provider. The supported lifecycle is one + ``OpenTelemetryInstrument`` per process; do not bootstrap a second instance. + """ + + not_ready_message = "opentelemetry_endpoint is empty and opentelemetry_log_traces is False" + missing_dependency_message = "opentelemetry is not installed" + _tracer_provider: "TracerProvider | None" = dataclasses.field( + default_factory=lambda: None, init=False, repr=False, compare=False + ) + _prior_logger_disabled: dict[str, bool] = dataclasses.field( + default_factory=dict, init=False, repr=False, compare=False + ) +``` + +In the same file, modify `bootstrap()` (lines 107-109 currently set `.disabled = True` on two loggers). Replace those two lines with capture-then-set: + +```python +def bootstrap(self) -> None: + for logger_name in ("opentelemetry.instrumentation.instrumentor", "opentelemetry.trace"): + otel_logger = logging.getLogger(logger_name) + self._prior_logger_disabled[logger_name] = otel_logger.disabled + otel_logger.disabled = True + attributes = { + resources.SERVICE_NAME: self.bootstrap_config.opentelemetry_service_name + or self.bootstrap_config.service_name, + resources.TELEMETRY_SDK_LANGUAGE: "python", + resources.SERVICE_NAMESPACE: self.bootstrap_config.opentelemetry_namespace, + resources.SERVICE_VERSION: self.bootstrap_config.service_version, + resources.CONTAINER_NAME: self.bootstrap_config.opentelemetry_container_name, + } + # ... (rest of bootstrap body unchanged from current code) +``` + +In `teardown()` (lines 146-156), add restore logic. Place it after the uninstrument loop, before the `_tracer_provider.shutdown()` block so restoration happens even if shutdown raises: + +```python +def teardown(self) -> None: + for one_instrumentor in self.bootstrap_config.opentelemetry_instrumentors: + if isinstance(one_instrumentor, InstrumentorWithParams): + one_instrumentor.instrumentor.uninstrument(**one_instrumentor.additional_params) + else: + one_instrumentor.uninstrument() + for logger_name, prior in self._prior_logger_disabled.items(): + logging.getLogger(logger_name).disabled = prior + self._prior_logger_disabled.clear() + if self._tracer_provider is not None: + try: + self._tracer_provider.shutdown() + finally: + self._tracer_provider = None +``` + +- [ ] **Step 4: Run the new test to verify it passes** + +```bash +just test -- tests/instruments/test_opentelemetry_instrument.py::test_opentelemetry_teardown_restores_disabled_loggers +``` + +Expected: PASS. + +- [ ] **Step 5: Verify the rest of the OTel test file** + +```bash +just test -- tests/instruments/test_opentelemetry_instrument.py +``` + +Expected: all green. In particular `test_opentelemetry_instrument_teardown_resets_tracer_provider_when_shutdown_raises` should still pass because the new `_prior_logger_disabled` restore runs **before** the shutdown raise. + +- [ ] **Step 6: Commit** + +```bash +git add lite_bootstrap/instruments/opentelemetry_instrument.py tests/instruments/test_opentelemetry_instrument.py +git commit -m "fix: restore OTel loggers' disabled state on teardown (LOG-2) + +bootstrap() previously set opentelemetry.instrumentation.instrumentor and +opentelemetry.trace loggers to disabled=True unconditionally, with no symmetric +restoration. Capture the pre-bootstrap state and restore on teardown so unrelated +code in the same process retains its expected logger configuration." +``` + +--- + +## Task 5: LOG-3 — Protect `LoggingInstrument.teardown()` root-handler loop + +**Files:** +- Modify: `lite_bootstrap/instruments/logging_instrument.py:163-178` +- Test: `tests/instruments/test_logging_instrument.py` + +- [ ] **Step 1: Write the failing test** + +Add to `tests/instruments/test_logging_instrument.py`: + +```python +def test_logging_instrument_teardown_aggregates_handler_close_errors() -> None: + instrument = LoggingInstrument( + bootstrap_config=LoggingConfig(logging_buffer_capacity=0), + ) + instrument.bootstrap() + root_logger = logging.getLogger() + + # Find the StreamHandler added by _configure_foreign_loggers and patch its close. + bootstrap_added = [h for h in root_logger.handlers if isinstance(h, logging.StreamHandler)] + assert bootstrap_added, "bootstrap should have added at least one StreamHandler to root" + target_handler = bootstrap_added[0] + + with patch.object(target_handler, "close", side_effect=RuntimeError("boom")): + with pytest.raises(RuntimeError, match="boom"): + instrument.teardown() + + # After teardown despite the raise, post-loop cleanup must have completed: + assert instrument._logger_factory is None # noqa: SLF001 + assert root_logger.level == logging.WARNING + # And the broken handler must have been removed from root despite raising. + assert target_handler not in root_logger.handlers +``` + +`patch` is already imported in the file (see line 3); `pytest` and `logging` are already in scope. + +- [ ] **Step 2: Run test to verify it fails** + +```bash +just test -- tests/instruments/test_logging_instrument.py::test_logging_instrument_teardown_aggregates_handler_close_errors +``` + +Expected: FAIL — current `teardown` would let `h.close()` raise mid-loop; `_logger_factory.close_handlers()` never runs and `_logger_factory` stays non-None. The level reset also doesn't fire. + +- [ ] **Step 3: Rewrite teardown with try/finally** + +In `lite_bootstrap/instruments/logging_instrument.py`, replace `teardown()` (lines 163-178): + +```python +def teardown(self) -> None: + """Reset structlog and root logger. + + Root logger level is unconditionally set to WARNING; pre-existing user configuration is overwritten. + + Best-effort cleanup: errors from individual ``handler.close()`` calls are collected and + re-raised after the rest of teardown (level reset, factory close) completes, so a single + misbehaving handler can't prevent the instrument from releasing the rest of its resources. + """ + structlog.reset_defaults() + root_logger = logging.getLogger() + close_errors: list[BaseException] = [] + try: + for h in root_logger.handlers[:]: + root_logger.removeHandler(h) + try: + h.close() + except Exception as e: # noqa: BLE001, PERF203 + close_errors.append(e) + root_logger.setLevel(logging.WARNING) + finally: + if self._logger_factory is not None: + try: + self._logger_factory.close_handlers() + finally: + self._logger_factory = None + if close_errors: + raise close_errors[0] +``` + +- [ ] **Step 4: Run the new test to verify it passes** + +```bash +just test -- tests/instruments/test_logging_instrument.py::test_logging_instrument_teardown_aggregates_handler_close_errors +``` + +Expected: PASS. + +- [ ] **Step 5: Verify the rest of the logging tests** + +```bash +just test -- tests/instruments/test_logging_instrument.py +``` + +Expected: all green. In particular `test_logging_instrument_teardown_resets_factory_when_close_handlers_raises` (existing) still asserts that `_logger_factory` is nulled on close_handlers raise — the new try/finally preserves that behavior. + +- [ ] **Step 6: Commit** + +```bash +git add lite_bootstrap/instruments/logging_instrument.py tests/instruments/test_logging_instrument.py +git commit -m "fix: protect LoggingInstrument.teardown root-handler loop (LOG-3) + +A raise from handler.close() mid-loop previously left remaining handlers +attached, skipped the root-level reset, and never called close_handlers on the +factory. Wrap in try/finally and aggregate close errors so the rest of teardown +completes before the first error is re-raised." +``` + +--- + +## Task 6: LOG-4 — Restore broker logger storage on FastStream teardown + +**Files:** +- Modify: `lite_bootstrap/bootstrappers/faststream_bootstrapper.py:110-120` +- Test: `tests/test_faststream_bootstrap.py` + +- [ ] **Step 1: Write the failing test** + +Add to `tests/test_faststream_bootstrap.py`: + +```python +def test_faststream_teardown_restores_broker_params_storage(broker: RedisBroker) -> None: + config = build_faststream_config(broker=broker) + original_storage = broker.config.logger.params_storage + bootstrapper = FastStreamBootstrapper(bootstrap_config=config) + bootstrapper.bootstrap() + assert isinstance(broker.config.logger.params_storage, ManualLoggerStorage) + assert broker.config.logger.params_storage is not original_storage + + bootstrapper.teardown() + + assert broker.config.logger.params_storage is original_storage +``` + +`ManualLoggerStorage` and `RedisBroker` are already imported in the file (lines 11-12); `build_faststream_config` is defined locally (line 34). + +- [ ] **Step 2: Run test to verify it fails** + +```bash +just test -- tests/test_faststream_bootstrap.py::test_faststream_teardown_restores_broker_params_storage +``` + +Expected: FAIL — `FastStreamLoggingInstrument` has no `teardown()` override, so `params_storage` stays as the `ManualLoggerStorage` set by bootstrap. + +- [ ] **Step 3: Add snapshot field and teardown override** + +In `lite_bootstrap/bootstrappers/faststream_bootstrapper.py`, modify `FastStreamLoggingInstrument` (lines 110-120) to add a snapshot field and a teardown override: + +```python +@dataclasses.dataclass(kw_only=True) +class FastStreamLoggingInstrument(LoggingInstrument): + bootstrap_config: FastStreamConfig + _prior_broker_params_storage: typing.Any = dataclasses.field( + default=None, init=False, repr=False, compare=False + ) + _broker_logger_replaced: bool = dataclasses.field( + default=False, init=False, repr=False, compare=False + ) + + def bootstrap(self) -> None: + super().bootstrap() + broker = self.bootstrap_config.application.broker + if broker is not None and import_checker.is_structlog_installed and import_checker.is_faststream_installed: + logger = structlog.get_logger("faststream") + logger.setLevel(self.bootstrap_config.faststream_log_level) + self._prior_broker_params_storage = broker.config.logger.params_storage + broker.config.logger.params_storage = ManualLoggerStorage(logger) + self._broker_logger_replaced = True + + def teardown(self) -> None: + if self._broker_logger_replaced: + broker = self.bootstrap_config.application.broker + if broker is not None: + broker.config.logger.params_storage = self._prior_broker_params_storage + self._broker_logger_replaced = False + self._prior_broker_params_storage = None + super().teardown() +``` + +- [ ] **Step 4: Run the new test to verify it passes** + +```bash +just test -- tests/test_faststream_bootstrap.py::test_faststream_teardown_restores_broker_params_storage +``` + +Expected: PASS. + +- [ ] **Step 5: Verify the rest of the FastStream tests** + +```bash +just test -- tests/test_faststream_bootstrap.py +``` + +Expected: all green. In particular `test_faststream_logging_instrument_injects_structlog_logger` should still pass — it inspects state mid-bootstrap, before teardown. + +- [ ] **Step 6: Commit** + +```bash +git add lite_bootstrap/bootstrappers/faststream_bootstrapper.py tests/test_faststream_bootstrap.py +git commit -m "fix: restore broker logger storage on FastStreamLoggingInstrument teardown (LOG-4) + +bootstrap() mutates broker.config.logger.params_storage to inject a structlog +logger; teardown() now captures and restores the original value. Symmetric with +LoggingInstrument's parent teardown, which still runs via super()." +``` + +--- + +## Task 7: LOG-9 — Add `SentryInstrument.teardown()` + +**Files:** +- Modify: `lite_bootstrap/instruments/sentry_instrument.py:94-125` +- Modify: `tests/instruments/test_sentry_instrument.py` (add test + drop workarounds) + +- [ ] **Step 1: Write the failing test** + +Add to `tests/instruments/test_sentry_instrument.py`: + +```python +def test_sentry_teardown_disables_sdk(minimal_sentry_config: SentryConfig) -> None: + instrument = SentryInstrument(bootstrap_config=minimal_sentry_config) + instrument.bootstrap() + assert sentry_sdk.Hub.current.client is not None + + instrument.teardown() + + # sentry_sdk.init() with no DSN disables the SDK; the Hub's client either becomes + # None or has dsn=None depending on the SDK version. Both indicate "disabled". + client = sentry_sdk.Hub.current.client + assert client is None or client.dsn is None +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +just test -- tests/instruments/test_sentry_instrument.py::test_sentry_teardown_disables_sdk +``` + +Expected: FAIL — `SentryInstrument` currently has no `teardown()` override, so the SDK stays initialized with the test DSN. + +- [ ] **Step 3: Add the teardown override** + +In `lite_bootstrap/instruments/sentry_instrument.py`, extend `SentryInstrument` (after `bootstrap()`, around line 125): + +```python +def teardown(self) -> None: + """Flush pending events and reset the SDK to a no-op state. + + Calling ``sentry_sdk.init()`` with no DSN disables further event capture. This + cleans up after a bootstrap so the same process can be torn down and re-tested + without leaking the previous DSN/transport into subsequent code. + """ + sentry_sdk.flush(timeout=2) + sentry_sdk.init() +``` + +- [ ] **Step 4: Run the new test to verify it passes** + +```bash +just test -- tests/instruments/test_sentry_instrument.py::test_sentry_teardown_disables_sdk +``` + +Expected: PASS. + +- [ ] **Step 5: Drop the redundant `finally: sentry_sdk.init()` workarounds** + +The existing tests at `tests/instruments/test_sentry_instrument.py:43` and `:65` use +`finally: sentry_sdk.init()` to reset the SDK between test cases. With the new +`teardown()` in place, callers that go through the bootstrapper / instrument will get the +reset automatically. Update the two tests so the cleanup happens via `instrument.teardown()`: + +Replace `test_sentry_instrument_with_raise` (lines 36-43): + +```python +def test_sentry_instrument_with_raise(minimal_sentry_config: SentryConfig, sentry_mock: SentryTestTransport) -> None: + instrument = SentryInstrument(bootstrap_config=minimal_sentry_config) + instrument.bootstrap() + + try: + std_logger.error("some error") + assert len(sentry_mock.mock_envelopes) == 1 + finally: + instrument.teardown() +``` + +Replace `test_sentry_instrument_with_structlog_error` (lines 46-66): + +```python +def test_sentry_instrument_with_structlog_error( + minimal_sentry_config: SentryConfig, sentry_mock: SentryTestTransport, logging_mock: LoggingMock +) -> None: + sentry_instrument = SentryInstrument(bootstrap_config=minimal_sentry_config) + sentry_instrument.bootstrap() + logging_instrument = LoggingInstrument( + bootstrap_config=LoggingConfig( + logging_unset_handlers=["uvicorn"], + logging_buffer_capacity=0, + service_debug=False, + logging_extra_processors=[logging_mock], + ) + ) + logging_instrument.bootstrap() + + try: + logger.error("some error") + logger.error("some error, skipping sentry", skip_sentry=True) + assert len(sentry_mock.mock_envelopes) == 1 + finally: + logging_instrument.teardown() + sentry_instrument.teardown() +``` + +- [ ] **Step 6: Run the full sentry test file** + +```bash +just test -- tests/instruments/test_sentry_instrument.py +``` + +Expected: all green. + +- [ ] **Step 7: Commit** + +```bash +git add lite_bootstrap/instruments/sentry_instrument.py tests/instruments/test_sentry_instrument.py +git commit -m "fix: add SentryInstrument.teardown that disables the SDK (LOG-9) + +bootstrap() called sentry_sdk.init() but no teardown reset the global state. +Process-local tests had to call sentry_sdk.init() in finally blocks as a +workaround; those are now replaced by instrument.teardown(). flush() drains +in-flight events before the reset." +``` + +--- + +## Task 8: LOG-6 — `WeakKeyDictionary` for Litestar OTel cache + +**Files:** +- Modify: `lite_bootstrap/bootstrappers/litestar_bootstrapper.py:75-99` +- Test: `tests/test_litestar_bootstrap.py` + +- [ ] **Step 1: Write the failing test** + +Add to `tests/test_litestar_bootstrap.py`: + +```python +import gc +import weakref + + +def test_litestar_otel_apps_cache_evicts_dead_refs() -> None: + from lite_bootstrap.bootstrappers.litestar_bootstrapper import ( + LitestarOpenTelemetryInstrumentationMiddleware, + ) + from opentelemetry.sdk.trace import TracerProvider + + tracer_provider = TracerProvider() + middleware = LitestarOpenTelemetryInstrumentationMiddleware( + tracer_provider=tracer_provider, + excluded_urls=set(), + ) + + async def transient_app(scope: dict, receive: object, send: object) -> None: # noqa: ARG001 + return None + + weak_app = weakref.ref(transient_app) + middleware._otel_apps[transient_app] = "marker" # noqa: SLF001 — direct cache poke + assert weak_app() is not None + assert len(middleware._otel_apps) == 1 # noqa: SLF001 + + del transient_app + gc.collect() + + assert weak_app() is None + assert len(middleware._otel_apps) == 0 # noqa: SLF001 +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +just test -- tests/test_litestar_bootstrap.py::test_litestar_otel_apps_cache_evicts_dead_refs +``` + +Expected: FAIL — `_otel_apps` is a `dict[int, ASGIApp]`; the integer key doesn't evict when the `transient_app` is GC'd. Also fails to even accept `transient_app` as a key (current code uses `id(next_app)`). + +The test sets `middleware._otel_apps[transient_app] = "marker"` to verify the dict-like API expects the app object as the key. This will need to be a real WeakKeyDictionary for the assertion to pass. + +- [ ] **Step 3: Replace cache with WeakKeyDictionary** + +In `lite_bootstrap/bootstrappers/litestar_bootstrapper.py`, add `import weakref` near the top of the file (alongside the existing imports) and replace lines 75-99: + +```python +if import_checker.is_litestar_opentelemetry_installed: + + class LitestarOpenTelemetryInstrumentationMiddleware(ASGIMiddleware): + def __init__(self, tracer_provider: "TracerProvider", excluded_urls: set[str]) -> None: + self._tracer_provider = tracer_provider + self._excluded_urls = ",".join(excluded_urls) + # WeakKeyDictionary so wrapper apps are evicted when Litestar drops the + # next_app reference (hot reload, plugin add/remove, AppConfig rebuild). + # Falls back gracefully — apps that don't support weak refs are simply + # not cached. + self._otel_apps: "weakref.WeakKeyDictionary[ASGIApp, ASGIApp]" = weakref.WeakKeyDictionary() + + async def handle( + self, + scope: "Scope", + receive: "Receive", + send: "Send", + next_app: "ASGIApp", + ) -> None: + otel_app = self._otel_apps.get(next_app) + if otel_app is None: + otel_app = OpenTelemetryMiddleware( + app=next_app, + default_span_details=build_litestar_route_details_from_scope, + excluded_urls=self._excluded_urls, + tracer_provider=self._tracer_provider, + ) + try: + self._otel_apps[next_app] = otel_app # ty: ignore[invalid-assignment] + except TypeError: + # next_app doesn't support weak references; skip caching for it. + pass + await otel_app(scope, receive, send) # ty: ignore[invalid-argument-type] +``` + +- [ ] **Step 4: Run the new test to verify it passes** + +```bash +just test -- tests/test_litestar_bootstrap.py::test_litestar_otel_apps_cache_evicts_dead_refs +``` + +Expected: PASS. + +- [ ] **Step 5: Verify the rest of the Litestar tests** + +```bash +just test -- tests/test_litestar_bootstrap.py +``` + +Expected: all green. In particular `test_litestar_otel_span_naming` exercises the cache hit path on real requests. + +- [ ] **Step 6: Commit** + +```bash +git add lite_bootstrap/bootstrappers/litestar_bootstrapper.py tests/test_litestar_bootstrap.py +git commit -m "fix: use WeakKeyDictionary for Litestar OTel app cache (LOG-6) + +The previous dict[int, ASGIApp] keyed by id() never evicted, holding wrapper +OpenTelemetryMiddleware instances alive after Litestar dropped the next_app +reference. Switch to weakref.WeakKeyDictionary so wrappers GC with their apps. +Skip caching gracefully when an app doesn't support weak references." +``` + +--- + +## Task 9: LOG-7 — Guard against double `_TeardownProvider` attachment on FastMCP + +**Files:** +- Modify: `lite_bootstrap/bootstrappers/fastmcp_bootstrapper.py:152-154` +- Test: `tests/test_fastmcp_bootstrap.py` + +**Context:** FastMCP's `add_provider` appends to `self.providers` (verified at +`fastmcp/server/providers/aggregate.py:116`). The list is inherited from `AggregateProvider` +(`fastmcp/server/providers/aggregate.py:88`). We can iterate `application.providers` to +check for existing `_TeardownProvider` instances. + +- [ ] **Step 1: Write the failing test** + +Add to `tests/test_fastmcp_bootstrap.py`: + +```python +import warnings + + +def test_second_fastmcp_bootstrapper_on_same_app_warns_not_stacks() -> None: + application = FastMCP() + config_a = FastMcpConfig(application=application, service_name="a") + bootstrapper_a = FastMcpBootstrapper(bootstrap_config=config_a) + providers_after_first = list(application.providers) + + config_b = FastMcpConfig(application=application, service_name="b") + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + FastMcpBootstrapper(bootstrap_config=config_b) + + matching = [w for w in caught if "_TeardownProvider" in str(w.message)] + assert matching, "expected warning about existing _TeardownProvider" + assert list(application.providers) == providers_after_first, ( + "second bootstrapper must not stack another _TeardownProvider" + ) + + bootstrapper_a.teardown() +``` + +`FastMCP` and `FastMcpConfig` are already imported in the file. + +- [ ] **Step 2: Run test to verify it fails** + +```bash +just test -- tests/test_fastmcp_bootstrap.py::test_second_fastmcp_bootstrapper_on_same_app_warns_not_stacks +``` + +Expected: FAIL — current code unconditionally calls `add_provider`, so `application.providers` would grow by one and no warning is emitted. + +- [ ] **Step 3: Add the re-attach guard** + +In `lite_bootstrap/bootstrappers/fastmcp_bootstrapper.py`, add `import warnings` at the top +of the file (alongside the existing imports) and modify `__init__` (lines 152-154): + +```python +def __init__(self, bootstrap_config: FastMcpConfig) -> None: + super().__init__(bootstrap_config) + if any(isinstance(p, _TeardownProvider) for p in self.bootstrap_config.application.providers): + warnings.warn( + "FastMCP application already has a _TeardownProvider attached; " + "skipping re-attachment. Construct one FastMcpBootstrapper per application.", + stacklevel=2, + ) + return + self.bootstrap_config.application.add_provider(_TeardownProvider(self.teardown)) +``` + +- [ ] **Step 4: Run the new test to verify it passes** + +```bash +just test -- tests/test_fastmcp_bootstrap.py::test_second_fastmcp_bootstrapper_on_same_app_warns_not_stacks +``` + +Expected: PASS. + +- [ ] **Step 5: Verify the rest of the FastMCP tests** + +```bash +just test -- tests/test_fastmcp_bootstrap.py +``` + +Expected: all green. The existing `test_fastmcp_teardown_runs_via_asgi_lifespan` confirms a single attachment still fires teardown on lifespan shutdown. + +- [ ] **Step 6: Commit** + +```bash +git add lite_bootstrap/bootstrappers/fastmcp_bootstrapper.py tests/test_fastmcp_bootstrap.py +git commit -m "fix: guard against double _TeardownProvider attachment on FastMCP (LOG-7) + +Constructing two FastMcpBootstrappers around the same application previously +stacked two _TeardownProvider instances, doubling the teardown call on shutdown. +Detect an existing _TeardownProvider in application.providers and skip re-attach +with a warning." +``` + +--- + +## Task 10: LOG-8 — Guard against double lifespan wrapping on FastAPI + +**Files:** +- Modify: `lite_bootstrap/bootstrappers/fastapi_bootstrapper.py:197-205` +- Test: `tests/test_fastapi_bootstrap.py` + +- [ ] **Step 1: Write the failing test** + +Add to `tests/test_fastapi_bootstrap.py`: + +```python +def test_second_fastapi_bootstrapper_on_same_app_warns_not_stacks(fastapi_config: FastAPIConfig) -> None: + application = fastapi.FastAPI() + config_a = dataclasses.replace(fastapi_config, application=application) + FastAPIBootstrapper(bootstrap_config=config_a) + lifespan_after_first = application.router.lifespan_context + + config_b = dataclasses.replace(fastapi_config, application=application) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + FastAPIBootstrapper(bootstrap_config=config_b) + + matching = [w for w in caught if "lifespan" in str(w.message).lower()] + assert matching, "expected warning about existing lite-bootstrap lifespan" + assert application.router.lifespan_context is lifespan_after_first, ( + "second bootstrapper must not re-wrap the lifespan" + ) +``` + +`warnings` will need to be imported at the top of the test file (it is — line 4 in current state via no import; verify via Read before committing). If missing, add `import warnings`. + +- [ ] **Step 2: Run test to verify it fails** + +```bash +just test -- tests/test_fastapi_bootstrap.py::test_second_fastapi_bootstrapper_on_same_app_warns_not_stacks +``` + +Expected: FAIL — current `__init__` unconditionally wraps `lifespan_context`, so the second bootstrapper stacks another layer. + +- [ ] **Step 3: Add the re-wrap guard** + +In `lite_bootstrap/bootstrappers/fastapi_bootstrapper.py`, modify the `FastAPIBootstrapper.__init__` (lines 197-205): + +```python +def __init__(self, bootstrap_config: FastAPIConfig) -> None: + super().__init__(bootstrap_config) + + application = _narrow_app(self.bootstrap_config) + if getattr(application.state, "lite_bootstrap_lifespan_attached", False): + warnings.warn( + "FastAPI application already has a lite-bootstrap lifespan wrapper attached; " + "skipping re-wrap. Construct one FastAPIBootstrapper per application.", + stacklevel=2, + ) + return + application.state.lite_bootstrap_lifespan_attached = True + old_lifespan_manager = application.router.lifespan_context + application.router.lifespan_context = _merge_lifespan_context( + old_lifespan_manager, + self.lifespan_manager, + ) +``` + +`warnings` is already imported in this file (line 4). `application.state` is the standard Starlette state container — assignment to arbitrary attributes is supported. + +- [ ] **Step 4: Run the new test to verify it passes** + +```bash +just test -- tests/test_fastapi_bootstrap.py::test_second_fastapi_bootstrapper_on_same_app_warns_not_stacks +``` + +Expected: PASS. + +- [ ] **Step 5: Verify the rest of the FastAPI tests** + +```bash +just test -- tests/test_fastapi_bootstrap.py +``` + +Expected: all green. The existing `test_fastapi_bootstrap` exercises the single-attachment lifespan path on a real ASGI client. + +- [ ] **Step 6: Commit** + +```bash +git add lite_bootstrap/bootstrappers/fastapi_bootstrapper.py tests/test_fastapi_bootstrap.py +git commit -m "fix: guard against double lifespan wrapping on FastAPI (LOG-8) + +Constructing two FastAPIBootstrappers around the same application previously +stacked two lifespan wrappers, calling teardown twice on shutdown. Detect the +sentinel on application.state and skip re-wrap with a warning. Idempotent +teardown (CRIT-3) means the prior behavior was harmless, but this removes the +log noise and confusing stack depth." +``` + +--- + +## Final verification + +- [ ] **Step 1: Full test suite** + +```bash +just test +``` + +Expected: all green. Coverage stays at 100% (`--cov-fail-under=100` from `pyproject.toml`). + +- [ ] **Step 2: Lint (includes ty type check)** + +```bash +just lint-ci +``` + +Expected: all green. + +- [ ] **Step 3: Bandit clean (manual verification of LOG-5/SEC-4 closure)** + +```bash +bandit -r lite_bootstrap 2>&1 | grep -c "B101" +``` + +Expected: `0` (zero remaining B101 findings after Tasks 1 and 2). + +- [ ] **Step 4: Confirm the commit log shape** + +```bash +git log --oneline origin/main..HEAD +``` + +Expected: 10 commits, each titled `fix:` (or `docs:` for Task 3), each scoped to a single finding. Optional: rebase to squash trivially-related commits if reviewer prefers — recommend keeping them separate so the PR review can match commits to audit IDs. + +--- + +## Self-review checklist (run after the plan finishes) + +1. **Coverage of PR1 findings:** LOG-1 (Task 3) · LOG-2 (Task 4) · LOG-3 (Task 5) · LOG-4 (Task 6) · LOG-5 (Tasks 1+2) · LOG-6 (Task 8) · LOG-7 (Task 9) · LOG-8 (Task 10) · LOG-9 (Task 7) · SEC-4 (Tasks 1+2). All 10. +2. **TEST-NEW coverage:** TEST-NEW-2 (Task 4 — logger restore; LOG-1's "shutdown called" already exists in the suite per the audit) · TEST-NEW-3 (Task 5) · TEST-NEW-4 (Task 6) · TEST-NEW-5 (Tasks 8, 9, 10). All four mapped. +3. **No placeholders:** every step has the exact file path, line numbers where relevant, complete code blocks, exact `just test -- ` command, and the expected outcome. +4. **Type consistency:** `OpenTelemetryInstrument._prior_logger_disabled: dict[str, bool]` (Task 4), `FastStreamLoggingInstrument._prior_broker_params_storage / _broker_logger_replaced` (Task 6), `LitestarOpenTelemetryInstrumentationMiddleware._otel_apps: weakref.WeakKeyDictionary[ASGIApp, ASGIApp]` (Task 8). All consistent between definition and usage. +5. **Commit isolation:** each task ends with a single commit scoped to its finding ID, so PR review can land/revert at the task level if needed. diff --git a/planning/specs/2026-06-05-bug-audit-v2-sequencing.md b/planning/specs/2026-06-05-bug-audit-v2-sequencing.md new file mode 100644 index 0000000..406e621 --- /dev/null +++ b/planning/specs/2026-06-05-bug-audit-v2-sequencing.md @@ -0,0 +1,436 @@ +# Bug Audit v2 — Implementation Sequencing + +**Date:** 2026-06-05 +**Parent spec:** [2026-06-05-bug-audit-v2.md](2026-06-05-bug-audit-v2.md) +**Scope:** All 26 new findings from the audit (5 UX · 9 logic · 5 security · 7 tests). +TEST-NEW-* items fold into their parent code-fix PR rather than getting their own. +**Deliverable:** 3 sequenced PRs grouped by theme. Sequencing rationale, per-PR scope, +locked decisions. + +This is a coarser-grained sequencing than the prior 2026-05-31 plan (which used 7 PRs +for criticals). The audit has no CRIT items, so the work bundles cleanly into three +themed PRs without sacrificing reviewability. + +--- + +## Sequencing rationale + +Group by what a reviewer needs to hold in their head while reading the diff: + +1. **PR1 — Internal lifecycle (teardown + invariants).** The reviewer is auditing + `bootstrap → teardown` symmetry across instruments. One mental model. +2. **PR2 — External API surface (config UX + validation).** The reviewer is checking + what the library exposes to users and how it validates inputs. Different mental + model. +3. **PR3 — Hygiene (docs, warnings, CI).** Pure chore, no code paths to reason about. + +Order is by review-fatigue: largest mental load first while reviewers are fresh, +chore last. + +| # | PR | Findings | Risk | Diff size | +|---|-----|----------|------|-----------| +| 1 | Lifecycle & teardown correctness | LOG-1..9, SEC-4, TEST-NEW-2..5 | Medium | Medium | +| 2 | Config UX & security validation | UX-1, UX-2, UX-3, SEC-1, SEC-2, SEC-3, TEST-NEW-1, TEST-NEW-6 | Low–Medium | Medium | +| 3 | Hygiene + CI gate | UX-4, UX-5, SEC-5, TEST-NEW-7 | Very low | Tiny | + +PR1 lands first because the re-bootstrap safety fix (LOG-7/LOG-8) interacts with the +OTel teardown fix (LOG-1) — easier to integrate them together than across PRs. PR2 and +PR3 are independent of PR1 and of each other; they can land in any order after PR1 +(or in parallel branches). + +Branch naming: `fix/bug-audit-v2-pr-` to match the prior `2026-05-31-pr-...` +convention used for the previous audit. + +--- + +## PR1: Lifecycle & teardown correctness + +**Branch:** `fix/bug-audit-v2-pr1-lifecycle` +**Findings:** LOG-1, LOG-2, LOG-3, LOG-4, LOG-5, LOG-6, LOG-7, LOG-8, LOG-9, SEC-4, +TEST-NEW-2, TEST-NEW-3, TEST-NEW-4, TEST-NEW-5 + +### Scope + +Every fix in this PR is "bootstrap promised, teardown didn't deliver" or +"bootstrap-with-app-reuse breaks invariants". Bundled because: + +- They share files (3 of the 6 instruments, 3 of the 4 bootstrappers). +- They share testing approach (drive lifecycle, snapshot global/instance state, + assert restoration). +- The re-bootstrap fixes (LOG-7/LOG-8) need the OTel teardown reset (LOG-1) to be + reliable; bundling avoids a phantom intermediate state on `main`. + +#### Sub-section A — Type-narrowing invariants (LOG-5 / SEC-4) + +Replace `assert`-based type narrowing with explicit raises so checks survive `python -O`: + +- `lite_bootstrap/bootstrappers/fastapi_bootstrapper.py:78-80` (`_narrow_app`): + + ```python + def _narrow_app(config: "FastAPIConfig") -> "fastapi.FastAPI": + if isinstance(config.application, UnsetType): + msg = "FastAPIConfig.application is UNSET; __post_init__ did not run" + raise RuntimeError(msg) + return config.application + ``` + +- `lite_bootstrap/instruments/pyroscope_instrument.py:35-37` (precondition in + `bootstrap()`): same shape — `if endpoint is None: raise RuntimeError(...)`. + +Both bandit B101 findings disappear. + +#### Sub-section B — OTel teardown completeness (LOG-1 / LOG-2) + +In `lite_bootstrap/instruments/opentelemetry_instrument.py`: + +- LOG-1: import `NoOpTracerProvider` from `opentelemetry.trace`. In `teardown()` after + `self._tracer_provider.shutdown()` (lines 152-156), call + `set_tracer_provider(NoOpTracerProvider())` to swap the global back. +- LOG-2: in `__post_init__` (or as a `dataclasses.field(init=False, default_factory=dict, ...)`), + declare `_prior_logger_states: dict[str, bool]`. In `bootstrap()` (lines 107-109), + capture the previous `disabled` value for each logger before mutating. In + `teardown()`, restore. + +#### Sub-section C — Teardown robustness (LOG-3 / LOG-4 / LOG-9) + +- LOG-3 — `lite_bootstrap/instruments/logging_instrument.py:168-178`: wrap the + root-handler loop and level reset in `try/finally` so + `self._logger_factory.close_handlers()` always runs. Aggregate `h.close()` exceptions + into a list; after the loop, raise a `TeardownError`-style aggregate if non-empty. + Pattern matches `BaseBootstrapper.teardown` (`bootstrappers/base.py:96-105`). +- LOG-4 — `lite_bootstrap/bootstrappers/faststream_bootstrapper.py:110-120`: snapshot + `broker.config.logger.params_storage` into an `init=False` field before mutating in + `bootstrap()`. Add a `teardown()` override that restores it before + `super().teardown()`. +- LOG-9 — `lite_bootstrap/instruments/sentry_instrument.py:94-125`: add `teardown()` + that calls `sentry_sdk.flush(timeout=2)` and `sentry_sdk.init()` (no args) inside a + `import_checker.is_sentry_installed` guard. Drop the `finally: sentry_sdk.init()` + workarounds in `tests/instruments/test_sentry_instrument.py:43, 65` as part of this PR. + +#### Sub-section D — App-reuse safety (LOG-6 / LOG-7 / LOG-8) + +- LOG-6 — `lite_bootstrap/bootstrappers/litestar_bootstrapper.py:75-99`: replace + `dict[int, ASGIApp]` with `weakref.WeakValueDictionary` so GC'd `next_app`s evict. +- LOG-7 — `lite_bootstrap/bootstrappers/fastmcp_bootstrapper.py:152-154`: before + `application.add_provider(_TeardownProvider(...))`, check `application.providers` + for an existing `_TeardownProvider`. If present, emit + `warnings.warn("FastMCP application already has a _TeardownProvider; skipping re-attachment", stacklevel=2)` + and return. +- LOG-8 — `lite_bootstrap/bootstrappers/fastapi_bootstrapper.py:197-205`: store a + sentinel on `application.state.lite_bootstrap_lifespan_attached = True` after + wrapping. If already True on entry, warn-and-skip the wrap. + +### Tests + +- TEST-NEW-2 — `tests/instruments/test_opentelemetry_instrument.py`: + - `test_teardown_resets_global_tracer_provider_to_noop`: bootstrap, teardown, assert + `trace.get_tracer_provider()` is a `NoOpTracerProvider`. + - `test_teardown_restores_disabled_loggers`: set `disabled = False` pre-bootstrap, + run lifecycle, assert restoration. +- TEST-NEW-3 — `tests/instruments/test_logging_instrument.py`: + - `test_teardown_aggregates_handler_close_errors`: patch a root handler's `close` + to raise; assert factory is nulled, level is reset, exception is re-raised. +- TEST-NEW-4 — `tests/test_faststream_bootstrap.py`: + - `test_teardown_restores_broker_params_storage`: snapshot, lifecycle, assert + restoration. +- TEST-NEW-5 — multi-file: + - `test_fastapi_bootstrap.py::test_second_bootstrapper_on_same_app_warns_not_stacks` + - `test_fastmcp_bootstrap.py::test_second_bootstrapper_on_same_app_warns_not_stacks` + - `test_litestar_bootstrap.py::test_otel_apps_cache_evicts_dead_refs` +- Add `test_pyroscope_instrument.py::test_bootstrap_raises_when_endpoint_unset_at_call_time` + and `test_fastapi_bootstrap.py::test_narrow_app_raises_when_application_unset` for LOG-5. +- Drop `sentry_sdk.init()` workarounds in `test_sentry_instrument.py` per LOG-9. +- Add `test_sentry_instrument.py::test_sentry_teardown_resets_sdk_to_noop`. + +### Decisions (locked) + +| Decision | Choice | +|----------|--------| +| Type narrowing | Explicit `raise RuntimeError(msg)`; drop asserts | +| Tracer reset | `set_tracer_provider(NoOpTracerProvider())` after `shutdown()` | +| OTel loggers | Capture-and-restore; don't drop the silencing | +| Teardown errors | Aggregate-and-raise (matches `TeardownError`) | +| OTel cache | `weakref.WeakValueDictionary` | +| Re-attach behavior | Warn + skip (idempotent); never raise | + +### Files + +Source: +- `lite_bootstrap/bootstrappers/fastapi_bootstrapper.py` +- `lite_bootstrap/bootstrappers/fastmcp_bootstrapper.py` +- `lite_bootstrap/bootstrappers/faststream_bootstrapper.py` +- `lite_bootstrap/bootstrappers/litestar_bootstrapper.py` +- `lite_bootstrap/instruments/logging_instrument.py` +- `lite_bootstrap/instruments/opentelemetry_instrument.py` +- `lite_bootstrap/instruments/pyroscope_instrument.py` +- `lite_bootstrap/instruments/sentry_instrument.py` + +Tests: +- `tests/test_fastapi_bootstrap.py` +- `tests/test_fastmcp_bootstrap.py` +- `tests/test_faststream_bootstrap.py` +- `tests/test_litestar_bootstrap.py` +- `tests/instruments/test_logging_instrument.py` +- `tests/instruments/test_opentelemetry_instrument.py` +- `tests/instruments/test_pyroscope_instrument.py` +- `tests/instruments/test_sentry_instrument.py` + +### Risk + +Medium. Largest single PR in this plan. Touches 8 source files, 8 test files. The +mitigations are clear scope (one theme: lifecycle), comprehensive tests for each fix, +and existing teardown-error machinery to lean on. PR2 and PR3 stay decoupled, so a +revert of PR1 doesn't block them. + +--- + +## PR2: Config UX & security validation + +**Branch:** `fix/bug-audit-v2-pr2-config-security` +**Findings:** UX-1, UX-2, UX-3, SEC-1, SEC-2, SEC-3, TEST-NEW-1, TEST-NEW-6 + +### Scope + +Every fix in this PR is "the user-facing config doesn't behave the way the docs +imply" or "config validation should reject unsafe combinations". Bundled because: + +- All edits hit config classes and their validation paths. +- All new tests assert config-level behavior, not lifecycle behavior. +- Reviewer mental model: "what happens at `Config(...)` construction time". + +#### Sub-section A — User-app preservation (UX-1) + +`lite_bootstrap/bootstrappers/fastapi_bootstrapper.py:58-75` — move the +`application.title/.debug/.version = ...` assignments **inside** the UnsetType branch: + +```python +def __post_init__(self) -> None: + if not import_checker.is_fastapi_installed: + msg = "fastapi is not installed" + raise ConfigurationError(msg) + + if isinstance(self.application, UnsetType): + application = fastapi.FastAPI(docs_url=self.swagger_path, **self.application_kwargs) + object.__setattr__(self, "application", application) + application.title = self.service_name + application.debug = self.service_debug + application.version = self.service_version + elif self.application_kwargs: + warnings.warn("application_kwargs must be used without application", stacklevel=2) +``` + +#### Sub-section B — FastStream config gaps (UX-2 / UX-3) + +`lite_bootstrap/bootstrappers/faststream_bootstrapper.py:63-72`: + +- UX-2: add `prometheus_collector_registry: "prometheus_client.CollectorRegistry | None" = None` + (typed under `TYPE_CHECKING`). In `FastStreamPrometheusInstrument` (line 143-167), use + the field if non-None, else the existing per-instance default. +- UX-3: add `opentelemetry_excluded_urls: list[str] = dataclasses.field(default_factory=list)` + matching `FastAPIConfig:53` and `LitestarConfig:114`. No change to `_build_excluded_urls`; + the field is now discoverable. + +#### Sub-section C — Security hardening (SEC-1 / SEC-2 / SEC-3) + +- SEC-1 — `lite_bootstrap/helpers/fastapi_helpers.py:37-59`: validate `root_path` with + `lite_bootstrap.helpers.path.is_valid_path` before interpolation. If validation fails, + fall back to `""` and emit `warnings.warn("root_path rejected: ...", stacklevel=2)`. + Matches the existing path-validation regime for `prometheus_metrics_path` / + `swagger_path`. +- SEC-2 — `lite_bootstrap/instruments/opentelemetry_instrument.py:bootstrap()`: after + reading `opentelemetry_endpoint`, parse the host. If `opentelemetry_insecure=True` + AND the host is not `localhost` / `127.0.0.1` / `::1` AND the URL scheme isn't + `unix://`, emit `warnings.warn("OTLP exporter sending traces unencrypted to a non-local endpoint", stacklevel=2)`. +- SEC-3 — `lite_bootstrap/instruments/cors_instrument.py`: add `__post_init__` to + `CorsConfig` that raises `ConfigurationError` when `cors_allowed_credentials is True` + AND (`cors_allowed_origins == ["*"]` OR `cors_allowed_origin_regex in {".*", r".+"}`). + Re-export `ConfigurationError` from the cors module if not already importable there. + +### Tests + +- TEST-NEW-1 — `tests/test_fastapi_bootstrap.py::test_user_supplied_app_keeps_title_version_debug`: + build a pre-configured `fastapi.FastAPI(title="user", version="9.9.9", debug=False)`, + pass via `FastAPIConfig(application=user_app, service_name="lite", ...)`, assert + user's values survive. +- `tests/test_faststream_bootstrap.py::test_prometheus_uses_injected_registry`: register + a counter on a custom registry, pass via the new field, assert the counter appears + in `/metrics`. +- `tests/test_faststream_bootstrap.py::test_opentelemetry_excluded_urls_applied`: + set the field, assert the returned set from `_build_excluded_urls` contains the value. +- TEST-NEW-6 — `tests/instruments/test_cors_instrument.py::test_cors_credentials_with_wildcard_raises`: + parametrized over wildcard variants; assert `ConfigurationError`. +- `tests/test_fastapi_offline_docs.py::test_root_path_with_malicious_chars_is_rejected`: + pass `root_path=""`, assert HTML response does NOT contain + the injected literal AND a warning was emitted. +- `tests/instruments/test_opentelemetry_instrument.py::test_warns_on_insecure_non_local_endpoint`: + bootstrap with a remote endpoint and `opentelemetry_insecure=True`; assert warning. + +### Decisions (locked) + +| Decision | Choice | +|----------|--------| +| UX-1 strategy | Move override inside UnsetType branch; never stomp user values | +| UX-2 default | Keep per-instance isolated registry; opt-in injection | +| SEC-1 method | Validate via `is_valid_path` (consistent with existing path handling) | +| SEC-2 default | Keep `insecure=True`; warn on non-local endpoint | +| SEC-3 scope | Reject at config construction (not at instrument bootstrap) | + +### Files + +Source: +- `lite_bootstrap/bootstrappers/fastapi_bootstrapper.py` +- `lite_bootstrap/bootstrappers/faststream_bootstrapper.py` +- `lite_bootstrap/helpers/fastapi_helpers.py` +- `lite_bootstrap/instruments/cors_instrument.py` +- `lite_bootstrap/instruments/opentelemetry_instrument.py` + +Tests: +- `tests/test_fastapi_bootstrap.py` +- `tests/test_fastapi_offline_docs.py` +- `tests/test_faststream_bootstrap.py` +- `tests/instruments/test_cors_instrument.py` +- `tests/instruments/test_opentelemetry_instrument.py` + +### Risk + +Low–Medium. Each change is additive or refactors within a single `__post_init__`. The +CORS validation could surprise existing users with permissive configs — flag in the +release notes. + +--- + +## PR3: Hygiene + CI gate + +**Branch:** `chore/bug-audit-v2-pr3-hygiene` +**Findings:** UX-4, UX-5, SEC-5, TEST-NEW-7 + +### Scope + +Pure-chore PR. No production code paths change. Bundled so the hygiene fixes land +together as a single small diff. + +#### Sub-section A — Dual-channel skip signal (UX-4) + +`lite_bootstrap/bootstrappers/base.py:62-67`: after the existing +`warnings.warn(...)`, add `logger.warning("instrument %s skipped: %s", instrument_type.__name__, instrument_type.missing_dependency_message)`. +Two channels — users who suppress one still see the other. + +#### Sub-section B — `from_object` asymmetry doc (UX-5) + +`CLAUDE.md` under the "Conventions" section: add a one-paragraph note explaining +that `from_dict` accepts explicit `None` (overrides default) while `from_object` +filters `None` (default wins). Reference the docstrings and tests that pin it. + +#### Sub-section C — Escalate warning regressions (TEST-NEW-7) + +`pyproject.toml [tool.pytest.ini_options]`: add + +```toml +filterwarnings = [ + "error::lite_bootstrap.exceptions.InstrumentSkippedWarning", +] +``` + +Verify the existing tests still pass — they should because every dependency-missing +test already uses `pytest.warns(...)` to opt in. + +#### Sub-section D — CI dependency audit (SEC-5) + +`.github/workflows/security-audit.yml` (new file; check that no equivalent step exists +in the current workflows first): + +```yaml +name: security-audit +on: + pull_request: + schedule: + - cron: "0 6 * * 1" # weekly Monday +jobs: + pip-audit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v3 + - run: | + uv export --all-extras --no-hashes > /tmp/reqs.txt + uv tool install pip-audit + pip-audit --no-deps --disable-pip -r /tmp/reqs.txt +``` + +### Tests + +No new tests. TEST-NEW-7 changes pytest config; verification is that the existing +suite still passes (`just test`). + +### Decisions (locked) + +| Decision | Choice | +|----------|--------| +| UX-4 channels | Both `warnings.warn` AND `logger.warning` | +| SEC-5 tool | `pip-audit` over `safety` (OSV-backed, uv-native) | +| SEC-5 cadence | Per-PR + weekly cron | +| TEST-NEW-7 scope | Escalate only `InstrumentSkippedWarning` subclasses | + +### Files + +- `lite_bootstrap/bootstrappers/base.py` +- `CLAUDE.md` +- `pyproject.toml` +- `.github/workflows/security-audit.yml` + +### Risk + +Very low. No production behavior change; CI step is additive. Worst case is the +filterwarnings change surfaces a pre-existing leak in the suite, in which case fix +the leaky test inline. + +--- + +## Branch hygiene & CI + +- PR1 lands first; PR2 and PR3 can land in any order after PR1, or in parallel + branches off `main`. +- Each PR runs `just lint-ci` and `just test` via existing CI. +- PR1 additionally verifies the new tests fail on `main` for the LOG-1..9 cases (each + test should fail when the fix is reverted — this is the regression proof). +- Squash-merge each PR before the next branches off (matches the project's recent + pattern of `fix:` / `feat:` / `chore:` prefixes). + +--- + +## Cross-PR locked decisions + +| Decision | Choice | +|----------|--------| +| Granularity | 3 PRs by review-mental-model theme | +| Order | Lifecycle first (largest mental load while reviewers are fresh) | +| PR2 / PR3 parallelism | Allowed; both independent of each other and of PR1 | + +--- + +## Deferred (out of scope for this plan) + +None. The audit's 26 new findings all map to a PR above. + +If even 3 PRs feels too coarse, the [9-PR finer-grained sequencing] can be reconstructed +from the audit by mapping: + +- PR1 splits into: assert→raise (1 finding), OTel teardown (2), teardown robustness (3), + app-reuse safety (3). +- PR2 splits into: FastAPI user app (1), FastStream gaps (2), security hardening (3). +- PR3 splits into: warning visibility (1), from_object docs (1), filterwarnings (1), + CI gate (1). + +The 3-PR grouping is the recommended ship cadence; finer splits are available if review +pressure or hot-fix urgency requires. + +If even 3 PRs is too much to commit to right now, the lowest-priority items that could +be deferred without risk: + +- LOG-6 (`_otel_apps` memory growth): theoretical for hot-reload setups only. +- LOG-7 / LOG-8 (re-bootstrap stacking): produces extra log lines / stack depth but no + correctness break thanks to idempotent teardown (CRIT-3 fix). +- UX-5 (`from_object` asymmetry): documentation only. + +These three together would shrink PR1 modestly and PR3 marginally, but the savings are +not large enough to justify a fourth split. diff --git a/planning/specs/2026-06-05-bug-audit-v2.md b/planning/specs/2026-06-05-bug-audit-v2.md new file mode 100644 index 0000000..210550d --- /dev/null +++ b/planning/specs/2026-06-05-bug-audit-v2.md @@ -0,0 +1,582 @@ +# lite-bootstrap — Bug Audit v2 (UX · Logic · Security · Tests) + +**Date:** 2026-06-05 +**Scope:** Full re-read of `lite_bootstrap/` and `tests/` against the 2026-05-31 audit, plus +fresh findings under four lenses (UX, logic, security, tests). +**Deliverable:** Prioritized findings report. No code changes. + +Each lens opens with a status table for the relevant prior findings +(`FIXED` / `PARTIAL` / `OPEN`) and is followed by **new** findings not in the prior audit. +External tooling: `pip-audit` (no CVEs in lockfile), `bandit` (two B101 assert findings — +both folded into LOG/SEC items below). + +--- + +## Lens 1 — UX (API ergonomics) + +### Prior-finding status + +| ID | Title | Status | Evidence | +|---|---|---|---| +| LOW-4 | `FastAPIConfig.application` inconsistent default pattern | **FIXED** | `UnsetType` sentinel via `types.UNSET`; `__post_init__` builds the app and bypasses freeze via `object.__setattr__` — pattern is now documented in `CLAUDE.md`. (`fastapi_bootstrapper.py:51, 63-67`) | +| LOW-6 | `MemoryLoggerFactory.__init__` takes 5 logging-config kwargs | **FIXED** | Replaced with `_MemoryLoggerFactoryConfig` dataclass. (`logging_factory.py:26-31, 41-50`) | +| LOW-7 | `FreeBootstrapperConfig` naming inconsistency | **FIXED** | Renamed `FreeConfig`; `FreeBootstrapperConfig = FreeConfig` alias preserved; both re-exported. (`free_bootstrapper.py:12, 38`, `__init__.py:4, 27-28`) | +| LOW-8 | `_unset_handlers` permanently mutates target loggers | **FIXED** (doc-only per spec) | Docstring added on the method documenting the irreversible mutation. (`logging_instrument.py:104-107`) | +| LOW-9 | `LoggingInstrument.teardown()` forces root logger to WARNING | **FIXED** (doc-only per spec) | Docstring on `teardown` warns that pre-existing user configuration is overwritten. (`logging_instrument.py:163-167`) | +| LOW-10 | `OpentelemetryConfig` capitalization inconsistency | **FIXED** | Renamed `OpenTelemetryConfig`; `OpentelemetryConfig = OpenTelemetryConfig` alias preserved. (`opentelemetry_instrument.py:42, 160`) | + +### New findings + +#### UX-1 · `FastAPIConfig` silently stomps user-supplied `app.title`, `.debug`, `.version` + +**File:** `lite_bootstrap/bootstrappers/fastapi_bootstrapper.py:73-75` + +`__post_init__` unconditionally executes: + +```python +application.title = self.service_name +application.debug = self.service_debug +application.version = self.service_version +``` + +This runs **in both branches** of the `isinstance(self.application, UnsetType)` check — +the branch where lite-bootstrap built the app, and the branch where the user supplied a +pre-configured `FastAPI(title="my-svc", version="3.1.4", debug=False)`. The user's values +are clobbered by lite-bootstrap defaults (`service_name="micro-service"`, +`service_version="1.0.0"`, `service_debug=True`). + +Reproduction: + +```python +user_app = FastAPI(title="My API", version="3.0.0") +cfg = FastAPIConfig(application=user_app) +# user_app.title == "micro-service", user_app.version == "1.0.0", user_app.debug == True +``` + +**Fix shape:** restrict the override to the UnsetType branch (lite-bootstrap-built apps), +or only set fields that match the `BaseConfig` default (i.e. the user didn't customize the +config). The latter is friendlier: if a user passes a configured app AND overrides +`service_name` on the config, both signals exist — pick the config (most explicit) but +warn. Add a regression test (see TEST-NEW-1). + +#### UX-2 · `FastStreamPrometheusInstrument.collector_registry` is a per-instance new registry + +**File:** `lite_bootstrap/bootstrappers/faststream_bootstrapper.py:146-148` + +```python +collector_registry: "prometheus_client.CollectorRegistry" = dataclasses.field( + default_factory=_make_collector_registry, init=False +) +``` + +A brand-new `CollectorRegistry` is created per instrument instance. Any +`prometheus_client.Counter(...)` defined elsewhere using the default `REGISTRY` will not +be exposed on the FastStream metrics endpoint. FastMCP takes the opposite tack +(`prometheus_client.REGISTRY` — see `fastmcp_bootstrapper.py:120`); the two backends are +inconsistent. + +**Fix shape:** add a `prometheus_collector_registry: prometheus_client.CollectorRegistry | None = None` +config field on `FastStreamConfig` (or on `PrometheusConfig` for symmetry across +bootstrappers) and let the user inject the default `REGISTRY` if they want it. Default +behaviour stays the same (isolated registry) unless they opt in. + +#### UX-3 · `FastStreamConfig` is missing `opentelemetry_excluded_urls` + +**Files:** `lite_bootstrap/bootstrappers/faststream_bootstrapper.py:63-72`, +`lite_bootstrap/instruments/opentelemetry_instrument.py:96-105` + +`FastAPIConfig` (`fastapi_bootstrapper.py:53`) and `LitestarConfig` +(`litestar_bootstrapper.py:114`) both declare `opentelemetry_excluded_urls`. `FastStreamConfig` +does not. `_build_excluded_urls` uses `getattr(..., "opentelemetry_excluded_urls", [])` +which silently returns `[]`. FastStream users with ASGI mounts (health, metrics, AsyncAPI) +can't exclude additional URLs from OTel tracing. + +**Fix shape:** add the field to `FastStreamConfig` matching the FastAPI/Litestar pattern. + +#### UX-4 · `InstrumentDependencyMissingWarning` invisible under `python -W ignore` + +**File:** `lite_bootstrap/bootstrappers/base.py:63-67` + +The "configured but optional dependency missing" path is the deployment surprise the +prior audit highlighted. It now uses `warnings.warn(..., InstrumentDependencyMissingWarning, +stacklevel=3)`. But: Python processes started with `-W ignore` or in environments that set +`PYTHONWARNINGS=ignore` will swallow this entirely. `build_summary()` (logged at INFO) is +the user's only fallback signal — and that may also be filtered. + +**Fix shape:** when `check_dependencies()` returns False on a configured instrument, also +emit a `logger.warning(...)` line in addition to the warning (warnings module decoupled +from logging). Keep the existing warning for tools that filter on category. Add a +`filterwarnings = ["error::InstrumentSkippedWarning"]` to `pyproject.toml [tool.pytest.ini_options]` +so the test suite escalates accidental regressions (see TEST-NEW-7). + +#### UX-5 · `from_object` cannot pass an explicit `None`; asymmetric with `from_dict` + +**File:** `lite_bootstrap/instruments/base.py:21-26` + +```python +prepared_data = {field: value for field in field_names if (value := getattr(obj, field, None)) is not None} +``` + +`from_dict({"service_name": None})` succeeds (`test_config.py:92-94`). `from_object(obj)` +where `obj.service_name = None` drops `service_name` and the default kicks in +(`test_config.py:54-62`). Both behaviors are now documented, but they're asymmetric. +A user migrating between the two will be surprised when "explicit None" works on one and +not the other. + +**Fix shape:** document the asymmetry inline (currently in docstrings but worth a sentence +under "Conventions" in CLAUDE.md). Or unify: drop the `is not None` filter on `from_object` +and accept any attribute that is *present*. Risk: external objects often have unrelated +`None`-valued attributes that would override defaults. Status quo is defensible; the +documentation fix is enough. + +--- + +## Lens 2 — Logic (correctness) + +### Prior-finding status + +| ID | Title | Status | Evidence | +|---|---|---|---| +| CRIT-1 | Redoc URL ignores `root_path` | **FIXED** | `redoc_html` now reads `root_path` from scope and prepends to `redoc_js_url` and `openapi_url`. (`fastapi_helpers.py:52-59`) | +| CRIT-2 | `OpenTelemetryInstrument.teardown()` doesn't shut down tracer provider | **FIXED** | `_tracer_provider` cached on the instrument; `teardown` calls `shutdown()` in a `try/finally` that resets the cache. (`opentelemetry_instrument.py:84-86, 121-123, 152-156`) | +| CRIT-3 | Litestar double-teardown not guarded | **FIXED** | `BaseBootstrapper.teardown()` returns immediately on `not self.is_bootstrapped`. (`bootstrappers/base.py:92-94`) | +| DES-1 | Per-framework instrument subclasses are pure type-annotation boilerplate | **FIXED** | `BaseInstrument(Generic[ConfigT])`; remaining subclasses all carry real bootstrap/teardown logic — no pure-annotation subclasses left. (`instruments/base.py:29-33`) | +| DES-2 | Duplicated OTel service fields across configs | **FIXED** | `OpenTelemetryServiceFieldsConfig` mixin; `OpenTelemetryConfig` and `PyroscopeConfig` both inherit. (`opentelemetry_instrument.py:36-39`, `pyroscope_instrument.py:14`) | +| DES-3 | `from_object` semantics differ from `from_dict` and aren't documented | **FIXED** | Both methods have docstrings; semantics pinned by tests. (`instruments/base.py:15-26`, `test_config.py:54-94`) | +| DES-4 | `skip_sentry` leaks into Sentry `contexts.structlog` | **FIXED** | `IGNORED_STRUCTLOG_ATTRIBUTES` now contains `"skip_sentry"`. (`sentry_instrument.py:19-21`) | +| DES-5 | Dead `is_X_installed` conjuncts in `is_ready()` | **FIXED** | `is_ready` replaced with `is_configured`; the redundant conjuncts are gone, the lifecycle is `is_configured → check_dependencies → instantiate`. (`bootstrappers/base.py:54-69`) | +| REF-1 | Duplicated `_build_excluded_urls()` | **FIXED** | Hoisted to `OpenTelemetryInstrument._build_excluded_urls`; framework subclasses use it. (`opentelemetry_instrument.py:96-105`) | +| REF-2 | Dead defensive check in `LitestarLoggingInstrument.bootstrap()` | **FIXED** | Branch removed; body is unindented. (`litestar_bootstrapper.py:159-175`) | +| REF-3 | `BaseInstrument` uses `abc.ABC` but no abstract methods | **FIXED** | Now just `Generic[ConfigT]`, no ABC, no `# noqa: B027`. (`instruments/base.py:32-49`) | +| REF-4 | `logging_instrument.py` is 212 lines doing four jobs | **FIXED** | Split into `logging_factory.py` (factory + serializer + protocols, 75 lines) and `logging_instrument.py` (config + instrument + tracer_injection, 179 lines). | +| REF-5 | `swagger_instrument.py` / `prometheus_instrument.py` carry no logic | **PARTIAL** | Files still tiny; module docstrings now explain the split (config in instruments, behavior in bootstrappers). Acceptable resolution. | +| REF-6 | `frozen=True` + `object.__setattr__` workaround | **PARTIAL** | `LoggingInstrument._logger_factory` and `OpenTelemetryInstrument._tracer_provider` now use non-frozen dataclasses with `init=False, default_factory=lambda: None`. `FastAPIConfig.application` still uses `object.__setattr__` inside `__post_init__` — but the pattern is now documented in CLAUDE.md as the project's accepted "frozen-config bypass" convention. | +| REF-7 | Hardcoded `timeout=5` in FastStream health check | **FIXED** | `faststream_health_check_broker_timeout: float = 5.0` added to `FastStreamConfig`. (`faststream_bootstrapper.py:71, 106`) | +| LOW-1 | `if not callback:` → `if callback is None` | **FIXED** | `sentry_instrument.py:81` | +| LOW-2 | `sentry_before_send` typing | **FIXED** | `sentry_types.EventProcessor | None`. (`sentry_instrument.py:36`) | +| LOW-3 | `_format_span` uses `os.linesep` | **FIXED** | Replaced with literal `"\n"`. (`opentelemetry_instrument.py:26`) | +| LOW-5 | `_otel_apps` keyed by `id(next_app)` | **FIXED** (per-spec: comment) | One-line comment added documenting the id-reuse assumption. (`litestar_bootstrapper.py:79-81`) | + +### New findings + +#### LOG-1 · `OpenTelemetryInstrument.teardown()` leaves a shut-down provider as the process-global + +**File:** `lite_bootstrap/instruments/opentelemetry_instrument.py:146-156` + +`bootstrap()` calls `set_tracer_provider(tracer_provider)` (line 122) — this sets the +**process-global** provider via the OTel SDK. `teardown()` calls +`self._tracer_provider.shutdown()` but does **not** reset the global. After teardown, +`opentelemetry.trace.get_tracer_provider()` still returns the shut-down provider; any +code that creates spans afterward (e.g., during a test that imports the OTel API but +doesn't bootstrap a new provider) emits to a dead provider and the spans go nowhere. + +This is invisible during normal microservice lifecycles (bootstrap once, run forever, exit) +but surfaces in test suites and any process that re-bootstraps (see LOG-7/LOG-8 below). + +**Fix shape:** after `self._tracer_provider.shutdown()`, set the global back to OTel's +no-op default: + +```python +from opentelemetry.trace import NoOpTracerProvider +set_tracer_provider(NoOpTracerProvider()) +``` + +Document the constraint that `set_tracer_provider` is one-way in OTel < 1.27 (the SDK +warns "Overriding of current TracerProvider is not allowed" on second call); the no-op +reset works because OTel allows replacing with a no-op for shutdown. Validate against the +OTel version pinned in `pyproject.toml`. Add TEST-NEW-2. + +#### LOG-2 · OTel bootstrap silently disables two stdlib loggers permanently + +**File:** `lite_bootstrap/instruments/opentelemetry_instrument.py:107-109` + +```python +def bootstrap(self) -> None: + logging.getLogger("opentelemetry.instrumentation.instrumentor").disabled = True + logging.getLogger("opentelemetry.trace").disabled = True +``` + +These loggers are process-global. `teardown()` does not restore `disabled = False`. Any +other code in the same process — including unrelated OTel integrations the user wires up +themselves — has those loggers muted for the rest of the process life. Symmetric to LOW-9 +on the logging instrument (which is now documented but undocumented here). + +**Fix shape:** capture the previous `disabled` state in `bootstrap()`, restore in +`teardown()`. Or, drop the mutation entirely and tell users to set a `logging.Filter` on +their root config if they don't want OTel noise — but that's a behavior change. The +record-and-restore fix is the minimum-surprise option. + +#### LOG-3 · `LoggingInstrument.teardown()` unprotected root-handler loop + +**File:** `lite_bootstrap/instruments/logging_instrument.py:168-178` + +```python +def teardown(self) -> None: + structlog.reset_defaults() + root_logger = logging.getLogger() + for h in root_logger.handlers[:]: + root_logger.removeHandler(h) + h.close() # ← unprotected; can raise + root_logger.setLevel(logging.WARNING) + if self._logger_factory is not None: + try: + self._logger_factory.close_handlers() + finally: + self._logger_factory = None +``` + +If `h.close()` raises mid-iteration, the remaining root handlers stay attached, the level +is never reset to WARNING, and `self._logger_factory.close_handlers()` is never reached. +The `_logger_factory` slot is also not nulled, so a follow-up `bootstrap` won't reset it +(actually it re-builds via `memory_logger_factory` getter, but the prior factory's +handlers stay dangling on their associated loggers). + +**Fix shape:** wrap the loop in `try/finally` so the level-reset and factory-cleanup +always run; aggregate `h.close()` errors and re-raise after cleanup, similar to +`BaseBootstrapper.teardown`'s `TeardownError` pattern (`bootstrappers/base.py:96-105`). +Add TEST-NEW-3. + +#### LOG-4 · `FastStreamLoggingInstrument.bootstrap()` mutates broker state with no symmetric teardown + +**File:** `lite_bootstrap/bootstrappers/faststream_bootstrapper.py:110-120` + +```python +def bootstrap(self) -> None: + super().bootstrap() + broker = self.bootstrap_config.application.broker + if broker is not None and import_checker.is_structlog_installed and import_checker.is_faststream_installed: + logger = structlog.get_logger("faststream") + logger.setLevel(self.bootstrap_config.faststream_log_level) + broker.config.logger.params_storage = ManualLoggerStorage(logger) +``` + +`broker.config.logger.params_storage` is mutated. No `teardown()` override — the parent +`LoggingInstrument.teardown()` only handles structlog + root logger. After teardown the +broker keeps the lite-bootstrap structlog logger as its `params_storage`. In a long-lived +test process, subsequent `bootstrap()` calls overwrite it (idempotent enough), but a user +who tears down and expects "default broker logger" gets the lite-bootstrap one. + +**Fix shape:** save the prior `params_storage` in `bootstrap()`, restore in a new +`teardown()` override on `FastStreamLoggingInstrument`. Or document that the broker logger +mutation is permanent (matching the LOW-8 precedent on `_unset_handlers`). + +#### LOG-5 · Assertion-based type narrowing is stripped under `python -O` + +**Files:** `lite_bootstrap/bootstrappers/fastapi_bootstrapper.py:78-80`, +`lite_bootstrap/instruments/pyroscope_instrument.py:35-37` + +Both raised by `bandit` B101. The asserts are not safety checks — they're type-narrowing +preconditions: + +```python +def _narrow_app(config: "FastAPIConfig") -> "fastapi.FastAPI": + assert not isinstance(config.application, UnsetType) + return config.application +``` + +Under `python -O` (production optimization flag for some deployments), the assert is +stripped. `_narrow_app` then returns `config.application` which can still be `UNSET` if +something bypassed `__post_init__` (`dataclasses.fields(FastAPIConfig)` mutation, +`from_dict` from an external source, etc.). Downstream FastAPI calls receive `UNSET` and +raise an opaque `TypeError` instead of a clear `RuntimeError` from the narrow function. + +Same in pyroscope: `assert self.bootstrap_config.pyroscope_endpoint is not None` is a +documented precondition for direct callers that bypass `is_configured`. + +**Fix shape:** replace both with explicit raises: + +```python +def _narrow_app(config: "FastAPIConfig") -> "fastapi.FastAPI": + if isinstance(config.application, UnsetType): + raise RuntimeError("FastAPIConfig.application is unset; __post_init__ did not run") + return config.application +``` + +Keep `# noqa: TRY003` (or extract to a constant) for the message. Updates resolve both +bandit B101 findings; both functions remain trivial. + +#### LOG-6 · `LitestarOpenTelemetryInstrumentationMiddleware._otel_apps` cache grows monotonically + +**File:** `lite_bootstrap/bootstrappers/litestar_bootstrapper.py:75-99` + +The `_otel_apps: dict[int, ASGIApp]` cache is keyed by `id(next_app)` and never evicted. +LOW-5 in the prior audit documented the id-reuse-after-GC assumption (now noted in a +comment). The remaining issue: in long-lived processes where Litestar rebuilds the +middleware chain (hot-reload, plugin add/remove, `from_config` on a mutated `AppConfig`), +old `next_app` references remain in the dict, holding their `OpenTelemetryMiddleware` +wrapper alive, which holds a reference to the user's ASGI app. Slow memory growth. + +Practical impact is low for typical microservice deployments (chain is built once at +startup). Theoretical for hot-reload setups. + +**Fix shape:** if the same `next_app` reference is the only key ever seen, the cache is +overkill — replace with a single `self._otel_app` and check `next_app is self._cached_app`. +If multiple chains do legitimately exist, switch to `weakref.WeakValueDictionary` so +garbage collection of `next_app` evicts its OTel wrapper. + +#### LOG-7 · `FastMcpBootstrapper.__init__` stacks `_TeardownProvider`s on app reuse + +**File:** `lite_bootstrap/bootstrappers/fastmcp_bootstrapper.py:152-154` + +```python +def __init__(self, bootstrap_config: FastMcpConfig) -> None: + super().__init__(bootstrap_config) + self.bootstrap_config.application.add_provider(_TeardownProvider(self.teardown)) +``` + +If a user constructs two `FastMcpBootstrapper` instances around the same FastMCP +`application` (e.g., a test that reuses the default `_make_fastmcp()` instance via +`FastMcpConfig()` twice — though current default factory creates a new one each call, +custom code may inject the same app), each adds its own `_TeardownProvider`. On ASGI +shutdown both teardowns fire. `BaseBootstrapper.teardown()` is idempotent so the second +teardown is a no-op, but it still iterates `self.instruments` and short-circuits on +`is_bootstrapped=False` — no harm but extra overhead and confusing logs. + +**Fix shape:** track whether this bootstrapper already attached its provider; refuse to +re-attach. Or document that one bootstrapper per FastMCP instance is required. + +#### LOG-8 · `FastAPIBootstrapper.__init__` stacks lifespan wrappers on app reuse + +**File:** `lite_bootstrap/bootstrappers/fastapi_bootstrapper.py:197-205` + +Same pattern as LOG-7: `_merge_lifespan_context(old, self.lifespan_manager)` builds a +chain. Constructing the bootstrapper twice against the same `fastapi.FastAPI()` instance +stacks two lifespan wrappers; on shutdown, both `finally` branches call `self.teardown()`. +Teardown is idempotent, so the second call short-circuits — practical impact: extra +log lines and stack-depth. Not a correctness bug, but a footgun for tests that +reuse a `FastAPI` app fixture across bootstrappers. + +**Fix shape:** detect that `application.router.lifespan_context` is already wrapped by +this bootstrapper (e.g., via a sentinel attribute on the bound method) and skip the +re-wrap. Or document the constraint. + +#### LOG-9 · `SentryInstrument` has no `teardown()` override + +**File:** `lite_bootstrap/instruments/sentry_instrument.py:94-125` + +`sentry_sdk.init(dsn=..., before_send=..., ...)` is process-global. After +`bootstrapper.teardown()`, Sentry remains initialized and continues to capture events. +For the intended lifecycle ("bootstrap once, run forever, exit") this is fine. For test +processes that bootstrap-and-teardown repeatedly, Sentry state leaks between cases — the +test suite works around this by calling `sentry_sdk.init()` in `finally` blocks +(`test_sentry_instrument.py:43, 65`), which is undocumented friction. + +**Fix shape:** add an explicit `teardown()` that calls `sentry_sdk.flush()` and then +`sentry_sdk.init()` (with no args, resetting the SDK to the no-op state). Document the +test-suite friction goes away as a side effect. + +--- + +## Lens 3 — Security + +### Prior-finding status + +The 2026-05-31 audit had no security category. None of its CRIT/DES/REF/LOW items +classify as security. All security findings below are new. + +External tooling outcomes: + +- **`pip-audit`** (against the lockfile exported via `uv export --all-extras --no-hashes`): + no known vulnerabilities in 133 packages. Result is current as of 2026-06-05. +- **`bandit -r lite_bootstrap`**: two low-severity B101 (`assert` usage) findings, + both folded into LOG-5 above. + +### New findings + +#### SEC-1 · `root_path` reflected into Swagger/Redoc HTML without escaping + +**File:** `lite_bootstrap/helpers/fastapi_helpers.py:37-59` + +```python +root_path = request.scope.get("root_path", "").rstrip("/") +return get_swagger_ui_html( + openapi_url=f"{root_path}{app_openapi_url}", + ... + swagger_js_url=f"{root_path}{static_path}/swagger-ui-bundle.js", + ... +) +``` + +`get_swagger_ui_html` (FastAPI builtin) interpolates these URLs into `