Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/ccbot/handlers/card_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,14 @@ class CardState:
# transcript still spans many turn-pages. ``/clear`` leaves it True:
# that is an intentional wipe-to-zero.
seed_attempted: bool = False
# Stall-recovery flag. Set by ``maybe_finalize_stalled`` after it
# appends the STALL_NOTE final_text. If the stall was a false positive
# (a genuine assistant turn arrives after), the next
# ``update_session_card`` / ``finalize_task`` wipes the card binding
# and lets ``_send_card`` spawn a fresh message below the stalled
# stub — so the recovered answer is visible instead of being silently
# edited into a card the user has scrolled past or marked complete.
stall_finalized: bool = False


def _trim(s: str, limit: int = 200) -> str:
Expand Down
74 changes: 69 additions & 5 deletions src/ccbot/handlers/notifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,31 @@ def reset_card(user_id: int, session_id: str) -> None:
_cards.pop((user_id, session_id), None)


def _recover_from_false_stall(state: CardState) -> None:
"""Wipe the live-card binding after a false-positive stall_finalize.

Set when a genuine assistant turn lands AFTER
``maybe_finalize_stalled`` armed ``state.stall_finalized``. Clears
msg_id / events / pagination so the next render path goes through
``_send_card`` (fresh message below the stalled stub) rather than
``_edit_card`` (silent edit of the now-finalized card). The stalled
stub stays in chat history with its STALL_NOTE — we don't rewrite
it; the recovery message appears as a fresh card with
``is_continuation=True`` so the header carries the ``…continued``
marker.
"""
if state.pending_edit is not None and not state.pending_edit.done():
state.pending_edit.cancel()
state.pending_edit = None
state.msg_id = None
state.events = []
state.current_page_idx = None
state.is_continuation = True
state.last_rendered = ""
state.seed_attempted = False
state.stall_finalized = False


async def cancel_pending_card_edits(timeout: float = 2.0) -> None:
"""Cancel + drain every deferred ``_edit_card`` task across all cards.

Expand Down Expand Up @@ -1498,6 +1523,12 @@ async def _update_session_card_locked(
new_event: Event,
replaced: bool,
) -> None:
# Recover from a prior false-positive stall_finalize. Wipe the card
# binding so this real assistant turn lands on a fresh message
# below the stalled stub instead of being silently edited into it.
if state.stall_finalized:
_recover_from_false_stall(state)
await _ensure_seeded(user_id, sess, state)
# Trigger: long pause → fresh card.
if _is_stale(state):
state.msg_id = None
Expand Down Expand Up @@ -1619,6 +1650,14 @@ async def finalize_task(bot: Bot, user_id: int, sess: Session, final_text: str)
``paginate_events``.
"""
state = get_card_state(user_id, sess)
# Recover from a prior false-positive stall_finalize: wipe the card
# binding so this real answer spawns a fresh card below the stalled
# stub. Must run before ``_ensure_seeded`` so the seed targets the
# cleared events list. NOT triggered by ``maybe_finalize_stalled``'s
# own call into ``finalize_task`` — the flag is set only AFTER that
# path returns.
if state.stall_finalized:
_recover_from_false_stall(state)
# First event after a bot restart: seed JSONL history before
# appending the final answer so the user sees their context.
await _ensure_seeded(user_id, sess, state)
Expand Down Expand Up @@ -1850,7 +1889,16 @@ def is_card_busy(user_id: int, session_id: str) -> bool:
# True and this never fires during long thinking / a slow tool. We only
# trip when the spinner is gone or frozen AND no new renderable event
# arrived for this long — i.e. the subprocess produced nothing.
#
# Two-tier threshold by tail event type. Tools legitimately run for
# minutes (slow Bash, CHYT, Map-Reduce, network) and Claude routinely
# spends a comparable amount of time reasoning after the last tool
# result before emitting the final assistant turn — both produce a
# silent JSONL tail of ``tool_use``. Pre-textual silence (``text`` /
# ``thinking`` tail) is rarer and more suspicious because Claude is
# mid-emit, so the original threshold still applies there.
STALL_FINALIZE_AFTER_SECONDS = 90.0
STALL_FINALIZE_TOOL_USE_SECONDS = 300.0

# Note appended to the card when a stall is detected. Card-body strings
# in this module are not localized (header / "context:" / "goal:" are
Expand Down Expand Up @@ -1906,30 +1954,46 @@ async def maybe_finalize_stalled(
if state.in_menu_view or state.in_kb_mode:
return False
# Already finalized — nothing frozen to rescue.
if state.events[-1].type in ("final_text", "error"):
tail_type = state.events[-1].type
if tail_type in ("final_text", "error"):
return False
if state.last_event_ts <= 0:
return False
when = now if now is not None else time.time()
if (when - state.last_event_ts) < STALL_FINALIZE_AFTER_SECONDS:
threshold = (
STALL_FINALIZE_TOOL_USE_SECONDS
if tail_type == "tool_use"
else STALL_FINALIZE_AFTER_SECONDS
)
if (when - state.last_event_ts) < threshold:
return False
logger.warning(
"stall_finalize user=%d sess=%s wid=%s idle=%.0fs tail=%s",
"stall_finalize user=%d sess=%s wid=%s idle=%.0fs tail=%s threshold=%.0fs",
user_id,
sess.id,
sess.window_id,
when - state.last_event_ts,
state.events[-1].type,
tail_type,
threshold,
extra={
"event": "stall_finalize",
"user_id": user_id,
"session_id": sess.id,
"window_id": sess.window_id,
"idle_seconds": round(when - state.last_event_ts),
"tail_type": state.events[-1].type,
"tail_type": tail_type,
"threshold_seconds": round(threshold),
},
)
await finalize_task(bot, user_id, sess, STALL_NOTE)
# Arm the false-positive recovery: if a real assistant turn arrives
# after this, the next ``update_session_card`` / ``finalize_task``
# spawns a fresh card below the stalled stub instead of silently
# editing it. ``finalize_task`` already ran and re-fetched ``state``,
# so re-read from ``_cards`` to set the flag on the same instance.
post_state = _cards.get((user_id, sess.id))
if post_state is not None:
post_state.stall_finalized = True
return True


Expand Down
213 changes: 213 additions & 0 deletions tests/ccbot/handlers/test_stall_recovery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
"""Regression tests for stall_finalize false-positive recovery.

When ``maybe_finalize_stalled`` fires too eagerly (e.g. the metrics-debug
incident on 2026-06-17: ``tail=tool_use idle=96s`` while Claude was
reasoning toward the final answer), a genuine assistant turn lands
*after* the STALL_NOTE was already appended to the card. Without the
recovery path the real reply would be silently edited into a card the
user has already scrolled past or marked as complete.

The fix: ``maybe_finalize_stalled`` sets ``state.stall_finalized=True``
after the STALL_NOTE finalize_task lands. Both ``update_session_card``
and ``finalize_task`` check the flag on entry and wipe the binding via
``_recover_from_false_stall`` so the next render lands as a fresh
``_send_card`` below the stub.
"""

from __future__ import annotations

import time
from typing import Any
from unittest.mock import AsyncMock, MagicMock

import pytest

from ccbot.handlers import notifications
from ccbot.handlers.notifications import (
CardState,
Event,
_card_locks,
_cards,
_recover_from_false_stall,
_repost_intent,
)
from ccbot.session_models import Session
from ccbot.session_monitor import NewMessage


@pytest.fixture(autouse=True)
def _clear_card_state():
_cards.clear()
_card_locks.clear()
_repost_intent.clear()
yield
_cards.clear()
_card_locks.clear()
_repost_intent.clear()


def _make_sess(sid: str = "s1") -> Session:
return Session(
id=sid,
name="test",
window_id="@1",
workdir="/tmp",
state="active",
claude_session_id="uuid-" + sid,
)


def _stalled_msg(text: str = "real answer at last") -> NewMessage:
return NewMessage(
session_id="uuid-s1",
text=text,
is_complete=True,
content_type="text",
role="assistant",
stop_reason="end_turn",
)


def _seed_stalled_card(user_id: int, sess: Session, *, msg_id: int = 999) -> CardState:
"""Seed a card in the post-stall_finalize state: STALL_NOTE landed,
flag armed, the original tool_use + STALL_NOTE final_text are in
events. This mirrors what ``maybe_finalize_stalled`` leaves behind."""
now = time.time()
state = _cards.setdefault((user_id, sess.id), CardState())
state.msg_id = msg_id
state.events = [
Event(type="user_msg", text="run the analysis", started_at=now - 600),
Event(type="tool_use", text="Bash(…)", started_at=now - 500),
Event(
type="final_text",
text=notifications.STALL_NOTE,
started_at=now - 100,
),
]
state.last_event_ts = now - 100
state.stall_finalized = True
return state


def test_recover_helper_wipes_binding_and_flag():
"""``_recover_from_false_stall`` clears msg_id, events, last_rendered
AND the flag itself; sets is_continuation + clears seed_attempted so
the next event re-pulls JSONL context."""
state = CardState()
state.msg_id = 7177
state.events = [Event(type="final_text", text="stub", started_at=time.time())]
state.last_rendered = "rendered stub"
state.stall_finalized = True
state.seed_attempted = True
state.current_page_idx = 3
state.is_continuation = False

_recover_from_false_stall(state)

assert state.msg_id is None
assert state.events == []
assert state.last_rendered == ""
assert state.stall_finalized is False
assert state.seed_attempted is False
assert state.current_page_idx is None
assert state.is_continuation is True


@pytest.mark.asyncio
async def test_update_session_card_recovers_after_stall(monkeypatch):
"""A genuine assistant turn arriving after a false stall_finalize
must spawn a FRESH card (``_send_card``), NOT edit the stalled stub
(``_edit_card``). The recovery path is triggered by
``state.stall_finalized=True``."""
sess = _make_sess()
bot = AsyncMock()
sent: list[Any] = []
edits: list[Any] = []

async def fake_send_card(b, uid, s, st, *, text, reply_markup=None):
st.msg_id = 5000 + len(sent)
sent.append(text)

async def fake_edit_card(b, uid, st, *, text, reply_markup=None):
edits.append(text)
return True

monkeypatch.setattr(notifications, "_send_card", fake_send_card)
monkeypatch.setattr(notifications, "_edit_card", fake_edit_card)
monkeypatch.setattr(notifications, "_ensure_seeded", AsyncMock(return_value=None))
fake_active = MagicMock()
fake_active.id = sess.id
monkeypatch.setattr(
notifications.session_manager,
"get_active_session",
lambda uid: fake_active,
)
monkeypatch.setattr(
notifications.session_manager,
"get_user_settings",
lambda uid: {"live_lag": 0},
)

user_id = 42
state = _seed_stalled_card(user_id, sess, msg_id=7177)
assert state.stall_finalized is True
assert state.msg_id == 7177

await notifications.update_session_card(bot, user_id, sess, _stalled_msg())

assert len(sent) == 1, f"expected fresh card spawn, got sends={sent} edits={edits}"
assert edits == [], "stalled stub must not be edited"
assert _cards[(user_id, sess.id)].stall_finalized is False


@pytest.mark.asyncio
async def test_finalize_task_recovers_after_stall(monkeypatch):
"""A real end-of-turn assistant text arriving after a false stall
routes through ``finalize_task`` (not update_session_card). It also
must spawn a fresh card below the stub."""
sess = _make_sess()
bot = AsyncMock()
sent: list[Any] = []
edits: list[Any] = []

async def fake_send_card(b, uid, s, st, *, text, reply_markup=None):
st.msg_id = 6000 + len(sent)
sent.append(text)

async def fake_edit_card(b, uid, st, *, text, reply_markup=None):
edits.append(text)
return True

monkeypatch.setattr(notifications, "_send_card", fake_send_card)
monkeypatch.setattr(notifications, "_edit_card", fake_edit_card)
monkeypatch.setattr(notifications, "_ensure_seeded", AsyncMock(return_value=None))
fake_active = MagicMock()
fake_active.id = sess.id
monkeypatch.setattr(
notifications.session_manager,
"get_active_session",
lambda uid: fake_active,
)
monkeypatch.setattr(
notifications.session_manager,
"get_user_settings",
lambda uid: {"live_lag": 0, "card_page_lines": 45},
)
# ``finalize_task`` calls ``prewarm_pages_cache`` which would hit the
# JSONL file system path — stub it to a no-op.
fake_prewarm = AsyncMock(return_value=None)
monkeypatch.setattr(notifications, "_send_attachments", AsyncMock())

import ccbot.handlers.history as _history_mod

monkeypatch.setattr(_history_mod, "prewarm_pages_cache", fake_prewarm)

user_id = 42
state = _seed_stalled_card(user_id, sess, msg_id=7177)
assert state.stall_finalized is True

await notifications.finalize_task(bot, user_id, sess, "the real BT_fin answer")

assert len(sent) == 1, f"expected fresh card spawn, got sends={sent} edits={edits}"
assert edits == [], "stalled stub must not be edited by finalize_task"
assert _cards[(user_id, sess.id)].stall_finalized is False
10 changes: 8 additions & 2 deletions tests/e2e/test_stalled_finalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from ccbot.handlers import notifications
from ccbot.handlers.notifications import (
STALL_FINALIZE_AFTER_SECONDS,
STALL_FINALIZE_TOOL_USE_SECONDS,
STALL_NOTE,
CardState,
Event,
Expand All @@ -41,11 +42,16 @@

def _seed_frozen_card(*, tail_type: str = "tool_use") -> CardState:
"""Install an active-session card whose tail is non-terminal and whose
last event is older than the stall threshold."""
last event is older than the per-tail-type stall threshold."""
threshold = (
STALL_FINALIZE_TOOL_USE_SECONDS
if tail_type == "tool_use"
else STALL_FINALIZE_AFTER_SECONDS
)
now = time.time()
state = CardState()
state.msg_id = 7777
state.last_event_ts = now - (STALL_FINALIZE_AFTER_SECONDS + 30)
state.last_event_ts = now - (threshold + 30)
state.last_edit_ts = 0.0
state.events = [
Event(type="user_msg", text="do the long thing", started_at=now - 200),
Expand Down
Loading
Loading