From 57cda038c918530a5d460818320daf08c40db36d Mon Sep 17 00:00:00 2001 From: Time4Mind <119820237+Time4Mind@users.noreply.github.com> Date: Sat, 20 Jun 2026 11:40:08 +0300 Subject: [PATCH] fix(card): seed dialog history into restored-session card MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A restored (`claude --resume`) session builds its live card before claude has flushed the resumed transcript to disk. `_ensure_seeded` ran during that window, read an empty/partial JSONL, returned [], but latched `seed_attempted = True` *before* the read — permanently blocking the seed. By the time the full transcript landed (and the first live event arrived) the guard short-circuited, so the card showed only new turns and the prior dialog history never appeared. Plain bot-restart worked because the JSONL was already fully written, so the first attempt succeeded. Latch `seed_attempted` only on a *successful* (non-empty) seed; on an empty read leave it clear and retry on a later event. Gate the retry on the transcript mtime advancing (new `CardState.seed_mtime`) so a burst of events during the resume window doesn't re-parse a multi-MB JSONL each time. The three non-destructive re-seed sites reset `seed_mtime` alongside `seed_attempted`. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/ccbot/handlers/card_model.py | 9 ++ src/ccbot/handlers/notifications.py | 48 ++++++++-- tests/ccbot/handlers/test_card_seed.py | 116 ++++++++++++++++++++++--- 3 files changed, 157 insertions(+), 16 deletions(-) diff --git a/src/ccbot/handlers/card_model.py b/src/ccbot/handlers/card_model.py index fa9c7d1e..260a5213 100644 --- a/src/ccbot/handlers/card_model.py +++ b/src/ccbot/handlers/card_model.py @@ -200,6 +200,15 @@ class CardState: # transcript still spans many turn-pages. ``/clear`` leaves it True: # that is an intentional wipe-to-zero. 
seed_attempted: bool = False + # Transcript mtime (epoch seconds) recorded at the last seed attempt, or + # -1.0 if never attempted / the transcript could not be stat()ed. A + # freshly restored (``claude --resume``) session creates its card before + # claude has flushed the resumed transcript, so an early seed reads [] + # and must retry on a later event. ``_ensure_seeded`` only re-parses the + # (possibly multi-MB) JSONL once this changes, so a burst of events + # during the resume window costs one stat() each, not a full re-parse. + # Reset alongside ``seed_attempted`` at the non-destructive re-seed sites. + seed_mtime: float = -1.0 # Stall-recovery flag. Set by ``maybe_finalize_stalled`` after it # appends the STALL_NOTE final_text. If the stall was a false positive # (a genuine assistant turn arrives after), the next diff --git a/src/ccbot/handlers/notifications.py b/src/ccbot/handlers/notifications.py index 0047d38b..6e92663e 100644 --- a/src/ccbot/handlers/notifications.py +++ b/src/ccbot/handlers/notifications.py @@ -459,19 +459,53 @@ async def _seed_events_from_jsonl( return events +def _transcript_mtime(sess: Session) -> float: + """Return the mtime (epoch seconds) of the session's JSONL transcript, + or -1.0 if the path can't be resolved / the file is missing. + + Cheap (single ``stat``) — used by ``_ensure_seeded`` to gate empty-seed + retries on a restored session without re-parsing the whole transcript. + """ + if not sess.window_id: + return -1.0 + from ..session_claude_io import build_session_file_path + + state = session_manager.get_window_state(sess.window_id) + if not state.session_id or not state.cwd: + return -1.0 + fp = build_session_file_path(state.session_id, state.cwd) + if fp is None: + return -1.0 + try: + return fp.stat().st_mtime + except OSError: + return -1.0 + + async def _ensure_seeded(user_id: int, sess: Session, state: CardState) -> None: """Seed ``state.events`` from JSONL on first access after restart. 
- No-op when events already exist or when seeding has been attempted - before for this state. Idempotent — ``state.seed_attempted`` guards - against repeated JSONL reads. A wipe site that wants a re-seed clears - ``seed_attempted`` (see ``CardState.seed_attempted``). + No-op when events already exist. Latches ``seed_attempted`` only on a + *successful* (non-empty) seed: a freshly restored (``claude --resume``) + session builds its card before claude has flushed the resumed transcript + to disk, so an early read returns [] — latching then would block the + seed forever and the history would never reach the card. An empty read + instead leaves the flag clear and retries on a later event, gated on the + transcript mtime advancing (``state.seed_mtime``) so a burst of events + during the resume window doesn't re-parse a multi-MB JSONL each time. A + wipe site that wants a re-seed clears ``seed_attempted`` + ``seed_mtime`` + (see ``CardState.seed_attempted``). """ if state.events: return if state.seed_attempted: return - state.seed_attempted = True + mtime = _transcript_mtime(sess) + if mtime >= 0.0 and mtime == state.seed_mtime: + # Nothing new on disk since the last empty attempt — skip the + # re-parse and wait for the transcript to grow. + return + state.seed_mtime = mtime # User-settable depth — Settings → Card history (10/20/50/100). 
try: max_turns = int( @@ -484,6 +518,7 @@ async def _ensure_seeded(user_id: int, sess: Session, state: CardState) -> None: seeded = await _seed_events_from_jsonl(sess, max_turns=max_turns) if seeded: state.events = seeded + state.seed_attempted = True logger.info( "card_seeded user=%d sess=%s events=%d", user_id, @@ -598,6 +633,7 @@ def _recover_from_false_stall(state: CardState) -> None: state.is_continuation = True state.last_rendered = "" state.seed_attempted = False + state.seed_mtime = -1.0 state.stall_finalized = False @@ -902,6 +938,7 @@ def release_card_message(user_id: int, session_id: str) -> None: # to re-seed so its footer page counter reflects the real recent # turn-history instead of collapsing to ``1/1``. state.seed_attempted = False + state.seed_mtime = -1.0 logger.info( "card_release user=%d sess=%s", user_id, @@ -1541,6 +1578,7 @@ async def _update_session_card_locked( # at a time and the footer page counter shows ``1/1`` until a # second turn completes — even though the transcript is long. state.seed_attempted = False + state.seed_mtime = -1.0 await _ensure_seeded(user_id, sess, state) if not replaced and not _duplicate_of_seeded(state.events, new_event): diff --git a/tests/ccbot/handlers/test_card_seed.py b/tests/ccbot/handlers/test_card_seed.py index d3769d2c..8b325bd2 100644 --- a/tests/ccbot/handlers/test_card_seed.py +++ b/tests/ccbot/handlers/test_card_seed.py @@ -104,25 +104,119 @@ def _bp(_sid, _cwd): assert called["path"] == 0 # no JSONL read because events present assert len(state.events) == 1 # untouched - async def test_seed_attempted_only_once(self, monkeypatch) -> None: + async def test_successful_seed_latches(self, tmp_path: Path, monkeypatch) -> None: + # A non-empty seed sets ``seed_attempted`` so later calls short-out. 
import ccbot.session_claude_io as scio from ccbot.session import Session, session_manager - ws = session_manager.get_window_state("@seed-once") + jsonl = tmp_path / "session.jsonl" + _write_jsonl( + jsonl, + [ + { + "type": "user", + "message": {"role": "user", "content": "hi"}, + "timestamp": "2026-05-15T09:00:00Z", + }, + { + "type": "assistant", + "message": { + "role": "assistant", + "content": [{"type": "text", "text": "ok"}], + "stop_reason": "end_turn", + "usage": {"input_tokens": 1, "output_tokens": 1}, + }, + "timestamp": "2026-05-15T09:00:01Z", + }, + ], + ) + ws = session_manager.get_window_state("@seed-latch") ws.session_id = "sess-uuid" ws.cwd = "/some/dir" - called = {"path": 0} + monkeypatch.setattr(scio, "build_session_file_path", lambda _s, _c: jsonl) + state = CardState() + sess = Session(id="x", name="y", window_id="@seed-latch") + await _ensure_seeded(1, sess, state) + assert len(state.events) >= 1 + assert state.seed_attempted is True + + async def test_empty_seed_not_latched_retries_when_transcript_lands( + self, tmp_path: Path, monkeypatch + ) -> None: + # Regression: a restored (``claude --resume``) session builds its + # card before claude has flushed the resumed transcript. The early + # read returns [] — it must NOT latch ``seed_attempted``, so that a + # later event (once the transcript is on disk) seeds the history. 
+ import ccbot.session_claude_io as scio + from ccbot.session import Session, session_manager - def _bp(_sid, _cwd): - called["path"] += 1 - return Path("/nonexistent-seed.jsonl") # not exists → empty seed + ws = session_manager.get_window_state("@seed-restore") + ws.session_id = "sess-uuid" + ws.cwd = "/some/dir" + jsonl = tmp_path / "resumed.jsonl" # not flushed yet + monkeypatch.setattr(scio, "build_session_file_path", lambda _s, _c: jsonl) + state = CardState() + sess = Session(id="x", name="y", window_id="@seed-restore") - monkeypatch.setattr(scio, "build_session_file_path", _bp) + # 1) transcript missing → empty seed, not latched. + await _ensure_seeded(1, sess, state) + assert state.events == [] + assert state.seed_attempted is False + + # 2) claude flushes the resumed transcript. + _write_jsonl( + jsonl, + [ + { + "type": "user", + "message": {"role": "user", "content": "earlier turn"}, + "timestamp": "2026-05-15T09:00:00Z", + }, + { + "type": "assistant", + "message": { + "role": "assistant", + "content": [{"type": "text", "text": "earlier reply"}], + "stop_reason": "end_turn", + "usage": {"input_tokens": 10, "output_tokens": 5}, + }, + "timestamp": "2026-05-15T09:00:01Z", + }, + ], + ) + + # 3) next event re-seeds (mtime advanced) → history lands + latches. + await _ensure_seeded(1, sess, state) + assert len(state.events) >= 1 + assert state.seed_attempted is True + + async def test_unchanged_empty_transcript_not_reparsed( + self, tmp_path: Path, monkeypatch + ) -> None: + # An existing but content-less transcript yields []; the mtime gate + # must suppress re-parsing it on every event until it changes. 
+ import ccbot.handlers.notifications as notif + import ccbot.session_claude_io as scio + from ccbot.session import Session, session_manager + + ws = session_manager.get_window_state("@seed-gate") + ws.session_id = "sess-uuid" + ws.cwd = "/some/dir" + f = tmp_path / "empty.jsonl" + f.write_text("") # exists, empty → empty seed + monkeypatch.setattr(scio, "build_session_file_path", lambda _s, _c: f) + calls = {"n": 0} + + async def _spy(_sess, max_turns=0): + calls["n"] += 1 + return [] + + monkeypatch.setattr(notif, "_seed_events_from_jsonl", _spy) state = CardState() - sess = Session(id="x", name="y", window_id="@seed-once") + sess = Session(id="x", name="y", window_id="@seed-gate") await _ensure_seeded(1, sess, state) await _ensure_seeded(1, sess, state) await _ensure_seeded(1, sess, state) - # Even with three calls, the path resolver fires exactly once — - # guarded by ``state.seed_attempted``. - assert called["path"] == 1 + # mtime never advanced → parsed exactly once; never latched. + assert calls["n"] == 1 + assert state.seed_attempted is False