Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/ccbot/handlers/card_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,15 @@ class CardState:
# transcript still spans many turn-pages. ``/clear`` leaves it True:
# that is an intentional wipe-to-zero.
seed_attempted: bool = False
# Transcript mtime (epoch seconds) at the last *empty* seed attempt, or
# -1.0 if never attempted. A freshly restored (``claude --resume``)
# session creates its card before claude has flushed the resumed
# transcript, so an early seed reads [] and must retry on a later event.
# ``_ensure_seeded`` only re-parses the (possibly multi-MB) JSONL once
# this advances, so a burst of events during the resume window costs one
# stat() each, not a full re-parse. Reset alongside ``seed_attempted``
# at the non-destructive re-seed sites.
seed_mtime: float = -1.0
# Stall-recovery flag. Set by ``maybe_finalize_stalled`` after it
# appends the STALL_NOTE final_text. If the stall was a false positive
# (a genuine assistant turn arrives after), the next
Expand Down
48 changes: 43 additions & 5 deletions src/ccbot/handlers/notifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,19 +459,53 @@ async def _seed_events_from_jsonl(
return events


def _transcript_mtime(sess: Session) -> float:
    """Stat the session's JSONL transcript and report its modification time.

    Returns ``st_mtime`` (epoch seconds) of the transcript file. Returns the
    sentinel ``-1.0`` whenever the file cannot be located or inspected:

    * ``sess`` has no ``window_id``;
    * the window state lacks a ``session_id`` or a ``cwd``;
    * ``build_session_file_path`` yields ``None``;
    * ``stat()`` raises ``OSError`` (e.g. the file does not exist yet).

    Only a single ``stat`` call touches the filesystem; the transcript is
    never opened or parsed here.
    """
    window_id = sess.window_id
    if not window_id:
        return -1.0

    # Function-scope import: the name is looked up on the module at call
    # time rather than bound once when this module is loaded.
    from ..session_claude_io import build_session_file_path

    window_state = session_manager.get_window_state(window_id)
    if not (window_state.session_id and window_state.cwd):
        return -1.0

    transcript = build_session_file_path(window_state.session_id, window_state.cwd)
    if transcript is None:
        return -1.0

    try:
        modified = transcript.stat().st_mtime
    except OSError:
        # Missing / unreadable transcript is reported the same way as an
        # unresolvable path.
        return -1.0
    return modified


async def _ensure_seeded(user_id: int, sess: Session, state: CardState) -> None:
"""Seed ``state.events`` from JSONL on first access after restart.

No-op when events already exist or when seeding has been attempted
before for this state. Idempotent — ``state.seed_attempted`` guards
against repeated JSONL reads. A wipe site that wants a re-seed clears
``seed_attempted`` (see ``CardState.seed_attempted``).
No-op when events already exist. Latches ``seed_attempted`` only on a
*successful* (non-empty) seed: a freshly restored (``claude --resume``)
session builds its card before claude has flushed the resumed transcript
to disk, so an early read returns [] — latching then would block the
seed forever and the history would never reach the card. An empty read
instead leaves the flag clear and retries on a later event, gated on the
transcript mtime advancing (``state.seed_mtime``) so a burst of events
during the resume window doesn't re-parse a multi-MB JSONL each time. A
wipe site that wants a re-seed clears ``seed_attempted`` + ``seed_mtime``
(see ``CardState.seed_attempted``).
"""
if state.events:
return
if state.seed_attempted:
return
state.seed_attempted = True
mtime = _transcript_mtime(sess)
if mtime >= 0.0 and mtime == state.seed_mtime:
# Nothing new on disk since the last empty attempt — skip the
# re-parse and wait for the transcript to grow.
return
state.seed_mtime = mtime
# User-settable depth — Settings → Card history (10/20/50/100).
try:
max_turns = int(
Expand All @@ -484,6 +518,7 @@ async def _ensure_seeded(user_id: int, sess: Session, state: CardState) -> None:
seeded = await _seed_events_from_jsonl(sess, max_turns=max_turns)
if seeded:
state.events = seeded
state.seed_attempted = True
logger.info(
"card_seeded user=%d sess=%s events=%d",
user_id,
Expand Down Expand Up @@ -598,6 +633,7 @@ def _recover_from_false_stall(state: CardState) -> None:
state.is_continuation = True
state.last_rendered = ""
state.seed_attempted = False
state.seed_mtime = -1.0
state.stall_finalized = False


Expand Down Expand Up @@ -902,6 +938,7 @@ def release_card_message(user_id: int, session_id: str) -> None:
# to re-seed so its footer page counter reflects the real recent
# turn-history instead of collapsing to ``1/1``.
state.seed_attempted = False
state.seed_mtime = -1.0
logger.info(
"card_release user=%d sess=%s",
user_id,
Expand Down Expand Up @@ -1541,6 +1578,7 @@ async def _update_session_card_locked(
# at a time and the footer page counter shows ``1/1`` until a
# second turn completes — even though the transcript is long.
state.seed_attempted = False
state.seed_mtime = -1.0
await _ensure_seeded(user_id, sess, state)

if not replaced and not _duplicate_of_seeded(state.events, new_event):
Expand Down
116 changes: 105 additions & 11 deletions tests/ccbot/handlers/test_card_seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,25 +104,119 @@ def _bp(_sid, _cwd):
assert called["path"] == 0 # no JSONL read because events present
assert len(state.events) == 1 # untouched

async def test_seed_attempted_only_once(self, monkeypatch) -> None:
async def test_successful_seed_latches(self, tmp_path: Path, monkeypatch) -> None:
    """A non-empty seed fills ``state.events`` and sets ``seed_attempted``.

    Writes a one-turn transcript (user message + assistant ``end_turn``
    reply) to a temp file, points the path resolver at it, and checks
    that a single ``_ensure_seeded`` call both loads events and latches
    the flag.
    """
    import ccbot.session_claude_io as scio
    from ccbot.session import Session, session_manager

    jsonl = tmp_path / "session.jsonl"
    _write_jsonl(
        jsonl,
        [
            {
                "type": "user",
                "message": {"role": "user", "content": "hi"},
                "timestamp": "2026-05-15T09:00:00Z",
            },
            {
                "type": "assistant",
                "message": {
                    "role": "assistant",
                    "content": [{"type": "text", "text": "ok"}],
                    "stop_reason": "end_turn",
                    "usage": {"input_tokens": 1, "output_tokens": 1},
                },
                "timestamp": "2026-05-15T09:00:01Z",
            },
        ],
    )
    # The window state must carry both a session id and a cwd, otherwise
    # the transcript path is never resolved.
    ws = session_manager.get_window_state("@seed-latch")
    ws.session_id = "sess-uuid"
    ws.cwd = "/some/dir"
    # Route every path lookup to the temp transcript written above.
    monkeypatch.setattr(scio, "build_session_file_path", lambda _s, _c: jsonl)
    state = CardState()
    sess = Session(id="x", name="y", window_id="@seed-latch")

    await _ensure_seeded(1, sess, state)

    assert len(state.events) >= 1
    assert state.seed_attempted is True

async def test_empty_seed_not_latched_retries_when_transcript_lands(
    self, tmp_path: Path, monkeypatch
) -> None:
    """An empty first seed must not latch; a later call seeds the history.

    Regression: a restored (``claude --resume``) session builds its card
    before claude has flushed the resumed transcript. The early read
    returns [] — it must NOT latch ``seed_attempted``, so that a later
    event (once the transcript is on disk) seeds the history.
    """
    import ccbot.session_claude_io as scio
    from ccbot.session import Session, session_manager

    ws = session_manager.get_window_state("@seed-restore")
    ws.session_id = "sess-uuid"
    ws.cwd = "/some/dir"
    jsonl = tmp_path / "resumed.jsonl"  # not flushed yet
    # Single resolver for the whole test: the same path is missing in
    # step 1 and present in step 3.
    monkeypatch.setattr(scio, "build_session_file_path", lambda _s, _c: jsonl)
    state = CardState()
    sess = Session(id="x", name="y", window_id="@seed-restore")

    # 1) transcript missing → empty seed, not latched.
    await _ensure_seeded(1, sess, state)
    assert state.events == []
    assert state.seed_attempted is False

    # 2) claude flushes the resumed transcript.
    _write_jsonl(
        jsonl,
        [
            {
                "type": "user",
                "message": {"role": "user", "content": "earlier turn"},
                "timestamp": "2026-05-15T09:00:00Z",
            },
            {
                "type": "assistant",
                "message": {
                    "role": "assistant",
                    "content": [{"type": "text", "text": "earlier reply"}],
                    "stop_reason": "end_turn",
                    "usage": {"input_tokens": 10, "output_tokens": 5},
                },
                "timestamp": "2026-05-15T09:00:01Z",
            },
        ],
    )

    # 3) next event re-seeds (mtime advanced) → history lands + latches.
    await _ensure_seeded(1, sess, state)
    assert len(state.events) >= 1
    assert state.seed_attempted is True

async def test_unchanged_empty_transcript_not_reparsed(
    self, tmp_path: Path, monkeypatch
) -> None:
    """The mtime gate suppresses re-parsing an unchanged, empty transcript.

    An existing but content-less transcript yields []; repeated
    ``_ensure_seeded`` calls must invoke the JSONL parser only once
    until the file changes, and must never latch ``seed_attempted``.
    """
    import ccbot.handlers.notifications as notif
    import ccbot.session_claude_io as scio
    from ccbot.session import Session, session_manager

    ws = session_manager.get_window_state("@seed-gate")
    ws.session_id = "sess-uuid"
    ws.cwd = "/some/dir"
    f = tmp_path / "empty.jsonl"
    f.write_text("")  # exists, empty → empty seed
    monkeypatch.setattr(scio, "build_session_file_path", lambda _s, _c: f)
    calls = {"n": 0}

    async def _spy(_sess, max_turns=0):
        # Stand-in for the real parser: count invocations, seed nothing.
        calls["n"] += 1
        return []

    monkeypatch.setattr(notif, "_seed_events_from_jsonl", _spy)
    state = CardState()
    sess = Session(id="x", name="y", window_id="@seed-gate")

    await _ensure_seeded(1, sess, state)
    await _ensure_seeded(1, sess, state)
    await _ensure_seeded(1, sess, state)

    # mtime never advanced → parsed exactly once; never latched.
    assert calls["n"] == 1
    assert state.seed_attempted is False
Loading