diff --git a/README.md b/README.md
index ea1bcae..3504efd 100644
--- a/README.md
+++ b/README.md
@@ -290,6 +290,26 @@ awkward (FastAPI middleware, Celery tasks).
 The SDK uses `contextvars`, so concurrent `asyncio.gather` tasks each
 keep their own span stack. No cross-task `parent_id` contamination.
 
+### Mid-conversation resets
+
+When a user clears chat history or a planner restarts from a checkpoint
+mid-trace, call `wikitrace.session_reset()` to mark the boundary.
+Spans before and after share the same `session_id` (cost rollups and
+user attribution stay grouped) but belong to distinct `session_segment`
+numbers so the dashboard renders them as separate threads. Spans
+emitted before the first reset are segment 0 and may omit the
+`session_segment` attribute, so treat a missing value as 0.
+
+```python
+with wikitrace.session(id="conv-1", user="alice"):
+    chain.invoke({"input": q1})              # segment 0
+    wikitrace.session_reset()                # → 1
+    chain.invoke({"input": "start over"})    # session_segment=1
+```
+
+Outside an active session it's a no-op that returns `0` — safe to call
+from library code that doesn't know whether tracing is active.
+
 ---
 
 ## Custom Properties
diff --git a/tests/test_sdk.py b/tests/test_sdk.py
index 9d9232d..3cfa45a 100644
--- a/tests/test_sdk.py
+++ b/tests/test_sdk.py
@@ -135,6 +135,45 @@ def test_set_session_imperative(trace_dir: Path):
     assert "session_id" not in spans[1]["attrs"]
 
 
+def test_session_reset_segments_under_same_session_id(trace_dir: Path):
+    """Mid-conversation reset: spans before and after share session_id
+    but carry distinct session_segment ids so the dashboard can render
+    them as separate threads while cost rollups still group them."""
+    wt.init(pipeline="t", trace_dir=trace_dir)
+    with wt.session(id="conv-1", user="alice"):
+        with wt.span("turn-A"):
+            pass
+        new_seg = wt.session_reset()
+        assert new_seg == 1
+        with wt.span("turn-B"):
+            pass
+        new_seg = wt.session_reset()
+        assert new_seg == 2
+        with wt.span("turn-C"):
+            pass
+    wt.end()
+
+    spans = {s["name"]: s for s in _read_spans(trace_dir)}
+    # All three turns share the same session_id.
+    assert spans["turn-A"]["attrs"]["session_id"] == "conv-1"
+    assert spans["turn-B"]["attrs"]["session_id"] == "conv-1"
+    assert spans["turn-C"]["attrs"]["session_id"] == "conv-1"
+    # First turn has no segment attr (segment 0 = original); subsequent
+    # turns carry incrementing segment ids.
+    assert spans["turn-A"]["attrs"].get("session_segment") in (None, 0)
+    assert spans["turn-B"]["attrs"]["session_segment"] == 1
+    assert spans["turn-C"]["attrs"]["session_segment"] == 2
+
+
+def test_session_reset_outside_session_is_noop():
+    """Calling session_reset() with no active session returns 0 and
+    does not raise — so library code can call it defensively."""
+    # Drop any session an earlier test left ambient so this test does
+    # not depend on execution order.
+    wt.clear_session()
+    assert wt.session_reset() == 0
+
+
 def test_async_gather_no_parent_id_contamination(trace_dir: Path):
     """5 concurrent async tasks each open + close their own span.
     No span should see another task's span as parent."""
diff --git a/wikitrace/__init__.py b/wikitrace/__init__.py
index aa6ee28..7dfde80 100644
--- a/wikitrace/__init__.py
+++ b/wikitrace/__init__.py
@@ -18,7 +18,7 @@
 from .sdk import (
     init, span, step, cite, end, current_trace_id,
     span_open, span_event, span_close,
-    session, set_session, clear_session,
+    session, set_session, clear_session, session_reset,
     register_span_start_hook, register_span_end_hook,
     register_span_event_hook, clear_hooks,
 )
@@ -32,7 +32,7 @@
 __all__ = [
     "init", "span", "step", "cite", "end", "current_trace_id",
     "span_open", "span_event", "span_close",
-    "session", "set_session", "clear_session",
+    "session", "set_session", "clear_session", "session_reset",
     "register_span_start_hook", "register_span_end_hook",
     "register_span_event_hook", "clear_hooks",
     "trace", "tool", "eval",
diff --git a/wikitrace/sdk.py b/wikitrace/sdk.py
index b6bd6f2..0d588c1 100644
--- a/wikitrace/sdk.py
+++ b/wikitrace/sdk.py
@@ -397,3 +397,45 @@ def set_session(
 
 def clear_session() -> None:
     _session_attrs.set({})
+
+
+def session_reset() -> int:
+    """Close the current conversation segment and start a new one
+    under the same ``session_id``.
+
+    Use this when an agent's conversation history is reset mid-trace
+    (user clears chat, planner restarts from a checkpoint, evaluator
+    rolls back state). Spans before and after the reset still share
+    the same ``session_id`` — so cost rollups, user attribution, and
+    "all activity for this user this hour" queries continue to group
+    them — but they carry distinct ``session_segment`` integers so
+    the dashboard can render them as separate threads.
+
+    This function only writes ``session_segment`` when a reset happens,
+    so consumers should treat a missing value as segment 0.
+
+    Returns the new segment number (starts at 1; increments with each
+    call). Outside an active session this is a no-op and returns 0.
+
+    Example::
+
+        with wikitrace.session(id="conv-1", user="alice"):
+            chain.invoke({"input": q1})              # segment 0
+            wikitrace.session_reset()                # bumps to segment 1
+            chain.invoke({"input": "start over"})    # segment 1
+    """
+    cur = _ambient_session()
+    if not cur.get("session_id"):
+        # No active session_id → nothing to segment. Returning 0
+        # rather than raising so downstream `wikitrace.session_reset()`
+        # calls in shared library code don't crash callers that
+        # forgot to wrap them in `session()`.
+        return 0
+    # A missing (or falsy) segment counts as segment 0, so the first
+    # reset yields 1.
+    next_seg = int(cur.get("session_segment") or 0) + 1
+    # Publish a fresh dict instead of mutating `cur` in place.
+    new = dict(cur)
+    new["session_segment"] = next_seg
+    _session_attrs.set(new)
+    return next_seg