def _strip_reasoning_item_ids_from_history_item(
    item: TResponseInputItem,
) -> TResponseInputItem | None:
    """Sanitize one session-history item before it is replayed to the Responses API.

    Only dict items whose ``type`` is ``"reasoning"`` are affected; every other item is
    returned unchanged (the very same object).

    For reasoning items:

    * The ``id`` key is removed. Some reasoning models emit ``rs_...`` IDs that are not
      guaranteed to be stable across turns, and replaying them in a later
      ``responses.create`` call can raise a 404 "Item with id 'rs_...' not found".
      Dropping the ID keeps the payload usable without relying on server-side item
      retention.
    * Items that carry no replayable payload -- neither a non-empty ``summary`` nor a
      non-empty ``encrypted_content`` -- are dropped, because nothing useful is left once
      the ID is gone. Items with ``encrypted_content`` are kept even when the summary is
      empty: that field is how reasoning is carried across turns when the API is used
      statelessly.

    Args:
        item: A single, already normalized history item.

    Returns:
        ``item`` itself when nothing has to change, a shallow copy without ``id`` for a
        replayable reasoning item, or ``None`` when the item must be omitted from the
        model input.
    """
    if not isinstance(item, dict) or item.get("type") != "reasoning":
        return item

    # Without a summary or encrypted content there is nothing to replay once the ID is
    # stripped, so the item is omitted entirely.
    if not (item.get("summary") or item.get("encrypted_content")):
        return None

    if "id" not in item:
        return item

    # Build a filtered shallow copy so the stored session item is never mutated.
    sanitized = {key: value for key, value in item.items() if key != "id"}
    # The cast target is quoted so no runtime lookup of the type alias is needed.
    return cast("TResponseInputItem", sanitized)