From 34a5cfc496a1678d3fa400cacce45e541b11e67b Mon Sep 17 00:00:00 2001 From: PenguinDOOM Date: Sat, 2 May 2026 16:36:16 +0900 Subject: [PATCH 01/10] feat: tool search support in VS Code --- chatmock/model_registry.py | 4 + chatmock/responses_api.py | 10 +- chatmock/routes_openai.py | 44 ++++-- tests/test_models.py | 5 + tests/test_routes.py | 295 +++++++++++++++++++++++++++++++++++++ 5 files changed, 342 insertions(+), 16 deletions(-) diff --git a/chatmock/model_registry.py b/chatmock/model_registry.py index 9bddbeb..49a3590 100644 --- a/chatmock/model_registry.py +++ b/chatmock/model_registry.py @@ -15,6 +15,7 @@ class ModelSpec: aliases: tuple[str, ...] allowed_efforts: frozenset[str] variant_efforts: tuple[str, ...] + public_aliases: tuple[str, ...] = () uses_codex_instructions: bool = False @@ -126,6 +127,8 @@ class ModelSpec: _ALIASES[_spec.public_id] = _spec.upstream_id for _alias in _spec.aliases: _ALIASES[_alias] = _spec.upstream_id + for _public_alias in _spec.public_aliases: + _ALIASES[_public_alias] = _spec.upstream_id def _strip_model_name(model: str | None) -> tuple[str, str | None]: @@ -189,6 +192,7 @@ def list_public_models(expose_reasoning_models: bool = False) -> list[str]: model_ids: list[str] = [] for spec in _MODEL_SPECS: model_ids.append(spec.public_id) + model_ids.extend(spec.public_aliases) if expose_reasoning_models: model_ids.extend(f"{spec.public_id}-{effort}" for effort in spec.variant_efforts) return model_ids diff --git a/chatmock/responses_api.py b/chatmock/responses_api.py index 51bda2a..99677af 100644 --- a/chatmock/responses_api.py +++ b/chatmock/responses_api.py @@ -14,6 +14,7 @@ ) from .reasoning import build_reasoning_param from .session import ensure_session_id +from .utils import convert_tools_chat_to_responses @dataclass(frozen=True) @@ -89,12 +90,12 @@ def normalize_responses_payload( normalized = dict(payload) normalized["model"] = normalized_model normalized.pop("max_output_tokens", None) + normalized.pop("truncation", None) if "input" in normalized: normalized["input"] = canonicalize_responses_input(normalized.get("input")) - if "store" not in normalized: - normalized["store"] = False + normalized["store"] = False instructions = normalized.get("instructions") if not isinstance(instructions, str) or not instructions.strip(): @@ -122,6 +123,11 @@ def normalize_responses_payload( normalized["include"] = include_list tools = normalized.get("tools") + converted_tools = convert_tools_chat_to_responses(tools) + if converted_tools: + normalized["tools"] = converted_tools + tools = converted_tools + if (not isinstance(tools, list) or not tools) and bool(config.get("DEFAULT_WEB_SEARCH")): tool_choice = normalized.get("tool_choice") if not (isinstance(tool_choice, str) and tool_choice.strip().lower() == "none"): diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py index eb37842..fc4488a 100644 --- a/chatmock/routes_openai.py +++ b/chatmock/routes_openai.py @@ -102,6 +102,31 @@ def _service_tier_from_payload( return resolution.service_tier, None +def _extract_upstream_error_payload(upstream: Any) -> Dict[str, Any]: + raw = getattr(upstream, "content", b"") or b"" + text = (getattr(upstream, "text", "") or "").strip() + parsed: Any = None + + try: + if raw: + parsed = json.loads(raw.decode("utf-8", errors="ignore")) + elif text: + parsed = json.loads(text) + except Exception: + parsed = None + + if isinstance(parsed, dict): + return parsed + if isinstance(parsed, str) and parsed: + return {"error": {"message": parsed}} + if parsed is not None: + try: + return {"error": {"message": json.dumps(parsed, ensure_ascii=False)}} + except Exception: + pass + return {"error": {"message": text or "Upstream error"}} + + @openai_bp.route("/v1/chat/completions", methods=["POST"]) def chat_completions() -> Response: verbose = bool(current_app.config.get("VERBOSE")) @@ -244,11 +269,7 @@ def chat_completions() -> Response: created = int(time.time()) if upstream.status_code >= 400: - try: - raw = upstream.content - err_body = json.loads(raw.decode("utf-8", errors="ignore")) if raw else {"raw": upstream.text} - except Exception: - err_body = {"raw": upstream.text} + err_body = _extract_upstream_error_payload(upstream) if had_responses_tools: if verbose: print("[Passthrough] Upstream rejected tools; retrying without extra tools (args redacted)") @@ -268,22 +289,17 @@ def chat_completions() -> Response: if err2 is None and upstream2 is not None and upstream2.status_code < 400: upstream = upstream2 else: - err = { - "error": { - "message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), - "code": "RESPONSES_TOOLS_REJECTED", - } - } + failed_upstream = upstream2 if upstream2 is not None else upstream + err = _extract_upstream_error_payload(failed_upstream) if verbose: _log_json("OUT POST /v1/chat/completions", err) return jsonify(err), (upstream2.status_code if upstream2 is not None else upstream.status_code) else: if verbose: print("Upstream error status=", upstream.status_code) - err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}} if verbose: - _log_json("OUT POST /v1/chat/completions", err) - return jsonify(err), upstream.status_code + _log_json("OUT POST /v1/chat/completions", err_body) + return jsonify(err_body), upstream.status_code if is_stream: if verbose: diff --git a/tests/test_models.py b/tests/test_models.py index e82b516..d9bc42f 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -13,6 +13,9 @@ def test_normalizes_aliases(self) -> None: self.assertEqual(normalize_model_name("gpt5.3-codex-spark"), "gpt-5.3-codex-spark") self.assertEqual(normalize_model_name("codex"), "codex-mini-latest") + def test_preserves_unknown_model_names(self) -> None: + self.assertEqual(normalize_model_name("claude-sonnet-4-5"), "claude-sonnet-4-5") + def test_strips_reasoning_suffixes(self) -> None: self.assertEqual(normalize_model_name("gpt-5.4-high"), "gpt-5.4") self.assertEqual(normalize_model_name("gpt-5.4-mini-high"), "gpt-5.4-mini") @@ -27,9 +30,11 @@ def test_allowed_efforts_follow_registry(self) -> None: def test_public_models_include_variants(self) -> None: model_ids = list_public_models(expose_reasoning_models=True) + self.assertNotIn("claude-sonnet-4-5", model_ids) self.assertIn("gpt-5.4", model_ids) self.assertIn("gpt-5.4-mini", model_ids) self.assertIn("gpt-5.3-codex-spark", model_ids) + self.assertIn("gpt-5-codex", model_ids) self.assertIn("gpt-5.4-none", model_ids) self.assertIn("gpt-5.4-mini-xhigh", model_ids) self.assertNotIn("gpt-5.4-mini-none", model_ids) diff --git a/tests/test_routes.py b/tests/test_routes.py index c5d94bc..88ca715 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -48,6 +48,30 @@ def close(self) -> None: return None +TOOL_SEARCH_PARAMETERS = { + "type": "object", + "properties": {"query": {"type": "string"}}, + "required": ["query"], +} + +TOOL_SEARCH_CHAT_TOOL = { + "type": "function", + "function": { + "name": "tool_search", + "description": "Search the workspace for relevant files and symbols.", + "parameters": TOOL_SEARCH_PARAMETERS, + }, +} + +TOOL_SEARCH_RESPONSES_TOOL = { + "type": "function", + "name": "tool_search", + "description": "Search the workspace for relevant files and symbols.", + "strict": False, + "parameters": TOOL_SEARCH_PARAMETERS, +} + + class RouteTests(unittest.TestCase): def setUp(self) -> None: reset_session_state() @@ -62,6 +86,7 @@ def test_openai_models_list(self) -> None: self.assertIn("gpt-5.4", model_ids) self.assertIn("gpt-5.4-mini", model_ids) self.assertIn("gpt-5.3-codex-spark", model_ids) + self.assertNotIn("claude-sonnet-4-5", model_ids) def test_ollama_tags_list(self) -> None: response = self.client.get("/api/tags") @@ -91,6 +116,143 @@ def test_chat_completions(self, mock_start) -> None: self.assertEqual(body["choices"][0]["message"]["content"], "hello") self.assertEqual(body["model"], "gpt5.4-mini") + @patch("chatmock.routes_openai.start_upstream_request") + def test_chat_completions_preserves_upstream_json_error_object(self, mock_start) -> None: + upstream_error = { + "error": { + "message": "Unknown tool: tool_search", + "type": "invalid_request_error", + "param": "tools[0].name", + "code": "unknown_tool", + } + } + mock_start.return_value = ( + FakeUpstream( + status_code=400, + content=json.dumps(upstream_error).encode("utf-8"), + text=json.dumps(upstream_error), + ), + None, + ) + + response = self.client.post( + "/v1/chat/completions", + json={"model": "gpt-5.4", "messages": [{"role": "user", "content": "hi"}]}, + ) + + self.assertEqual(response.status_code, 400) + self.assertEqual(response.get_json(), upstream_error) + + @patch("chatmock.routes_openai.start_upstream_request") + def test_chat_completions_preserve_unknown_model_id(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + {"type": "response.output_text.delta", "delta": "hello"}, + {"type": "response.completed", "response": {"id": "resp-openai"}}, + ] + ), + None, + ) + requested_model = "claude-sonnet-4-5" + + response = self.client.post( + "/v1/chat/completions", + json={"model": requested_model, "messages": [{"role": "user", "content": "hi"}]}, + ) + + self.assertEqual(response.status_code, 200) + normalized_model = mock_start.call_args.args[0] + self.assertEqual(normalized_model, requested_model) + + @patch("chatmock.routes_openai.start_upstream_request") + def test_chat_completions_accepts_tool_search_function_tool(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + {"type": "response.output_text.delta", "delta": "hello"}, + {"type": "response.completed", "response": {"id": "resp-openai"}}, + ] + ), + None, + ) + + response = self.client.post( + "/v1/chat/completions", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "tools": [TOOL_SEARCH_CHAT_TOOL], + }, + ) + + self.assertEqual(response.status_code, 200) + self.assertEqual(mock_start.call_args.kwargs["tools"], [TOOL_SEARCH_RESPONSES_TOOL]) + + @patch("chatmock.routes_openai.start_upstream_request") + def test_chat_completions_tool_search_round_trips_through_function_call_path(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.output_item.done", + "item": { + "type": "function_call", + "call_id": "call_tool_search", + "name": "tool_search", + "arguments": json.dumps({"query": "workspace symbols"}), + }, + }, + {"type": "response.completed", "response": {"id": "resp-openai"}}, + ] + ), + None, + ) + + response = self.client.post( + "/v1/chat/completions", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "tools": [TOOL_SEARCH_CHAT_TOOL], + }, + ) + body = response.get_json() + + self.assertEqual(response.status_code, 200) + tool_calls = body["choices"][0]["message"]["tool_calls"] + self.assertEqual(tool_calls[0]["function"]["name"], "tool_search") + self.assertEqual( + json.loads(tool_calls[0]["function"]["arguments"]), + {"query": "workspace symbols"}, + ) + + @patch("chatmock.routes_openai.start_upstream_request") + def test_mixed_tools_and_responses_tools_prefer_standard_tools_contract(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + {"type": "response.output_text.delta", "delta": "hello"}, + {"type": "response.completed", "response": {"id": "resp-openai"}}, + ] + ), + None, + ) + + response = self.client.post( + "/v1/chat/completions", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "tools": [TOOL_SEARCH_CHAT_TOOL], + "responses_tools": [{"type": "web_search"}], + }, + ) + + self.assertEqual(response.status_code, 200) + outbound_tools = mock_start.call_args.kwargs["tools"] + self.assertEqual(outbound_tools[0], TOOL_SEARCH_RESPONSES_TOOL) + @patch("chatmock.routes_openai.start_upstream_request") def test_chat_completions_honors_debug_model_override(self, mock_start) -> None: app = create_app(debug_model="gpt-5.4") @@ -254,6 +416,108 @@ def test_responses_route_returns_completed_response_object(self, mock_start) -> self.assertEqual(outbound_payload["reasoning"]["effort"], "medium") self.assertIsInstance(outbound_payload["prompt_cache_key"], str) + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_overrides_incoming_store_true(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_store", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": { + "id": "resp_store", + "object": "response", + "status": "completed", + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + + response = self.client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello", "store": True}, + ) + + self.assertEqual(response.status_code, 200) + outbound_payload = mock_start.call_args.args[0] + self.assertEqual(outbound_payload["store"], False) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_accepts_tool_search_function_tool(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_123", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": { + "id": "resp_123", + "object": "response", + "status": "completed", + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + + response = self.client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_CHAT_TOOL]}, + ) + + self.assertEqual(response.status_code, 200) + outbound_payload = mock_start.call_args.args[0] + self.assertEqual(outbound_payload["tools"], [TOOL_SEARCH_RESPONSES_TOOL]) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_does_not_inject_web_search_when_standard_tools_present(self, mock_start) -> None: + app = create_app(default_web_search=True) + client = app.test_client() + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_123", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": { + "id": "resp_123", + "object": "response", + "status": "completed", + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + + response = client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_CHAT_TOOL]}, + ) + + self.assertEqual(response.status_code, 200) + outbound_tools = mock_start.call_args.args[0]["tools"] + self.assertEqual(len(outbound_tools), 1) + self.assertFalse(any(isinstance(tool, dict) and tool.get("type") == "web_search" for tool in outbound_tools)) + @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_honors_debug_model_override(self, mock_start) -> None: app = create_app(debug_model="gpt-5.4") @@ -318,6 +582,37 @@ def test_responses_route_strips_unsupported_max_output_tokens(self, mock_start) outbound_payload = mock_start.call_args.args[0] self.assertNotIn("max_output_tokens", outbound_payload) + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_strips_unsupported_truncation(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_truncation", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": { + "id": "resp_truncation", + "object": "response", + "status": "completed", + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + response = self.client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello", "truncation": "auto"}, + ) + self.assertEqual(response.status_code, 200) + outbound_payload = mock_start.call_args.args[0] + self.assertNotIn("truncation", outbound_payload) + @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_does_not_use_previous_response_id_for_http_follow_up(self, mock_start) -> None: mock_start.side_effect = [ From ec4c4badf65725128f4642913e95eaabca43c649 Mon Sep 17 00:00:00 2001 From: PenguinDOOM Date: Sat, 2 May 2026 16:56:53 +0900 Subject: [PATCH 02/10] feat: add client compat startup mode --- chatmock/app.py | 2 ++ chatmock/cli.py | 9 +++++++++ tests/test_routes.py | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/chatmock/app.py b/chatmock/app.py index e4541dc..03fa7be 100644 --- a/chatmock/app.py +++ b/chatmock/app.py @@ -13,6 +13,7 @@ def create_app( verbose: bool = False, verbose_obfuscation: bool = False, + client_compat: str = "default", reasoning_effort: str = "medium", reasoning_summary: str = "auto", reasoning_compat: str = "think-tags", @@ -26,6 +27,7 @@ def create_app( app.config.update( VERBOSE=bool(verbose), VERBOSE_OBFUSCATION=bool(verbose_obfuscation), + CLIENT_COMPAT=client_compat, REASONING_EFFORT=reasoning_effort, REASONING_SUMMARY=reasoning_summary, REASONING_COMPAT=reasoning_compat, diff --git a/chatmock/cli.py b/chatmock/cli.py index 8482cf3..40361b5 100644 --- a/chatmock/cli.py +++ b/chatmock/cli.py @@ -264,6 +264,7 @@ def cmd_serve( port: int, verbose: bool, verbose_obfuscation: bool, + client_compat: str, reasoning_effort: str, reasoning_summary: str, reasoning_compat: str, @@ -275,6 +276,7 @@ def cmd_serve( app = create_app( verbose=verbose, verbose_obfuscation=verbose_obfuscation, + client_compat=client_compat, reasoning_effort=reasoning_effort, reasoning_summary=reasoning_summary, reasoning_compat=reasoning_compat, @@ -300,6 +302,12 @@ def main() -> None: p_serve.add_argument("--host", default="127.0.0.1") p_serve.add_argument("--port", type=int, default=8000) p_serve.add_argument("--verbose", action="store_true", help="Enable verbose logging") + p_serve.add_argument( + "--client-compat", + choices=["default", "vscode"], + default="default", + help="Startup compatibility mode for client-specific behavior (default: default)", + ) p_serve.add_argument( "--verbose-obfuscation", action="store_true", @@ -371,6 +379,7 @@ def main() -> None: port=args.port, verbose=args.verbose, verbose_obfuscation=args.verbose_obfuscation, + client_compat=args.client_compat, reasoning_effort=args.reasoning_effort, reasoning_summary=args.reasoning_summary, reasoning_compat=args.reasoning_compat, diff --git a/tests/test_routes.py b/tests/test_routes.py index 88ca715..e4252ee 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -2,11 +2,13 @@ import json import socket +import sys import threading import time import unittest from unittest.mock import patch +import chatmock.cli as cli from chatmock.app import create_app from chatmock.session import reset_session_state from websockets.sync.client import connect as ws_connect @@ -72,6 +74,43 @@ def close(self) -> None: } +class StartupModeTests(unittest.TestCase): + def test_create_app_defaults_client_compat(self) -> None: + app = create_app() + self.assertEqual(app.config["CLIENT_COMPAT"], "default") + + def test_create_app_accepts_vscode_client_compat(self) -> None: + app = create_app(client_compat="vscode") + self.assertEqual(app.config["CLIENT_COMPAT"], "vscode") + + @patch("chatmock.cli.cmd_serve", return_value=0) + def test_cli_serve_defaults_client_compat(self, mock_cmd_serve) -> None: + with patch.object(sys, "argv", ["chatmock", "serve"]): + with self.assertRaises(SystemExit) as raised: + cli.main() + + self.assertEqual(raised.exception.code, 0) + self.assertEqual(mock_cmd_serve.call_args.kwargs["client_compat"], "default") + + @patch("chatmock.cli.cmd_serve", return_value=0) + def test_cli_serve_accepts_vscode_client_compat(self, mock_cmd_serve) -> None: + with patch.object(sys, "argv", ["chatmock", "serve", "--client-compat", "vscode"]): + with self.assertRaises(SystemExit) as raised: + cli.main() + + self.assertEqual(raised.exception.code, 0) + self.assertEqual(mock_cmd_serve.call_args.kwargs["client_compat"], "vscode") + + @patch("chatmock.cli.cmd_serve", return_value=0) + def test_cli_serve_rejects_invalid_client_compat(self, mock_cmd_serve) -> None: + with patch.object(sys, "argv", ["chatmock", "serve", "--client-compat", "invalid"]): + with self.assertRaises(SystemExit) as raised: + cli.main() + + self.assertEqual(raised.exception.code, 2) + mock_cmd_serve.assert_not_called() + + class RouteTests(unittest.TestCase): def setUp(self) -> None: reset_session_state() From de8ea5facf26dcbc19fc41ef0d9f22fbf3a4eac5 Mon Sep 17 00:00:00 2001 From: PenguinDOOM Date: Sat, 2 May 2026 17:08:19 +0900 Subject: [PATCH 03/10] feat: gate vscode compat shims --- chatmock/responses_api.py | 19 +++++++ chatmock/routes_openai.py | 22 ++++++++ tests/test_routes.py | 103 +++++++++++++++++++++++++++++++++++--- 3 files changed, 138 insertions(+), 6 deletions(-) diff --git a/chatmock/responses_api.py b/chatmock/responses_api.py index 99677af..3afc1e6 100644 --- a/chatmock/responses_api.py +++ b/chatmock/responses_api.py @@ -36,6 +36,19 @@ class NormalizedResponsesRequest: service_tier_resolution: ServiceTierResolution +def is_vscode_client_compat(config: Dict[str, Any]) -> bool: + return str(config.get("CLIENT_COMPAT") or "default").strip().lower() == "vscode" + + +def _uses_chat_completions_tool_schema(tools: Any) -> bool: + if not isinstance(tools, list): + return False + for tool in tools: + if isinstance(tool, dict) and isinstance(tool.get("function"), dict): + return True + return False + + def instructions_for_model(config: Dict[str, Any], model: str) -> str: base = config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS) if uses_codex_instructions(model): @@ -84,6 +97,12 @@ def normalize_responses_payload( config: Dict[str, Any], client_session_id: str | None = None, ) -> NormalizedResponsesRequest: + if not is_vscode_client_compat(config) and _uses_chat_completions_tool_schema(payload.get("tools")): + raise ResponsesRequestError( + "chat.completions tool schema on /v1/responses is only supported when CLIENT_COMPAT=vscode", + code="CLIENT_COMPAT_UNSUPPORTED", + ) + requested_model = payload.get("model") if isinstance(payload.get("model"), str) else None normalized_model = normalize_model_name(requested_model, config.get("DEBUG_MODEL")) diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py index fc4488a..ff70028 100644 --- a/chatmock/routes_openai.py +++ b/chatmock/routes_openai.py @@ -16,6 +16,7 @@ aggregate_response_from_sse, extract_client_session_id, instructions_for_model, + is_vscode_client_compat, normalize_responses_payload, stream_upstream_bytes, ) @@ -127,6 +128,21 @@ def _extract_upstream_error_payload(upstream: Any) -> Dict[str, Any]: return {"error": {"message": text or "Upstream error"}} +def _client_compat_error_response(feature_name: str, route_name: str, *, verbose: bool = False) -> Response: + err = { + "error": { + "message": f"{feature_name} on {route_name} is only supported when CLIENT_COMPAT=vscode", + "code": "CLIENT_COMPAT_UNSUPPORTED", + } + } + if verbose: + _log_json(f"OUT POST {route_name}", err) + resp = make_response(jsonify(err), 400) + for key, value in build_cors_headers().items(): + resp.headers.setdefault(key, value) + return resp + + @openai_bp.route("/v1/chat/completions", methods=["POST"]) def chat_completions() -> Response: verbose = bool(current_app.config.get("VERBOSE")) @@ -152,6 +168,12 @@ def chat_completions() -> Response: _log_json("OUT POST /v1/chat/completions", err) return jsonify(err), 400 + if not is_vscode_client_compat(current_app.config): + if "responses_tools" in payload: + return _client_compat_error_response("responses_tools", "/v1/chat/completions", verbose=verbose) + if "responses_tool_choice" in payload: + return _client_compat_error_response("responses_tool_choice", "/v1/chat/completions", verbose=verbose) + requested_model = payload.get("model") model = normalize_model_name(requested_model, current_app.config.get("DEBUG_MODEL")) messages = payload.get("messages") diff --git a/tests/test_routes.py b/tests/test_routes.py index e4252ee..50227e7 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -267,7 +267,43 @@ def test_chat_completions_tool_search_round_trips_through_function_call_path(sel ) @patch("chatmock.routes_openai.start_upstream_request") - def test_mixed_tools_and_responses_tools_prefer_standard_tools_contract(self, mock_start) -> None: + def test_chat_completions_rejects_responses_tools_in_default_mode(self, mock_start) -> None: + response = self.client.post( + "/v1/chat/completions", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "responses_tools": [{"type": "web_search"}], + }, + ) + body = response.get_json() + + self.assertEqual(response.status_code, 400) + self.assertEqual(body["error"]["code"], "CLIENT_COMPAT_UNSUPPORTED") + self.assertIn("responses_tools", body["error"]["message"]) + mock_start.assert_not_called() + + @patch("chatmock.routes_openai.start_upstream_request") + def test_chat_completions_rejects_responses_tool_choice_in_default_mode(self, mock_start) -> None: + response = self.client.post( + "/v1/chat/completions", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "responses_tool_choice": "none", + }, + ) + body = response.get_json() + + self.assertEqual(response.status_code, 400) + self.assertEqual(body["error"]["code"], "CLIENT_COMPAT_UNSUPPORTED") + self.assertIn("responses_tool_choice", body["error"]["message"]) + mock_start.assert_not_called() + + @patch("chatmock.routes_openai.start_upstream_request") + def test_mixed_tools_and_responses_tools_prefer_standard_tools_contract_in_vscode_mode(self, mock_start) -> None: + app = create_app(client_compat="vscode") + client = app.test_client() mock_start.return_value = ( FakeUpstream( [ @@ -278,19 +314,21 @@ def test_mixed_tools_and_responses_tools_prefer_standard_tools_contract(self, mo None, ) - response = self.client.post( + response = client.post( "/v1/chat/completions", json={ "model": "gpt-5.4", "messages": [{"role": "user", "content": "hi"}], "tools": [TOOL_SEARCH_CHAT_TOOL], "responses_tools": [{"type": "web_search"}], + "responses_tool_choice": "none", }, ) self.assertEqual(response.status_code, 200) outbound_tools = mock_start.call_args.kwargs["tools"] self.assertEqual(outbound_tools[0], TOOL_SEARCH_RESPONSES_TOOL) + self.assertEqual(mock_start.call_args.kwargs["tool_choice"], "none") @patch("chatmock.routes_openai.start_upstream_request") def test_chat_completions_honors_debug_model_override(self, mock_start) -> None: @@ -489,7 +527,22 @@ def test_responses_route_overrides_incoming_store_true(self, mock_start) -> None self.assertEqual(outbound_payload["store"], False) @patch("chatmock.routes_openai.start_upstream_raw_request") - def test_responses_route_accepts_tool_search_function_tool(self, mock_start) -> None: + def test_responses_route_rejects_chat_completions_style_tool_in_default_mode(self, mock_start) -> None: + response = self.client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_CHAT_TOOL]}, + ) + body = response.get_json() + + self.assertEqual(response.status_code, 400) + self.assertEqual(body["error"]["code"], "CLIENT_COMPAT_UNSUPPORTED") + self.assertIn("chat.completions tool schema", body["error"]["message"]) + mock_start.assert_not_called() + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_accepts_chat_completions_style_tool_in_vscode_mode(self, mock_start) -> None: + app = create_app(client_compat="vscode") + client = app.test_client() mock_start.return_value = ( FakeUpstream( [ @@ -512,15 +565,53 @@ def test_responses_route_accepts_tool_search_function_tool(self, mock_start) -> None, ) - response = self.client.post( + response = client.post( "/v1/responses", - json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_CHAT_TOOL]}, + json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_RESPONSES_TOOL]}, ) self.assertEqual(response.status_code, 200) outbound_payload = mock_start.call_args.args[0] self.assertEqual(outbound_payload["tools"], [TOOL_SEARCH_RESPONSES_TOOL]) + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_accepts_standard_function_tools_in_both_modes(self, mock_start) -> None: + for client_compat in ("default", "vscode"): + with self.subTest(client_compat=client_compat): + app = create_app(client_compat=client_compat) + client = app.test_client() + mock_start.reset_mock() + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_123", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": { + "id": "resp_123", + "object": "response", + "status": "completed", + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + + response = client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_RESPONSES_TOOL]}, + ) + + self.assertEqual(response.status_code, 200) + outbound_payload = mock_start.call_args.args[0] + self.assertEqual(outbound_payload["tools"], [TOOL_SEARCH_RESPONSES_TOOL]) + @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_does_not_inject_web_search_when_standard_tools_present(self, mock_start) -> None: app = create_app(default_web_search=True) @@ -549,7 +640,7 @@ def test_responses_route_does_not_inject_web_search_when_standard_tools_present( response = client.post( "/v1/responses", - json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_CHAT_TOOL]}, + json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_RESPONSES_TOOL]}, ) self.assertEqual(response.status_code, 200) From c910b1a1ed6af159087e79db3a3fd1ed4a1a6e29 Mon Sep 17 00:00:00 2001 From: PenguinDOOM Date: Sat, 2 May 2026 17:13:27 +0900 Subject: [PATCH 04/10] fix: update tool search response to use chat tool Updated the response API test to utilize TOOL_SEARCH_CHAT_TOOL instead of TOOL_SEARCH_RESPONSES_TOOL for the post request in RouteTests. --- tests/test_routes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_routes.py b/tests/test_routes.py index 50227e7..eec1cd6 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -567,7 +567,7 @@ def test_responses_route_accepts_chat_completions_style_tool_in_vscode_mode(self response = client.post( "/v1/responses", - json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_RESPONSES_TOOL]}, + json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_CHAT_TOOL]}, ) self.assertEqual(response.status_code, 200) From 5ce9115b061e575639808937fb91ab54c5520eac Mon Sep 17 00:00:00 2001 From: PenguinDOOM Date: Sat, 2 May 2026 17:49:13 +0900 Subject: [PATCH 05/10] test: align compat mode surfaces --- chatmock/routes_ollama.py | 66 ++++++--- tests/test_routes.py | 274 ++++++++++++++++++++++++++++++++------ 2 files changed, 277 insertions(+), 63 deletions(-) diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py index 5da18d0..d7c1645 100644 --- a/chatmock/routes_ollama.py +++ b/chatmock/routes_ollama.py @@ -3,8 +3,9 @@ import json import datetime import time -from typing import Any, Dict, List +from typing import Any, Dict, List, cast +from requests import Response as RequestsResponse from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS @@ -12,7 +13,7 @@ from .limits import record_rate_limits_from_response from .http import build_cors_headers from .model_registry import list_public_models, uses_codex_instructions -from .responses_api import instructions_for_model +from .responses_api import instructions_for_model, is_vscode_client_compat from .reasoning import ( allowed_efforts_for_model, build_reasoning_param, @@ -56,6 +57,21 @@ def _gen(): return _gen() +def _client_compat_error_response(feature_name: str, route_name: str, *, verbose: bool = False) -> Response: + err = { + "error": { + "message": f"{feature_name} on {route_name} is only supported when CLIENT_COMPAT=vscode", + "code": "CLIENT_COMPAT_UNSUPPORTED", + } + } + if verbose: + _log_json(f"OUT POST {route_name}", err) + resp = make_response(jsonify(err), 400) + for key, value in build_cors_headers().items(): + resp.headers.setdefault(key, value) + return resp + + @ollama_bp.route("/api/version", methods=["GET"]) def ollama_version() -> Response: if bool(current_app.config.get("VERBOSE")): @@ -182,7 +198,7 @@ def ollama_chat() -> Response: err = {"error": "Invalid JSON body"} if verbose: _log_json("OUT POST /api/chat", err) - return jsonify(err), 400 + return make_response(jsonify(err), 400) model = payload.get("model") raw_messages = payload.get("messages") @@ -199,6 +215,13 @@ def ollama_chat() -> Response: if stream_req is None: stream_req = True stream_req = bool(stream_req) + + if not is_vscode_client_compat(current_app.config): + if "responses_tools" in payload: + return _client_compat_error_response("responses_tools", "/api/chat", verbose=verbose) + if "responses_tool_choice" in payload: + return _client_compat_error_response("responses_tool_choice", "/api/chat", verbose=verbose) + tools_req = payload.get("tools") if isinstance(payload.get("tools"), list) else [] tools_responses = convert_tools_chat_to_responses(normalize_ollama_tools(tools_req)) tool_choice = payload.get("tool_choice", "auto") @@ -216,7 +239,7 @@ def ollama_chat() -> Response: err = {"error": "Only web_search/web_search_preview are supported in responses_tools"} if verbose: _log_json("OUT POST /api/chat", err) - return jsonify(err), 400 + return make_response(jsonify(err), 400) extra_tools.append(_t) if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")): rtc = payload.get("responses_tool_choice") @@ -233,7 +256,7 @@ def ollama_chat() -> Response: err = {"error": "responses_tools too large"} if verbose: _log_json("OUT POST /api/chat", err) - return jsonify(err), 400 + return make_response(jsonify(err), 400) had_responses_tools = True tools_responses = (tools_responses or []) + extra_tools @@ -245,7 +268,7 @@ def ollama_chat() -> Response: err = {"error": "Invalid request format"} if verbose: _log_json("OUT POST /api/chat", err) - return jsonify(err), 400 + return make_response(jsonify(err), 400) input_items = convert_chat_messages_to_responses_input(messages) @@ -263,7 +286,7 @@ def ollama_chat() -> Response: err = {"error": service_tier_resolution.error_message} if verbose: _log_json("OUT POST /api/chat", err) - return jsonify(err), 400 + return make_response(jsonify(err), 400) upstream, error_resp = start_upstream_request( normalized_model, input_items, @@ -293,13 +316,18 @@ def ollama_chat() -> Response: pass return error_resp - record_rate_limits_from_response(upstream) + upstream_resp = cast(RequestsResponse, upstream) + record_rate_limits_from_response(upstream_resp) - if upstream.status_code >= 400: + if upstream_resp.status_code >= 400: try: - err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"raw": upstream.text} + if upstream_resp.content: + parsed_err_body = json.loads(upstream_resp.content.decode("utf-8", errors="ignore")) + err_body: Dict[str, Any] = parsed_err_body if isinstance(parsed_err_body, dict) else {"raw": parsed_err_body} + else: + err_body = {"raw": upstream_resp.text} except Exception: - err_body = {"raw": upstream.text} + err_body = {"raw": upstream_resp.text} if had_responses_tools: if verbose: print("[Passthrough] Upstream rejected tools; retrying without extras (args redacted)") @@ -322,19 +350,19 @@ def ollama_chat() -> Response: ) record_rate_limits_from_response(upstream2) if err2 is None and upstream2 is not None and upstream2.status_code < 400: - upstream = upstream2 + upstream_resp = upstream2 else: err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), "code": "RESPONSES_TOOLS_REJECTED"}} if verbose: _log_json("OUT POST /api/chat", err) - return jsonify(err), (upstream2.status_code if upstream2 is not None else upstream.status_code) + return make_response(jsonify(err), upstream2.status_code if upstream2 is not None else upstream_resp.status_code) else: if verbose: - print("/api/chat upstream error status=", upstream.status_code, " body:", json.dumps(err_body)[:2000]) + print("/api/chat upstream error status=", upstream_resp.status_code, " body:", json.dumps(err_body)[:2000]) err = {"error": (err_body.get("error", {}) or {}).get("message", "Upstream error")} if verbose: _log_json("OUT POST /api/chat", err) - return jsonify(err), upstream.status_code + return make_response(jsonify(err), upstream_resp.status_code) created_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") model_out = model if isinstance(model, str) and model.strip() else normalized_model @@ -348,7 +376,7 @@ def _gen(): pending_summary_paragraph = False full_parts: List[str] = [] try: - for raw_line in upstream.iter_lines(decode_unicode=False): + for raw_line in upstream_resp.iter_lines(decode_unicode=False): if not raw_line: continue line = raw_line.decode("utf-8", errors="ignore") if isinstance(raw_line, (bytes, bytearray)) else raw_line @@ -478,7 +506,7 @@ def _gen(): elif kind == "response.completed": break finally: - upstream.close() + upstream_resp.close() if compat == "think-tags" and think_open and not think_closed: yield ( json.dumps( @@ -518,7 +546,7 @@ def _gen(): reasoning_full_text = "" tool_calls: List[Dict[str, Any]] = [] try: - for raw in upstream.iter_lines(decode_unicode=False): + for raw in upstream_resp.iter_lines(decode_unicode=False): if not raw: continue line = raw.decode("utf-8", errors="ignore") if isinstance(raw, (bytes, bytearray)) else raw @@ -557,7 +585,7 @@ def _gen(): elif kind == "response.completed": break finally: - upstream.close() + upstream_resp.close() if (current_app.config.get("REASONING_COMPAT", "think-tags") or "think-tags").strip().lower() == "think-tags": rtxt_parts = [] diff --git a/tests/test_routes.py b/tests/test_routes.py index eec1cd6..deaa08b 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -50,6 +50,42 @@ def close(self) -> None: return None +class FakeUpstreamWebsocket: + def __init__(self, messages: list[dict[str, object]] | None = None) -> None: + self.sent: list[str] = [] + self._messages = [json.dumps(message) for message in (messages or [])] + + def send(self, message: str) -> None: + self.sent.append(message) + + def recv(self) -> str: + return self._messages.pop(0) + + def close(self) -> None: + return None + + +def start_test_server(app) -> tuple[str, int]: + sock = socket.socket() + sock.bind(("127.0.0.1", 0)) + host, port = sock.getsockname() + sock.close() + + server_thread = threading.Thread( + target=app.run, + kwargs={ + "host": host, + "port": port, + "use_reloader": False, + "threaded": True, + }, + daemon=True, + ) + server_thread.start() + time.sleep(0.5) + return host, port + + TOOL_SEARCH_PARAMETERS = { "type": "object", "properties": {"query": {"type": "string"}}, @@ -392,6 +428,128 @@ def test_ollama_chat_honors_debug_model_override(self, mock_start) -> None: self.assertEqual(mock_start.call_args.args[0], "gpt-5.4") self.assertEqual(body["model"], "gpt-5.4") + @patch("chatmock.routes_ollama.start_upstream_request") + def test_ollama_chat_rejects_responses_tools_in_default_mode(self, mock_start) -> None: + response = self.client.post( + "/api/chat", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "stream": False, + "responses_tools": [{"type": "web_search"}], + }, + ) + body = response.get_json() + + self.assertEqual(response.status_code, 400) + self.assertEqual(body["error"]["code"], "CLIENT_COMPAT_UNSUPPORTED") + self.assertIn("responses_tools", body["error"]["message"]) + mock_start.assert_not_called() + + @patch("chatmock.routes_ollama.start_upstream_request") + def test_ollama_chat_rejects_responses_tool_choice_in_default_mode(self, mock_start) -> None: + response = self.client.post( + "/api/chat", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "stream": False, + "responses_tool_choice": "none", + }, + ) + body = response.get_json() + + self.assertEqual(response.status_code, 400) + self.assertEqual(body["error"]["code"], "CLIENT_COMPAT_UNSUPPORTED") + self.assertIn("responses_tool_choice", body["error"]["message"]) + mock_start.assert_not_called() + + @patch("chatmock.routes_ollama.start_upstream_request") + def test_ollama_chat_accepts_responses_extensions_in_vscode_mode(self, mock_start) -> None: + app = create_app(client_compat="vscode") + client = app.test_client() + mock_start.return_value = ( + FakeUpstream( + [ + {"type": "response.output_text.delta", "delta": "hello"}, + {"type": "response.completed"}, + ] + ), + None, + ) + + response = client.post( + "/api/chat", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "stream": False, + "responses_tools": [{"type": "web_search"}], + "responses_tool_choice": "none", + }, + ) + + self.assertEqual(response.status_code, 200) + self.assertEqual(mock_start.call_args.kwargs["tools"], [{"type": "web_search"}]) + self.assertEqual(mock_start.call_args.kwargs["tool_choice"], "none") + + @patch("chatmock.routes_ollama.start_upstream_request") + def test_ollama_chat_accepts_responses_tool_choice_in_vscode_mode(self, mock_start) -> None: + app = create_app(client_compat="vscode") + client = app.test_client() + mock_start.return_value = ( + FakeUpstream( + [ + {"type": "response.output_text.delta", "delta": "hello"}, + {"type": "response.completed"}, + ] + ), + None, + ) + + response = client.post( + "/api/chat", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "stream": False, + "responses_tool_choice": "none", + }, + ) + + self.assertEqual(response.status_code, 200) + self.assertEqual(mock_start.call_args.kwargs["tool_choice"], "none") + + @patch("chatmock.routes_ollama.start_upstream_request") + def test_ollama_chat_accepts_standard_function_tools_in_both_modes(self, mock_start) -> None: + for client_compat in ("default", "vscode"): + with self.subTest(client_compat=client_compat): + app = create_app(client_compat=client_compat) + client = app.test_client() + mock_start.reset_mock() + mock_start.return_value = ( + FakeUpstream( + [ + {"type": "response.output_text.delta", "delta": "hello"}, + {"type": "response.completed"}, + ] + ), + None, + ) + + response = client.post( + "/api/chat", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "stream": False, + "tools": [TOOL_SEARCH_CHAT_TOOL], + }, + ) + + self.assertEqual(response.status_code, 200) + self.assertEqual(mock_start.call_args.kwargs["tools"], [TOOL_SEARCH_RESPONSES_TOOL]) + @patch("chatmock.routes_openai.start_upstream_request") def test_chat_completions_fast_mode_sets_priority_service_tier(self, mock_start) -> None: mock_start.return_value = ( @@ -983,57 +1141,85 @@ def test_responses_route_rejects_unsupported_explicit_priority(self, mock_start) @patch("chatmock.websocket_routes.get_effective_chatgpt_auth", return_value=("token", "acct")) @patch("chatmock.websocket_routes.connect_upstream_websocket") - def test_responses_websocket_rewrites_response_create(self, mock_connect, _mock_auth) -> None: - class FakeUpstreamWebsocket: - def __init__(self) -> None: - self.sent: list[str] = [] - self._messages = [ - json.dumps({"type": "response.created", "response": {"id": "resp_ws_1"}}), - json.dumps({ - "type": "response.output_item.done", - "item": { - "type": "message", - "role": "assistant", - "id": "msg_1", - "content": [{"type": "output_text", "text": "assistant output"}], - }, - }), - json.dumps({"type": "response.completed", "response": {"id": "resp_ws_1"}}), - json.dumps({"type": "response.created", "response": {"id": "resp_ws_2"}}), - json.dumps({"type": "response.completed", "response": {"id": "resp_ws_2"}}), - ] - - def send(self, message: str) -> None: - self.sent.append(message) + def test_responses_websocket_rejects_chat_completions_style_tool_in_default_mode(self, mock_connect, _mock_auth) -> None: + app = create_app() + host, port = start_test_server(app) - def recv(self) -> str: - return self._messages.pop(0) + with ws_connect(f"ws://{host}:{port}/v1/responses") as client: + client.send( + json.dumps( + { + "type": "response.create", + "model": "gpt-5.4", + "input": "hello", + "tools": [TOOL_SEARCH_CHAT_TOOL], + } + ) + ) + error = json.loads(client.recv()) - def close(self) -> None: - return None + self.assertEqual(error["type"], "error") + self.assertEqual(error["error"]["code"], "CLIENT_COMPAT_UNSUPPORTED") + self.assertIn("chat.completions tool schema", error["error"]["message"]) + mock_connect.assert_not_called() - fake_upstream = FakeUpstreamWebsocket() + @patch("chatmock.websocket_routes.get_effective_chatgpt_auth", return_value=("token", "acct")) + @patch("chatmock.websocket_routes.connect_upstream_websocket") + def test_responses_websocket_accepts_chat_completions_style_tool_in_vscode_mode(self, mock_connect, _mock_auth) -> None: + fake_upstream = FakeUpstreamWebsocket( + [ + {"type": "response.created", "response": {"id": "resp_ws_1"}}, + {"type": "response.completed", "response": {"id": "resp_ws_1"}}, + ] + ) mock_connect.return_value = fake_upstream - app = create_app() + app = create_app(client_compat="vscode") + host, port = start_test_server(app) - sock = socket.socket() - sock.bind(("127.0.0.1", 0)) - host, port = sock.getsockname() - sock.close() - - server_thread = threading.Thread( - target=app.run, - kwargs={ - "host": host, - "port": port, - "use_reloader": False, - "threaded": True, - }, - daemon=True, + with ws_connect(f"ws://{host}:{port}/v1/responses") as client: + client.send( + json.dumps( + { + "type": "response.create", + "model": "gpt-5.4", + "input": "hello", + "tools": [TOOL_SEARCH_CHAT_TOOL], + } + ) + ) + first = json.loads(client.recv()) + second = json.loads(client.recv()) + + self.assertEqual(first["type"], "response.created") + self.assertEqual(second["type"], "response.completed") + outbound = json.loads(fake_upstream.sent[0]) + self.assertEqual(outbound["tools"], [TOOL_SEARCH_RESPONSES_TOOL]) + + @patch("chatmock.websocket_routes.get_effective_chatgpt_auth", return_value=("token", "acct")) + @patch("chatmock.websocket_routes.connect_upstream_websocket") + def test_responses_websocket_rewrites_response_create(self, mock_connect, _mock_auth) -> None: + fake_upstream = FakeUpstreamWebsocket( + [ + {"type": "response.created", "response": {"id": "resp_ws_1"}}, + { + "type": "response.output_item.done", + "item": { + "type": "message", + "role": "assistant", + "id": "msg_1", + "content": [{"type": "output_text", "text": "assistant output"}], + }, + }, + {"type": "response.completed", "response": {"id": "resp_ws_1"}}, + {"type": "response.created", "response": {"id": "resp_ws_2"}}, + {"type": "response.completed", "response": {"id": "resp_ws_2"}}, + ] ) - server_thread.start() - time.sleep(0.5) + mock_connect.return_value = fake_upstream + + app = create_app() + host, port = start_test_server(app) with ws_connect(f"ws://{host}:{port}/v1/responses") as client: client.send(json.dumps({"type": "response.create", "model": "gpt-5.4", "input": "hello", "fast_mode": True})) From 77513a52a6569b2482fb35c0af123d58e344abad Mon Sep 17 00:00:00 2001 From: PenguinDOOM Date: Sat, 2 May 2026 18:20:13 +0900 Subject: [PATCH 06/10] Apply PR review fixes --- chatmock/responses_api.py | 26 ++++- chatmock/routes_ollama.py | 46 ++++---- chatmock/routes_openai.py | 45 ++++---- tests/test_routes.py | 222 ++++++++++++++++++++++++++------------ 4 files changed, 234 insertions(+), 105 deletions(-) diff --git a/chatmock/responses_api.py b/chatmock/responses_api.py index 3afc1e6..7401f88 100644 --- a/chatmock/responses_api.py +++ b/chatmock/responses_api.py @@ -2,10 +2,13 @@ import json from dataclasses import dataclass -from typing import Any, Dict, Iterable, Iterator, List +from typing import Any, Callable, Dict, Iterable, Iterator, List + +from flask import Response, jsonify, make_response from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS from .fast_mode import ServiceTierResolution, resolve_service_tier +from .http import build_cors_headers from .model_registry import ( allowed_efforts_for_model, extract_reasoning_from_model_name, @@ -40,6 +43,27 @@ def is_vscode_client_compat(config: Dict[str, Any]) -> bool: return str(config.get("CLIENT_COMPAT") or "default").strip().lower() == "vscode" +def client_compat_error_response( + feature_name: str, + route_name: str, + *, + verbose: bool = False, + log_json: Callable[[str, Any], None] | None = None, +) -> Response: + err = { + "error": { + "message": f"{feature_name} on {route_name} is only supported when CLIENT_COMPAT=vscode", + "code": "CLIENT_COMPAT_UNSUPPORTED", + } + } + if verbose and log_json is not None: + log_json(f"OUT POST {route_name}", err) + resp = make_response(jsonify(err), 400) + for key, value in build_cors_headers().items(): + resp.headers.setdefault(key, value) + return resp + + def _uses_chat_completions_tool_schema(tools: Any) -> bool: if not isinstance(tools, list): return False diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py index d7c1645..7a06d47 100644 --- a/chatmock/routes_ollama.py +++ b/chatmock/routes_ollama.py @@ -13,7 +13,7 @@ from .limits import record_rate_limits_from_response from .http import build_cors_headers from .model_registry import list_public_models, uses_codex_instructions -from .responses_api import instructions_for_model, is_vscode_client_compat +from .responses_api import client_compat_error_response, instructions_for_model, is_vscode_client_compat from .reasoning import ( allowed_efforts_for_model, build_reasoning_param, @@ -57,21 +57,6 @@ def _gen(): return _gen() -def _client_compat_error_response(feature_name: str, route_name: str, *, verbose: bool = False) -> Response: - err = { - "error": { - "message": f"{feature_name} on {route_name} is only supported when CLIENT_COMPAT=vscode", - "code": "CLIENT_COMPAT_UNSUPPORTED", - } - } - if verbose: - _log_json(f"OUT POST {route_name}", err) - resp = make_response(jsonify(err), 400) - for key, value in build_cors_headers().items(): - resp.headers.setdefault(key, value) - return resp - - @ollama_bp.route("/api/version", methods=["GET"]) def ollama_version() -> Response: if bool(current_app.config.get("VERBOSE")): @@ -218,9 +203,19 @@ def ollama_chat() -> Response: if not is_vscode_client_compat(current_app.config): if "responses_tools" in payload: - return _client_compat_error_response("responses_tools", "/api/chat", verbose=verbose) + return client_compat_error_response( + "responses_tools", + "/api/chat", + verbose=verbose, + log_json=_log_json, + ) if "responses_tool_choice" in payload: - return _client_compat_error_response("responses_tool_choice", "/api/chat", verbose=verbose) + return client_compat_error_response( + "responses_tool_choice", + "/api/chat", + verbose=verbose, + log_json=_log_json, + ) tools_req = payload.get("tools") if isinstance(payload.get("tools"), list) else [] tools_responses = convert_tools_chat_to_responses(normalize_ollama_tools(tools_req)) @@ -349,7 +344,20 @@ def ollama_chat() -> Response: service_tier=service_tier_resolution.service_tier, ) record_rate_limits_from_response(upstream2) - if err2 is None and upstream2 is not None and upstream2.status_code < 400: + if err2 is not None: + if verbose: + try: + body = err2.get_data(as_text=True) + if body: + try: + parsed = json.loads(body) + except Exception: + parsed = body + _log_json("OUT POST /api/chat", parsed) + except Exception: + pass + return err2 + if upstream2 is not None and upstream2.status_code < 400: upstream_resp = upstream2 else: err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), "code": "RESPONSES_TOOLS_REJECTED"}} diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py index ff70028..3bb7bdf 100644 --- a/chatmock/routes_openai.py +++ b/chatmock/routes_openai.py @@ -14,6 +14,7 @@ from .responses_api import ( ResponsesRequestError, aggregate_response_from_sse, + client_compat_error_response, extract_client_session_id, instructions_for_model, is_vscode_client_compat, @@ -128,21 +129,6 @@ def _extract_upstream_error_payload(upstream: Any) -> Dict[str, Any]: return {"error": {"message": text or "Upstream error"}} -def _client_compat_error_response(feature_name: str, route_name: str, *, verbose: bool = False) -> Response: - err = { - "error": { - "message": f"{feature_name} on {route_name} is only supported when CLIENT_COMPAT=vscode", - "code": "CLIENT_COMPAT_UNSUPPORTED", - } - } - if verbose: - _log_json(f"OUT POST {route_name}", err) - resp = make_response(jsonify(err), 400) - for key, value in build_cors_headers().items(): - resp.headers.setdefault(key, value) - return resp - - @openai_bp.route("/v1/chat/completions", methods=["POST"]) def chat_completions() -> Response: verbose = bool(current_app.config.get("VERBOSE")) @@ -170,9 +156,19 @@ def chat_completions() -> Response: if not is_vscode_client_compat(current_app.config): if "responses_tools" in payload: - return _client_compat_error_response("responses_tools", "/v1/chat/completions", verbose=verbose) + return client_compat_error_response( + "responses_tools", + "/v1/chat/completions", + verbose=verbose, + log_json=_log_json, + ) if "responses_tool_choice" in payload: - return _client_compat_error_response("responses_tool_choice", "/v1/chat/completions", verbose=verbose) + return client_compat_error_response( + "responses_tool_choice", + "/v1/chat/completions", + verbose=verbose, + log_json=_log_json, + ) requested_model = payload.get("model") model = normalize_model_name(requested_model, current_app.config.get("DEBUG_MODEL")) @@ -308,7 +304,20 @@ def chat_completions() -> Response: service_tier=service_tier, ) record_rate_limits_from_response(upstream2) - if err2 is None and upstream2 is not None and upstream2.status_code < 400: + if err2 is not None: + if verbose: + try: + body = err2.get_data(as_text=True) + if body: + try: + parsed = json.loads(body) + except Exception: + parsed = body + _log_json("OUT POST /v1/chat/completions", parsed) + except Exception: + pass + return err2 + if upstream2 is not None and upstream2.status_code < 400: upstream = upstream2 else: failed_upstream = upstream2 if upstream2 is not None else upstream diff --git a/tests/test_routes.py b/tests/test_routes.py index deaa08b..34f6f6c 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -6,12 +6,14 @@ import threading import time import unittest +from contextlib import closing, contextmanager from unittest.mock import patch import chatmock.cli as cli from chatmock.app import create_app from chatmock.session import reset_session_state from websockets.sync.client import connect as ws_connect +from werkzeug.serving import make_server class FakeUpstream: @@ -65,25 +67,30 @@ def close(self) -> None: return None -def start_test_server(app) -> tuple[str, int]: - sock = socket.socket() - sock.bind(("127.0.0.1", 0)) - host, port = sock.getsockname() - sock.close() - - server_thread = threading.Thread( - target=app.run, - kwargs={ - "host": host, - "port": port, - "use_reloader": False, - "threaded": True, - }, - daemon=True, - ) +def _wait_for_server(host: str, port: int, *, timeout: float = 2.0) -> None: + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + with closing(socket.create_connection((host, port), timeout=0.1)): + return + except OSError: + time.sleep(0.01) + raise RuntimeError(f"Timed out waiting for test server on {host}:{port}") + + +@contextmanager +def start_test_server(app): + server = make_server("127.0.0.1", 0, app, threaded=True) + host, port = server.socket.getsockname()[:2] + server_thread = threading.Thread(target=server.serve_forever, daemon=True) server_thread.start() - time.sleep(0.5) - return host, port + _wait_for_server(host, port) + try: + yield host, port + finally: + server.shutdown() + server.server_close() + server_thread.join(timeout=2.0) TOOL_SEARCH_PARAMETERS = { @@ -366,6 +373,42 @@ def test_mixed_tools_and_responses_tools_prefer_standard_tools_contract_in_vscod self.assertEqual(outbound_tools[0], TOOL_SEARCH_RESPONSES_TOOL) self.assertEqual(mock_start.call_args.kwargs["tool_choice"], "none") + @patch("chatmock.routes_openai.start_upstream_request") + def test_chat_completions_retry_returns_second_start_error_response(self, mock_start) -> None: + app = create_app(client_compat="vscode") + client = app.test_client() + initial_error = {"error": {"message": "tool rejected", "code": "initial_failure"}} + retry_error = {"error": {"message": "retry failed", "code": "retry_failure"}} + retry_response = app.response_class( + response=json.dumps(retry_error), + status=429, + mimetype="application/json", + ) + mock_start.side_effect = [ + ( + FakeUpstream( + status_code=400, + content=json.dumps(initial_error).encode("utf-8"), + text=json.dumps(initial_error), + ), + None, + ), + (None, retry_response), + ] + + response = client.post( + "/v1/chat/completions", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "responses_tools": [{"type": "web_search"}], + }, + ) + + self.assertEqual(response.status_code, 429) + self.assertEqual(response.get_json(), retry_error) + self.assertEqual(mock_start.call_count, 2) + @patch("chatmock.routes_openai.start_upstream_request") def test_chat_completions_honors_debug_model_override(self, mock_start) -> None: app = create_app(debug_model="gpt-5.4") @@ -550,6 +593,43 @@ def test_ollama_chat_accepts_standard_function_tools_in_both_modes(self, mock_st self.assertEqual(response.status_code, 200) self.assertEqual(mock_start.call_args.kwargs["tools"], [TOOL_SEARCH_RESPONSES_TOOL]) + @patch("chatmock.routes_ollama.start_upstream_request") + def test_ollama_chat_retry_returns_second_start_error_response(self, mock_start) -> None: + app = create_app(client_compat="vscode") + client = app.test_client() + initial_error = {"error": {"message": "tool rejected", "code": "initial_failure"}} + retry_error = {"error": {"message": "retry failed", "code": "retry_failure"}} + retry_response = app.response_class( + response=json.dumps(retry_error), + status=429, + mimetype="application/json", + ) + mock_start.side_effect = [ + ( + FakeUpstream( + status_code=400, + content=json.dumps(initial_error).encode("utf-8"), + text=json.dumps(initial_error), + ), + None, + ), + (None, retry_response), + ] + + response = client.post( + "/api/chat", + json={ + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "hi"}], + "stream": False, + "responses_tools": [{"type": "web_search"}], + }, + ) + + self.assertEqual(response.status_code, 429) + self.assertEqual(response.get_json(), retry_error) + self.assertEqual(mock_start.call_count, 2) + @patch("chatmock.routes_openai.start_upstream_request") def test_chat_completions_fast_mode_sets_priority_service_tier(self, mock_start) -> None: mock_start.return_value = ( @@ -1139,24 +1219,34 @@ def test_responses_route_rejects_unsupported_explicit_priority(self, mock_start) self.assertIn("Fast mode is not supported", body["error"]["message"]) mock_start.assert_not_called() + def test_start_test_server_context_manager_starts_and_stops_server(self) -> None: + app = create_app() + + with start_test_server(app) as (host, port): + with closing(socket.create_connection((host, port), timeout=1.0)): + pass + + with self.assertRaises(OSError): + with closing(socket.create_connection((host, port), timeout=0.2)): + pass + @patch("chatmock.websocket_routes.get_effective_chatgpt_auth", return_value=("token", "acct")) @patch("chatmock.websocket_routes.connect_upstream_websocket") def test_responses_websocket_rejects_chat_completions_style_tool_in_default_mode(self, mock_connect, _mock_auth) -> None: app = create_app() - host, port = start_test_server(app) - - with ws_connect(f"ws://{host}:{port}/v1/responses") as client: - client.send( - json.dumps( - { - "type": "response.create", - "model": "gpt-5.4", - "input": "hello", - "tools": [TOOL_SEARCH_CHAT_TOOL], - } + with start_test_server(app) as (host, port): + with ws_connect(f"ws://{host}:{port}/v1/responses") as client: + client.send( + json.dumps( + { + "type": "response.create", + "model": "gpt-5.4", + "input": "hello", + "tools": [TOOL_SEARCH_CHAT_TOOL], + } + ) ) - ) - error = json.loads(client.recv()) + error = json.loads(client.recv()) self.assertEqual(error["type"], "error") self.assertEqual(error["error"]["code"], "CLIENT_COMPAT_UNSUPPORTED") @@ -1175,21 +1265,20 @@ def test_responses_websocket_accepts_chat_completions_style_tool_in_vscode_mode( mock_connect.return_value = fake_upstream app = create_app(client_compat="vscode") - host, port = start_test_server(app) - - with ws_connect(f"ws://{host}:{port}/v1/responses") as client: - client.send( - json.dumps( - { - "type": "response.create", - "model": "gpt-5.4", - "input": "hello", - "tools": [TOOL_SEARCH_CHAT_TOOL], - } + with start_test_server(app) as (host, port): + with ws_connect(f"ws://{host}:{port}/v1/responses") as client: + client.send( + json.dumps( + { + "type": "response.create", + "model": "gpt-5.4", + "input": "hello", + "tools": [TOOL_SEARCH_CHAT_TOOL], + } + ) ) - ) - first = json.loads(client.recv()) - second = json.loads(client.recv()) + first = json.loads(client.recv()) + second = json.loads(client.recv()) self.assertEqual(first["type"], "response.created") self.assertEqual(second["type"], "response.completed") @@ -1219,29 +1308,28 @@ def test_responses_websocket_rewrites_response_create(self, mock_connect, _mock_ mock_connect.return_value = fake_upstream app = create_app() - host, port = start_test_server(app) - - with ws_connect(f"ws://{host}:{port}/v1/responses") as client: - client.send(json.dumps({"type": "response.create", "model": "gpt-5.4", "input": "hello", "fast_mode": True})) - first = json.loads(client.recv()) - assistant = json.loads(client.recv()) - second = json.loads(client.recv()) - client.send( - json.dumps( - { - "type": "response.create", - "model": "gpt-5.4", - "fast_mode": True, - "input": [ - {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, - {"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]}, - {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, - ], - } + with start_test_server(app) as (host, port): + with ws_connect(f"ws://{host}:{port}/v1/responses") as client: + client.send(json.dumps({"type": "response.create", "model": "gpt-5.4", "input": "hello", "fast_mode": True})) + first = json.loads(client.recv()) + assistant = json.loads(client.recv()) + second = json.loads(client.recv()) + client.send( + json.dumps( + { + "type": "response.create", + "model": "gpt-5.4", + "fast_mode": True, + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, + {"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, + ], + } + ) ) - ) - third = json.loads(client.recv()) - fourth = json.loads(client.recv()) + third = json.loads(client.recv()) + fourth = json.loads(client.recv()) self.assertEqual(first["type"], "response.created") self.assertEqual(assistant["type"], "response.output_item.done") From 3de71a15d5eab625335259bc2df89029a0013496 Mon Sep 17 00:00:00 2001 From: PenguinDOOM Date: Sat, 2 May 2026 18:37:26 +0900 Subject: [PATCH 07/10] Clean up model and route tests --- tests/test_models.py | 2 +- tests/test_routes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index d9bc42f..023a8b9 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -14,7 +14,7 @@ def test_normalizes_aliases(self) -> None: self.assertEqual(normalize_model_name("codex"), "codex-mini-latest") def test_preserves_unknown_model_names(self) -> None: - self.assertEqual(normalize_model_name("claude-sonnet-4-5"), "claude-sonnet-4-5") + self.assertEqual(normalize_model_name("unknown-model-xyz"), "unknown-model-xyz") def test_strips_reasoning_suffixes(self) -> None: self.assertEqual(normalize_model_name("gpt-5.4-high"), "gpt-5.4") diff --git a/tests/test_routes.py b/tests/test_routes.py index 34f6f6c..71cb037 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -236,7 +236,7 @@ def test_chat_completions_preserve_unknown_model_id(self, mock_start) -> None: ), None, ) - requested_model = "claude-sonnet-4-5" + requested_model = "unknown-model-xyz" response = self.client.post( "/v1/chat/completions", From 00a96bd9d5abe5cfdae7cd0bb134f461ae32a9a9 Mon Sep 17 00:00:00 2001 From: PenguinDOOM Date: Sun, 3 May 2026 04:19:50 +0900 Subject: [PATCH 08/10] fix: update normalization of responses payload - Prevent forwarding of truncation hints to the upstream as the Codex backend rejects them. - Set the store field to False to comply with the upstream contract that rejects stored responses. Co-authored-by: Copilot --- chatmock/responses_api.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chatmock/responses_api.py b/chatmock/responses_api.py index 7401f88..e8592ba 100644 --- a/chatmock/responses_api.py +++ b/chatmock/responses_api.py @@ -133,11 +133,15 @@ def normalize_responses_payload( normalized = dict(payload) normalized["model"] = normalized_model normalized.pop("max_output_tokens", None) + # The Codex backend behind ChatMock rejects Responses truncation hints, + # so keep accepting the client field but do not forward it upstream. normalized.pop("truncation", None) if "input" in normalized: normalized["input"] = canonicalize_responses_input(normalized.get("input")) + # Copilot/Codex traffic is expected to be non-persistent here and the + # upstream contract rejects stored responses, so always pin this off. normalized["store"] = False instructions = normalized.get("instructions") From 05c9996a3fed1d47b68808cfc2a7069a397e1820 Mon Sep 17 00:00:00 2001 From: PenguinDOOM Date: Sat, 9 May 2026 21:44:47 +0900 Subject: [PATCH 09/10] test: rebaseline tool-search route tests --- tests/test_routes.py | 334 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 309 insertions(+), 25 deletions(-) diff --git a/tests/test_routes.py b/tests/test_routes.py index 71cb037..895b876 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -99,7 +99,18 @@ def start_test_server(app): "required": ["query"], } -TOOL_SEARCH_CHAT_TOOL = { +NATIVE_TOOL_SEARCH_PARAMETERS = { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Natural language description of what tool capability you are looking for.", + } + }, + "required": ["query"], +} + +LEGACY_TOOL_SEARCH_CHAT_TOOL = { "type": "function", "function": { "name": "tool_search", @@ -108,7 +119,7 @@ def start_test_server(app): }, } -TOOL_SEARCH_RESPONSES_TOOL = { +LEGACY_TOOL_SEARCH_RESPONSES_TOOL = { "type": "function", "name": "tool_search", "description": "Search the workspace for relevant files and symbols.", @@ -116,6 +127,27 @@ def start_test_server(app): "parameters": TOOL_SEARCH_PARAMETERS, } +NATIVE_TOOL_SEARCH_RESPONSES_TOOL = { + "type": "tool_search", + "execution": "client", + "description": "Search for relevant tools by describing what you need. Returns tool definitions for tools matching your query.", + "parameters": NATIVE_TOOL_SEARCH_PARAMETERS, +} + +# Matches the VS Code tool_search_output ToolSearchLoadedTool wire shape. +VSCODE_TOOL_SEARCH_OUTPUT_TOOL = { + "type": "function", + "name": "grep_search", + "description": "Search for text", + "defer_loading": True, + "parameters": TOOL_SEARCH_PARAMETERS, +} + +DEFERRED_TOOL_SEARCH_OUTPUT_TOOL_WITH_NAMESPACE = { + **VSCODE_TOOL_SEARCH_OUTPUT_TOOL, + "namespace": "grep_search", +} + class StartupModeTests(unittest.TestCase): def test_create_app_defaults_client_compat(self) -> None: @@ -248,7 +280,7 @@ def test_chat_completions_preserve_unknown_model_id(self, mock_start) -> None: self.assertEqual(normalized_model, requested_model) @patch("chatmock.routes_openai.start_upstream_request") - def test_chat_completions_accepts_tool_search_function_tool(self, mock_start) -> None: + def test_chat_completions_accepts_legacy_tool_search_function_tool(self, mock_start) -> None: mock_start.return_value = ( FakeUpstream( [ @@ -264,15 +296,15 @@ def test_chat_completions_accepts_tool_search_function_tool(self, mock_start) -> json={ "model": "gpt-5.4", "messages": [{"role": "user", "content": "hi"}], - "tools": [TOOL_SEARCH_CHAT_TOOL], + "tools": [LEGACY_TOOL_SEARCH_CHAT_TOOL], }, ) self.assertEqual(response.status_code, 200) - self.assertEqual(mock_start.call_args.kwargs["tools"], [TOOL_SEARCH_RESPONSES_TOOL]) + self.assertEqual(mock_start.call_args.kwargs["tools"], [LEGACY_TOOL_SEARCH_RESPONSES_TOOL]) @patch("chatmock.routes_openai.start_upstream_request") - def test_chat_completions_tool_search_round_trips_through_function_call_path(self, mock_start) -> None: + def test_chat_completions_legacy_tool_search_round_trips_through_function_call_path(self, mock_start) -> None: mock_start.return_value = ( FakeUpstream( [ @@ -296,7 +328,7 @@ def test_chat_completions_tool_search_round_trips_through_function_call_path(sel json={ "model": "gpt-5.4", "messages": [{"role": "user", "content": "hi"}], - "tools": [TOOL_SEARCH_CHAT_TOOL], + "tools": [LEGACY_TOOL_SEARCH_CHAT_TOOL], }, ) body = response.get_json() @@ -362,7 +394,7 @@ def test_mixed_tools_and_responses_tools_prefer_standard_tools_contract_in_vscod json={ "model": "gpt-5.4", "messages": [{"role": "user", "content": "hi"}], - "tools": [TOOL_SEARCH_CHAT_TOOL], + "tools": [LEGACY_TOOL_SEARCH_CHAT_TOOL], "responses_tools": [{"type": "web_search"}], "responses_tool_choice": "none", }, @@ -370,7 +402,7 @@ def test_mixed_tools_and_responses_tools_prefer_standard_tools_contract_in_vscod self.assertEqual(response.status_code, 200) outbound_tools = mock_start.call_args.kwargs["tools"] - self.assertEqual(outbound_tools[0], TOOL_SEARCH_RESPONSES_TOOL) + self.assertEqual(outbound_tools[0], LEGACY_TOOL_SEARCH_RESPONSES_TOOL) self.assertEqual(mock_start.call_args.kwargs["tool_choice"], "none") @patch("chatmock.routes_openai.start_upstream_request") @@ -586,12 +618,12 @@ def test_ollama_chat_accepts_standard_function_tools_in_both_modes(self, mock_st "model": "gpt-5.4", "messages": [{"role": "user", "content": "hi"}], "stream": False, - "tools": [TOOL_SEARCH_CHAT_TOOL], + "tools": [LEGACY_TOOL_SEARCH_CHAT_TOOL], }, ) self.assertEqual(response.status_code, 200) - self.assertEqual(mock_start.call_args.kwargs["tools"], [TOOL_SEARCH_RESPONSES_TOOL]) + self.assertEqual(mock_start.call_args.kwargs["tools"], [LEGACY_TOOL_SEARCH_RESPONSES_TOOL]) @patch("chatmock.routes_ollama.start_upstream_request") def test_ollama_chat_retry_returns_second_start_error_response(self, mock_start) -> None: @@ -768,7 +800,7 @@ def test_responses_route_overrides_incoming_store_true(self, mock_start) -> None def test_responses_route_rejects_chat_completions_style_tool_in_default_mode(self, mock_start) -> None: response = self.client.post( "/v1/responses", - json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_CHAT_TOOL]}, + json={"model": "gpt-5.4", "input": "hello", "tools": [LEGACY_TOOL_SEARCH_CHAT_TOOL]}, ) body = response.get_json() @@ -778,7 +810,7 @@ def test_responses_route_rejects_chat_completions_style_tool_in_default_mode(sel mock_start.assert_not_called() @patch("chatmock.routes_openai.start_upstream_raw_request") - def test_responses_route_accepts_chat_completions_style_tool_in_vscode_mode(self, mock_start) -> None: + def test_responses_route_accepts_legacy_chat_completions_style_tool_in_vscode_mode(self, mock_start) -> None: app = create_app(client_compat="vscode") client = app.test_client() mock_start.return_value = ( @@ -805,15 +837,15 @@ def test_responses_route_accepts_chat_completions_style_tool_in_vscode_mode(self response = client.post( "/v1/responses", - json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_CHAT_TOOL]}, + json={"model": "gpt-5.4", "input": "hello", "tools": [LEGACY_TOOL_SEARCH_CHAT_TOOL]}, ) self.assertEqual(response.status_code, 200) outbound_payload = mock_start.call_args.args[0] - self.assertEqual(outbound_payload["tools"], [TOOL_SEARCH_RESPONSES_TOOL]) + self.assertEqual(outbound_payload["tools"], [LEGACY_TOOL_SEARCH_RESPONSES_TOOL]) @patch("chatmock.routes_openai.start_upstream_raw_request") - def test_responses_route_accepts_standard_function_tools_in_both_modes(self, mock_start) -> None: + def test_responses_route_accepts_legacy_function_tool_search_in_both_modes(self, mock_start) -> None: for client_compat in ("default", "vscode"): with self.subTest(client_compat=client_compat): app = create_app(client_compat=client_compat) @@ -843,15 +875,218 @@ def test_responses_route_accepts_standard_function_tools_in_both_modes(self, moc response = client.post( "/v1/responses", - json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_RESPONSES_TOOL]}, + json={"model": "gpt-5.4", "input": "hello", "tools": [LEGACY_TOOL_SEARCH_RESPONSES_TOOL]}, ) self.assertEqual(response.status_code, 200) outbound_payload = mock_start.call_args.args[0] - self.assertEqual(outbound_payload["tools"], [TOOL_SEARCH_RESPONSES_TOOL]) + self.assertEqual(outbound_payload["tools"], [LEGACY_TOOL_SEARCH_RESPONSES_TOOL]) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_accepts_native_tool_search_in_both_modes(self, mock_start) -> None: + for client_compat in ("default", "vscode"): + with self.subTest(client_compat=client_compat): + app = create_app(client_compat=client_compat) + client = app.test_client() + mock_start.reset_mock() + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_native_tool", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": { + "id": "resp_native_tool", + "object": "response", + "status": "completed", + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + + response = client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello", "tools": [NATIVE_TOOL_SEARCH_RESPONSES_TOOL]}, + ) + + self.assertEqual(response.status_code, 200) + outbound_payload = mock_start.call_args.args[0] + self.assertEqual(outbound_payload["tools"], [NATIVE_TOOL_SEARCH_RESPONSES_TOOL]) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_preserves_native_tool_search_call_input(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_tool_search_call", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": { + "id": "resp_tool_search_call", + "object": "response", + "status": "completed", + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + native_input = [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, + { + "type": "tool_search_call", + "execution": "client", + "call_id": "call_ts_1", + "status": "completed", + "arguments": {"query": "workspace symbols"}, + }, + ] + + response = self.client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": native_input}, + ) + + self.assertEqual(response.status_code, 200) + outbound_payload = mock_start.call_args.args[0] + self.assertEqual(outbound_payload["input"], native_input) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_preserves_native_tool_search_output_input(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_tool_search_output", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": { + "id": "resp_tool_search_output", + "object": "response", + "status": "completed", + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + native_input = [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, + { + "type": "tool_search_output", + "execution": "client", + "call_id": "call_ts_1", + "status": "completed", + "tools": [VSCODE_TOOL_SEARCH_OUTPUT_TOOL], + }, + ] + + response = self.client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": native_input}, + ) + + self.assertEqual(response.status_code, 200) + outbound_payload = mock_start.call_args.args[0] + self.assertEqual(outbound_payload["input"], native_input) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_preserves_extra_tool_fields_on_native_tool_search_output_input(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_tool_search_output_extra", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": { + "id": "resp_tool_search_output_extra", + "object": "response", + "status": "completed", + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + native_input = [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, + { + "type": "tool_search_output", + "execution": "client", + "call_id": "call_ts_1", + "status": "completed", + "tools": [DEFERRED_TOOL_SEARCH_OUTPUT_TOOL_WITH_NAMESPACE], + }, + ] + + response = self.client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": native_input}, + ) + + self.assertEqual(response.status_code, 200) + outbound_payload = mock_start.call_args.args[0] + self.assertEqual(outbound_payload["input"], native_input) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_does_not_inject_web_search_when_legacy_standard_tools_present(self, mock_start) -> None: + app = create_app(default_web_search=True) + client = app.test_client() + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_123", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": { + "id": "resp_123", + "object": "response", + "status": "completed", + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + + response = client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello", "tools": [LEGACY_TOOL_SEARCH_RESPONSES_TOOL]}, + ) + + self.assertEqual(response.status_code, 200) + outbound_tools = mock_start.call_args.args[0]["tools"] + self.assertEqual(outbound_tools, [LEGACY_TOOL_SEARCH_RESPONSES_TOOL]) + self.assertFalse(any(isinstance(tool, dict) and tool.get("type") == "web_search" for tool in outbound_tools)) @patch("chatmock.routes_openai.start_upstream_raw_request") - def test_responses_route_does_not_inject_web_search_when_standard_tools_present(self, mock_start) -> None: + def test_responses_route_does_not_inject_web_search_when_native_standard_tools_present(self, mock_start) -> None: app = create_app(default_web_search=True) client = app.test_client() mock_start.return_value = ( @@ -878,12 +1113,12 @@ def test_responses_route_does_not_inject_web_search_when_standard_tools_present( response = client.post( "/v1/responses", - json={"model": "gpt-5.4", "input": "hello", "tools": [TOOL_SEARCH_RESPONSES_TOOL]}, + json={"model": "gpt-5.4", "input": "hello", "tools": [NATIVE_TOOL_SEARCH_RESPONSES_TOOL]}, ) self.assertEqual(response.status_code, 200) outbound_tools = mock_start.call_args.args[0]["tools"] - self.assertEqual(len(outbound_tools), 1) + self.assertEqual(outbound_tools, [NATIVE_TOOL_SEARCH_RESPONSES_TOOL]) self.assertFalse(any(isinstance(tool, dict) and tool.get("type") == "web_search" for tool in outbound_tools)) @patch("chatmock.routes_openai.start_upstream_raw_request") @@ -982,7 +1217,56 @@ def test_responses_route_strips_unsupported_truncation(self, mock_start) -> None self.assertNotIn("truncation", outbound_payload) @patch("chatmock.routes_openai.start_upstream_raw_request") - def test_responses_route_does_not_use_previous_response_id_for_http_follow_up(self, mock_start) -> None: + def test_responses_route_preserves_explicit_previous_response_id_on_http(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_explicit_prev", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": { + "id": "resp_explicit_prev", + "object": "response", + "status": "completed", + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + + native_input = [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "resume"}]}, + { + "type": "tool_search_output", + "execution": "client", + "call_id": "call_ts_resume", + "status": "completed", + "tools": [VSCODE_TOOL_SEARCH_OUTPUT_TOOL], + }, + ] + + response = self.client.post( + "/v1/responses", + json={ + "model": "gpt-5.4", + "previous_response_id": "resp_prev_explicit", + "input": native_input, + }, + ) + + self.assertEqual(response.status_code, 200) + outbound_payload = mock_start.call_args.args[0] + self.assertEqual(outbound_payload["previous_response_id"], "resp_prev_explicit") + self.assertEqual(outbound_payload["input"], native_input) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_does_not_auto_inject_previous_response_id_for_http_follow_up(self, mock_start) -> None: mock_start.side_effect = [ ( FakeUpstream( @@ -1242,7 +1526,7 @@ def test_responses_websocket_rejects_chat_completions_style_tool_in_default_mode "type": "response.create", "model": "gpt-5.4", "input": "hello", - "tools": [TOOL_SEARCH_CHAT_TOOL], + "tools": [LEGACY_TOOL_SEARCH_CHAT_TOOL], } ) ) @@ -1273,7 +1557,7 @@ def test_responses_websocket_accepts_chat_completions_style_tool_in_vscode_mode( "type": "response.create", "model": "gpt-5.4", "input": "hello", - "tools": [TOOL_SEARCH_CHAT_TOOL], + "tools": [LEGACY_TOOL_SEARCH_CHAT_TOOL], } ) ) @@ -1283,7 +1567,7 @@ def test_responses_websocket_accepts_chat_completions_style_tool_in_vscode_mode( self.assertEqual(first["type"], "response.created") self.assertEqual(second["type"], "response.completed") outbound = json.loads(fake_upstream.sent[0]) - self.assertEqual(outbound["tools"], [TOOL_SEARCH_RESPONSES_TOOL]) + self.assertEqual(outbound["tools"], [LEGACY_TOOL_SEARCH_RESPONSES_TOOL]) @patch("chatmock.websocket_routes.get_effective_chatgpt_auth", return_value=("token", "acct")) @patch("chatmock.websocket_routes.connect_upstream_websocket") From f31d18ce3d54b2bd7623ae3ec8af928c7539f2ac Mon Sep 17 00:00:00 2001 From: PenguinDOOM Date: Sat, 9 May 2026 22:51:37 +0900 Subject: [PATCH 10/10] test: lock tool-search integration coverage --- tests/test_routes.py | 99 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/tests/test_routes.py b/tests/test_routes.py index 895b876..b94475b 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -1265,6 +1265,83 @@ def test_responses_route_preserves_explicit_previous_response_id_on_http(self, m self.assertEqual(outbound_payload["previous_response_id"], "resp_prev_explicit") self.assertEqual(outbound_payload["input"], native_input) + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_preserves_deferred_namespace_on_http_follow_up_with_explicit_previous_response_id(self, mock_start) -> None: + mock_start.side_effect = [ + ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_1", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": {"id": "resp_1", "object": "response", "status": "completed", "output": []}, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ), + ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_2", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": {"id": "resp_2", "object": "response", "status": "completed", "output": []}, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ), + ] + + headers = {"X-Session-Id": "session-fixed"} + first = self.client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello"}, + headers=headers, + ) + native_input = [ + { + "type": "tool_search_output", + "execution": "client", + "call_id": "call_ts_resume", + "status": "completed", + "tools": [DEFERRED_TOOL_SEARCH_OUTPUT_TOOL_WITH_NAMESPACE], + }, + { + "type": "function_call", + "call_id": "call_grep_resume", + "name": "grep_search", + "namespace": "grep_search", + "arguments": json.dumps({"query": "*.ts"}), + }, + ] + + second = self.client.post( + "/v1/responses", + json={ + "model": "gpt-5.4", + "previous_response_id": "resp_prev_explicit", + "input": native_input, + }, + headers=headers, + ) + + self.assertEqual(first.status_code, 200) + self.assertEqual(second.status_code, 200) + outbound_payload = mock_start.call_args_list[1].args[0] + self.assertEqual(outbound_payload["previous_response_id"], "resp_prev_explicit") + self.assertEqual(outbound_payload["input"], native_input) + self.assertEqual(outbound_payload["input"][1]["namespace"], "grep_search") + @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_does_not_auto_inject_previous_response_id_for_http_follow_up(self, mock_start) -> None: mock_start.side_effect = [ @@ -1492,6 +1569,28 @@ def test_responses_route_stream_passthrough(self, mock_start) -> None: self.assertEqual(response.status_code, 200) self.assertIn("response.output_text.delta", response.get_data(as_text=True)) + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_stream_passthroughs_tool_search_call_output_item(self, mock_start) -> None: + chunk = ( + b'data: {"type":"response.output_item.done","item":{"type":"tool_search_call","execution":"client","call_id":"call_ts_1","status":"completed","arguments":{"query":"workspace symbols"}}}\n\n' + ) + mock_start.return_value = ( + FakeUpstream( + headers={"Content-Type": "text/event-stream"}, + content=chunk, + ), + None, + ) + response = self.client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello", "stream": True}, + ) + response_text = response.get_data(as_text=True) + + self.assertEqual(response.status_code, 200) + self.assertIn('"type":"tool_search_call"', response_text) + self.assertIn('"call_id":"call_ts_1"', response_text) + @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_rejects_unsupported_explicit_priority(self, mock_start) -> None: response = self.client.post(