From 3b6823c53f731a496efab04190c226d9ae1eb66d Mon Sep 17 00:00:00 2001 From: Typo Fix Bot Date: Fri, 5 Jun 2026 06:05:54 +0000 Subject: [PATCH] fix(llm): make ProviderClient.chat attempt count match MAX_RETRIES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ProviderClient.chat` used `while retries <= MAX_RETRIES:` with `retries = 0`, so an operator setting `MAX_RETRIES=3` actually triggered four LLM calls. The final log line printed `MAX_RETRIES` rather than the attempt count, hiding the off-by-one behind a misleading number. Replace the `while` loop with `for attempt in range(1, MAX_RETRIES + 1):`. The body no longer carries a manual `retries` counter and the `last_error` variable is properly typed. The final error log now reports the actual number of attempts (`"after N attempt(s)"`) and the ProviderError that gets raised still carries status_code 502. Adds `backend/tests/test_provider_client_retry.py` with two cases: - `litellm.completion` fails once then succeeds → exactly 2 calls. - `litellm.completion` always fails → exactly MAX_RETRIES calls, the final error log contains "attempt" and no longer contains "retries". --- backend/app/llm/provider_client.py | 16 ++- backend/tests/test_provider_client_retry.py | 103 ++++++++++++++++++++ 2 files changed, 110 insertions(+), 9 deletions(-) create mode 100644 backend/tests/test_provider_client_retry.py diff --git a/backend/app/llm/provider_client.py b/backend/app/llm/provider_client.py index 8a17ab3..1c048df 100644 --- a/backend/app/llm/provider_client.py +++ b/backend/app/llm/provider_client.py @@ -112,10 +112,9 @@ def chat( messages_dict = [{"role": m.role, "content": m.content} for m in messages] start_time = time.time() - retries = 0 - last_error = None + last_error: Optional[Exception] = None - while retries <= self.settings.MAX_RETRIES: + for attempt in range(1, self.settings.MAX_RETRIES + 1): try: response = litellm.completion( model=model_string, @@ -147,20 +146,19 @@ def chat( ) except Exception as e: last_error = e - retries += 1 - if retries <= self.settings.MAX_RETRIES: - backoff = self.settings.RETRY_BACKOFF * (2 ** (retries - 1)) + if attempt < self.settings.MAX_RETRIES: + backoff = self.settings.RETRY_BACKOFF * (2 ** (attempt - 1)) logger.warning( "Provider request failed (attempt %s/%s): %s. Retrying in %ss...", - retries, + attempt, self.settings.MAX_RETRIES, e, backoff, ) time.sleep(backoff) - error_msg = str(last_error) - logger.error("Provider request failed after %s retries: %s", self.settings.MAX_RETRIES, error_msg) + error_msg = str(last_error) if last_error else "unknown error" + logger.error("Provider request failed after %s attempt(s): %s", self.settings.MAX_RETRIES, error_msg) raise ProviderError( f"Provider '{self.provider_name}' request failed: {error_msg}", self.provider_name, diff --git a/backend/tests/test_provider_client_retry.py b/backend/tests/test_provider_client_retry.py new file mode 100644 index 0000000..6123072 --- /dev/null +++ b/backend/tests/test_provider_client_retry.py @@ -0,0 +1,103 @@ +"""Tests for ProviderClient.chat retry behavior. + +These tests pin down the attempt-count semantics: with MAX_RETRIES=N the +client should make at most N attempts, and the failure log should say +"attempt(s)" rather than the misleading "retries". +""" +from __future__ import annotations + +import sys +import time +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import patch + +import pytest + +# Make the backend importable without installing the package. +ROOT = Path(__file__).resolve().parents[2] +sys.path.insert(0, str(ROOT / "backend")) + +from app.llm.provider_client import ProviderClient, ProviderError # noqa: E402 + + +class _FakeSettings: + MAX_RETRIES = 2 + RETRY_BACKOFF = 0.0 # no sleeping in tests + + def get_active_model(self, provider): + return "fake-model" + + def get_runtime_api_key(self, provider): + return "test-key" + + def get_runtime_base_url(self, provider): + return None + + def _get_default_base_url(self, provider): + return None + + +class _FakeConfig: + api_format = "openai" + + def get_api_key(self): + return "test-key" + + def get_base_url(self): + return None + + +def _make_client() -> ProviderClient: + return ProviderClient(provider_name="openai") + + +def _ok_response(): + usage = SimpleNamespace(prompt_tokens=1, completion_tokens=2, total_tokens=3) + choice = SimpleNamespace( + message=SimpleNamespace(content="ok"), + finish_reason="stop", + ) + return SimpleNamespace(choices=[choice], usage=usage) + + +def test_chat_makes_exactly_max_retries_attempts_then_succeeds(monkeypatch): + client = _make_client() + client.settings = _FakeSettings() + client.config = _FakeConfig() + calls = {"n": 0} + + def fake_completion(**kwargs): + calls["n"] += 1 + if calls["n"] < 2: + raise RuntimeError("transient") + return _ok_response() + + monkeypatch.setattr(client, "_get_litellm", lambda: SimpleNamespace(completion=fake_completion)) + monkeypatch.setattr(client, "_get_api_config", lambda: {"api_key": "k", "api_base": None, "timeout": 5}) + + out = client.chat(messages=[SimpleNamespace(role="user", content="hi")]) + assert out.text == "ok" + assert calls["n"] == 2 # MAX_RETRIES=2, succeeded on attempt 2 + + +def test_chat_stops_after_max_retries_attempts(monkeypatch, caplog): + client = _make_client() + client.settings = _FakeSettings() + client.config = _FakeConfig() + calls = {"n": 0} + + def fake_completion(**kwargs): + calls["n"] += 1 + raise RuntimeError("always fails") + + monkeypatch.setattr(client, "_get_litellm", lambda: SimpleNamespace(completion=fake_completion)) + monkeypatch.setattr(client, "_get_api_config", lambda: {"api_key": "k", "api_base": None, "timeout": 5}) + + with caplog.at_level("ERROR", logger="app.llm.provider_client"): + with pytest.raises(ProviderError) as ei: + client.chat(messages=[SimpleNamespace(role="user", content="hi")]) + assert calls["n"] == _FakeSettings.MAX_RETRIES + assert "attempt" in caplog.text + assert "retries" not in caplog.text + assert ei.value.status_code == 502