diff --git a/.github/workflows/auto-release.yaml b/.github/workflows/auto-release.yaml new file mode 100644 index 00000000..d089c21d --- /dev/null +++ b/.github/workflows/auto-release.yaml @@ -0,0 +1,104 @@ +name: Auto Release + +on: + push: + branches: + - release + +jobs: + auto-tag-and-publish: + runs-on: ubuntu-latest + environment: production + + permissions: + contents: write + actions: write + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Read version from _version.py + id: get_version + run: | + chmod +x ./scripts/get_version.sh + VERSION=$(./scripts/get_version.sh) + if [ -z "$VERSION" ]; then + echo "Error: Could not extract version" + exit 1 + fi + TAG="v${VERSION}" + echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + echo "tag=${TAG}" >> "$GITHUB_OUTPUT" + echo "Resolved tag: ${TAG}" + + - name: Check if tag already exists + id: check_tag + run: | + TAG="${{ steps.get_version.outputs.tag }}" + git fetch --tags --prune --force + if git rev-parse --verify "refs/tags/${TAG}" >/dev/null 2>&1; then + echo "Tag ${TAG} already exists on origin — nothing to do." + echo "exists=true" >> "$GITHUB_OUTPUT" + else + echo "Tag ${TAG} does not exist — will create." 
+ echo "exists=false" >> "$GITHUB_OUTPUT" + fi + + - name: Configure git + if: steps.check_tag.outputs.exists == 'false' + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + + - name: Create and push tag + if: steps.check_tag.outputs.exists == 'false' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + TAG="${{ steps.get_version.outputs.tag }}" + git tag "${TAG}" "${GITHUB_SHA}" + git push origin "${TAG}" + echo "Pushed tag ${TAG} at ${GITHUB_SHA}" + + - name: Dispatch publish-to-aws workflow + if: steps.check_tag.outputs.exists == 'false' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + TAG="${{ steps.get_version.outputs.tag }}" + gh workflow run publish-to-aws.yaml \ + --ref "${TAG}" \ + -f release_tag="${TAG}" + echo "Dispatched publish-to-aws.yaml for ${TAG}" + + - name: Update latest version pointer in CHANGELOG.md on main + if: steps.check_tag.outputs.exists == 'false' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + VERSION="${{ steps.get_version.outputs.version }}" + TAG="${{ steps.get_version.outputs.tag }}" + DATE=$(date -u +%Y-%m-%d) + NEW_LINE="**Latest version:** [${VERSION}](https://github.com/LayerLens/stratix-python/releases/tag/${TAG}) — ${DATE}" + + git fetch origin main + git checkout main + git pull --ff-only origin main + + if ! 
grep -q '^\*\*Latest version:\*\*' CHANGELOG.md; then + echo "Error: '**Latest version:**' line not found in CHANGELOG.md" + exit 1 + fi + + sed -i "s|^\*\*Latest version:\*\*.*|${NEW_LINE}|" CHANGELOG.md + + if git diff --quiet CHANGELOG.md; then + echo "CHANGELOG.md already up to date for ${TAG}" + exit 0 + fi + + git add CHANGELOG.md + git commit -m "chore: update latest version pointer to ${TAG}" + git push origin main diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..8643f010 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,184 @@ +# Changelog + +All notable changes to the Stratix Python SDK will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +**Latest version:** [1.7.0](https://github.com/LayerLens/stratix-python/releases/tag/v1.7.0) — 2026-05-20 + +## [Unreleased] + +Things we're actively working on. Want to help? Check the [issues](https://github.com/LayerLens/stratix-python/issues) or [discussions](https://github.com/LayerLens/stratix-python/discussions). + +### Added + +### Changed + +### Fixed + +### Deprecated + +### Removed + +## [1.7.0] - 2026-05-20 + +### Added + +- `extra_payload` parameter on `models.create_custom` and `models.update_custom` (sync + async). Optional JSON object merged into every outgoing chat-completions request body; customer values win on conflict with our hardcoded defaults. Lets customers add provider-specific fields (`top_p`, `max_completion_tokens`) or override values like `temperature` for providers that reject our defaults. 
+ +## [1.6.1] - 2026-05-15 + +### Added + +- CLI authentication command (`layerlens auth`) (#72) +- `models.update_custom(model_id, *, api_url, api_key, max_tokens)` (sync + async) — repoint a custom model's mutable fields without recreating it (#169) +- `models.delete_custom(model_id)` (sync + async) — full teardown that disables the record, strips it from `Project.Models`, and releases the name for reuse (#169) +- 70+ production-ready SDK samples across 12 categories: core, industry, cowork, modalities, integrations, cicd, cli, openclaw, mcp, copilotkit, claude-code, data (#73) +- MCP server sample exposing LayerLens as tools +- CopilotKit sample with LangGraph CoAgents, React components, and hooks +- New trace samples (#144) + +### Changed + +- `models.add()` / `models.remove()` now operate on the full project model list (public + custom). The previous `type="public"` filter silently dropped custom-model IDs from `Project.Models` on every call (#169) +- Expanded SDK documentation and README (#139, #167) + +### Fixed + +- Trace evaluations bug (#74) +- CopilotKit evaluator graph now compiles with a checkpointer so `interrupt()` works over AG-UI. 
Includes a `RunIdPreservingAgent` workaround for the upstream `ag-ui-langgraph` runId-overwrite bug ([ag-ui-protocol/ag-ui#1582](https://github.com/ag-ui-protocol/ag-ui/issues/1582)) (#92) + +## [1.6.0] - 2026-03-25 + +### Added + +- Prompts exposed on the private client (#70) + +## [1.5.0] - 2026-03-23 + +### Added + +- Full-featured command-line interface via `layerlens` / `stratix` +- `client.scorers` resource with full CRUD: create, get, list, update, delete +- `client.evaluation_spaces` resource with get, list, create, update, delete +- `client.integrations` resource with get, list, create, update, delete, and test +- CLI getting started guide, command reference, and examples +- Scorers API reference documentation + +### Changed + +- Updated evaluations, models & benchmarks, and public client docs with new parameters + +### Fixed + +- `filter` by categories/languages/companies/regions/licenses now returns correct results + +## [1.4.0] - 2026-03-17 + +### Added + +- `unique` parameter on `evaluations.get_many()` and `public_evaluations.get_many()` that deduplicates results by model+dataset pair, keeping only the latest evaluation per pair + +### Fixed + +- Model comparison now passes `unique=True` when fetching evaluations, ensuring the correct (latest) evaluation is used for each model+benchmark pair instead of potentially picking up duplicates + +## [1.3.3] - 2026-03-17 + +### Added + +- Missing methods on `benchmarks` and `models` resources + +### Fixed + +- Inconsistent API naming across the SDK now follows a unified convention. Affected resources: comparisons, evaluations, judges, results, trace evaluations, traces, public benchmarks/evaluations/models (#61) +- `SUMMARY.md` structure and examples updated to match new naming + +## [1.3.2] - 2026-03-13 + +### Added + +- Documentation pages for GitBook: getting-started, troubleshooting, security + +### Fixed + +- `trace_evaluations.get_results()` no longer returns empty/None results. 
The API returns evaluation data (score, passed, reasoning, steps) directly, but the SDK was looking for a non-existent results array. `TraceEvaluationResultsResponse` now correctly maps to the API response shape and inherits from `TraceEvaluationResult` +- `TraceEvaluationStep` model now matches actual API fields (`tool`, `args`, `result`) instead of the incorrect (`step`, `reasoning`) + +## [1.3.1] - 2026-03-13 + +### Added + +- Automatic retry with exponential backoff for transient errors (HTTP 429, 500, 502, 503, 504) in both sync and async clients (up to 2 retries, respects `Retry-After` header, max 8s delay) +- Expanded documentation: updated README, examples for models/benchmarks, public API, and retrieving results + +## [1.3.0] - 2026-03-13 + +### Changed + +- Expanded model and benchmark result models with additional fields + +### Fixed + +- CI/CD publish workflows + +## [1.2.0] - 2026-03-13 + +### Added + +- `Stratix` / `AsyncStratix` clients (rebrand from Atlas) +- Judges resource with full CRUD +- Trace upload (JSON/JSONL up to 50 MB via presigned S3) and `trace_evaluations` resource +- Judge optimizations resource for tuning judge configurations +- `PublicClient` — a dedicated client for public endpoints (models, benchmarks, evaluations, comparisons), also accessible via `client.public` +- `get_by_key`, `add`, `remove`, `create_custom`, `create_smart` methods on Model & Benchmark resources +- `comparisons` resource for comparing evaluation results +- Apache 2.0 license + +### Changed + +- Expanded benchmark and model models with additional fields + +### Deprecated + +- `Atlas` client name — use `Stratix` instead (legacy `Atlas` aliases kept for backward compatibility) + +### Fixed + +- Evaluation status enum values + +## [1.0.2] - 2026-03-13 + +### Changed + +- Updated publish-to-AWS packaging job + +## [1.0.1] - 2026-03-13 + +### Fixed + +- Version bump + +## [1.0.0] - 2026-03-13 + +### Added + +- Initial release of the LayerLens evaluation SDK +- Sync 
and async clients for the LayerLens evaluation API +- `evaluations`, `results`, `models`, and `benchmarks` resources +- Typed exception hierarchy for API errors + +[Unreleased]: https://github.com/LayerLens/stratix-python/compare/v1.7.0...HEAD +[1.7.0]: https://github.com/LayerLens/stratix-python/compare/v1.6.1...v1.7.0 +[1.6.1]: https://github.com/LayerLens/stratix-python/compare/v1.6.0...v1.6.1 +[1.6.0]: https://github.com/LayerLens/stratix-python/compare/v1.5.0...v1.6.0 +[1.5.0]: https://github.com/LayerLens/stratix-python/compare/v1.4.0...v1.5.0 +[1.4.0]: https://github.com/LayerLens/stratix-python/compare/v1.3.3...v1.4.0 +[1.3.3]: https://github.com/LayerLens/stratix-python/compare/v1.3.2...v1.3.3 +[1.3.2]: https://github.com/LayerLens/stratix-python/compare/v1.3.1...v1.3.2 +[1.3.1]: https://github.com/LayerLens/stratix-python/compare/v1.3.0...v1.3.1 +[1.3.0]: https://github.com/LayerLens/stratix-python/compare/v1.2.0...v1.3.0 +[1.2.0]: https://github.com/LayerLens/stratix-python/compare/v1.0.2...v1.2.0 +[1.0.2]: https://github.com/LayerLens/stratix-python/compare/v1.0.1...v1.0.2 +[1.0.1]: https://github.com/LayerLens/stratix-python/compare/v1.0.0...v1.0.1 +[1.0.0]: https://github.com/LayerLens/stratix-python/releases/tag/v1.0.0 diff --git a/docs/README.md b/docs/README.md index b3191dbd..9a0ca606 100644 --- a/docs/README.md +++ b/docs/README.md @@ -176,9 +176,12 @@ response = client.models.create_custom( name="My Fine-tuned Model", key="my-org/custom-model-v1", description="Fine-tuned GPT for medical Q&A", - api_url="https://my-api.example.com/v1", + api_url="https://my-api.example.com/v1/chat/completions", max_tokens=4096, api_key=os.environ.get("MY_PROVIDER_API_KEY"), # optional + # Optional — merged into every request body. Useful for provider-specific + # fields or for overriding our defaults (e.g. {"temperature": 1}). 
+ extra_payload={"top_p": 0.9}, ) print(f"Created model: {response.model_id}") ``` diff --git a/docs/api-reference/models-benchmarks.md b/docs/api-reference/models-benchmarks.md index aeffd32e..22a773c3 100644 --- a/docs/api-reference/models-benchmarks.md +++ b/docs/api-reference/models-benchmarks.md @@ -143,21 +143,28 @@ client = Stratix() success = client.models.remove("model-id-1", "model-id-2") ``` -### `create_custom(name, key, description, api_url, max_tokens, api_key=None, timeout=None)` +### `create_custom(name, key, description, api_url, max_tokens, api_key=None, extra_payload=None, timeout=None)` Creates a custom model backed by an OpenAI-compatible API endpoint. This allows you to evaluate any model accessible via a chat completions endpoint. #### Parameters -| Parameter | Type | Required | Description | -| ------------- | -------------------------------- | -------- | --------------------------------------------------------------------------------- | -| `name` | `str` | Yes | Model name (max 256 characters) | -| `key` | `str` | Yes | Unique model key, lowercase alphanumeric with dots/hyphens/slashes (max 256 chars)| -| `description` | `str` | Yes | Model description (max 500 characters) | -| `api_url` | `str` | Yes | Base URL of the OpenAI-compatible API endpoint | -| `max_tokens` | `int` | Yes | Maximum number of tokens the model supports | -| `api_key` | `str \| None` | No | API key for the model provider | -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| --------------- | --------------------------------- | -------- | --------------------------------------------------------------------------------- | +| `name` | `str` | Yes | Model name (max 256 characters) | +| `key` | `str` | Yes | Unique model key, lowercase alphanumeric with dots/hyphens/slashes (max 256 chars)| +| `description` | `str` | Yes | Model description (max 500 characters) | +| `api_url` | `str` | Yes | 
Full URL of the OpenAI-compatible chat completions endpoint | +| `max_tokens` | `int` | Yes | Maximum number of tokens the model supports | +| `api_key` | `str \| None` | No | API key for the model provider | +| `extra_payload` | `Dict[str, Any] \| None` | No | JSON object merged into every outgoing chat-completions request body (see below) | +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | + +#### `extra_payload` semantics + +When set, the keys/values in `extra_payload` are deep-merged into every outgoing request body. Customer values **win on conflict** with our hardcoded defaults — use it to override `temperature` (we send `0` for reproducible evaluations) or to add provider-specific fields like `top_p`, `presence_penalty`, or `max_completion_tokens` (required by some OpenAI reasoning models that reject `max_tokens`). + +The keys `messages`, `model`, and `stream` are reserved and will be rejected. #### Returns @@ -178,28 +185,31 @@ result = client.models.create_custom( name="My Custom Model", key="my-org/custom-model-v1", description="Custom fine-tuned model served via vLLM", - api_url="https://my-model-endpoint.example.com/v1", + api_url="https://my-model-endpoint.example.com/v1/chat/completions", api_key="my-provider-api-key", max_tokens=4096, + # Optional — provider-specific overrides merged into every request body. + extra_payload={"top_p": 0.9}, ) if result: print(f"Created model: {result.model_id}") ``` -### `update_custom(model_id, *, api_url=None, api_key=None, max_tokens=None, timeout=None)` +### `update_custom(model_id, *, api_url=None, api_key=None, max_tokens=None, extra_payload=None, timeout=None)` -Updates a custom model's mutable fields. At least one of `api_url`, `api_key`, or `max_tokens` must be provided. Primary use case: repointing `api_url` for ephemeral vLLM endpoints behind cloudflared tunnels whose URL changes between sessions. +Updates a custom model's mutable fields. 
At least one of `api_url`, `api_key`, `max_tokens`, or `extra_payload` must be provided. Primary use case: repointing `api_url` for ephemeral vLLM endpoints behind cloudflared tunnels whose URL changes between sessions. #### Parameters -| Parameter | Type | Required | Description | -| ------------ | -------------------------------- | -------- | -------------------------------------------------------- | -| `model_id` | `str` | Yes | ID of the custom model to update | -| `api_url` | `str \| None` | No | New base URL for the OpenAI-compatible API endpoint | -| `api_key` | `str \| None` | No | New API key for the model provider | -| `max_tokens` | `int \| None` | No | New maximum tokens value | -| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | +| Parameter | Type | Required | Description | +| --------------- | --------------------------------- | -------- | -------------------------------------------------------------------------------- | +| `model_id` | `str` | Yes | ID of the custom model to update | +| `api_url` | `str \| None` | No | New full URL of the OpenAI-compatible chat completions endpoint | +| `api_key` | `str \| None` | No | New API key for the model provider | +| `max_tokens` | `int \| None` | No | New maximum tokens value | +| `extra_payload` | `Dict[str, Any] \| None` | No | New JSON object merged into every outgoing request. Pass `{}` to clear it. 
| +| `timeout` | `float \| httpx.Timeout \| None` | No | Override request timeout | #### Returns @@ -213,7 +223,13 @@ client = Stratix() # Repoint the api_url without re-creating the model client.models.update_custom( "model-id-from-create-custom", - api_url="https://my-new-endpoint.example.com/v1", + api_url="https://my-new-endpoint.example.com/v1/chat/completions", +) + +# Override request parameters for a model that doesn't accept temperature=0 +client.models.update_custom( + "model-id-from-create-custom", + extra_payload={"temperature": 1}, ) ``` diff --git a/docs/examples/models-and-benchmarks.md b/docs/examples/models-and-benchmarks.md index e517344f..fa67154c 100644 --- a/docs/examples/models-and-benchmarks.md +++ b/docs/examples/models-and-benchmarks.md @@ -115,7 +115,7 @@ def main(): name="My Custom Model", key="my-org/custom-model-v1", description="Custom fine-tuned model served via vLLM", - api_url="https://my-model-endpoint.example.com/v1", + api_url="https://my-model-endpoint.example.com/v1/chat/completions", api_key=os.environ["MY_PROVIDER_API_KEY"], max_tokens=4096, ) @@ -152,7 +152,7 @@ def main(): name="My Tunnel-backed Model", key="my-org/tunnel-model-v1", description="vLLM served behind a cloudflared tunnel", - api_url="https://tunnel-1.example.com/v1", + api_url="https://tunnel-1.example.com/v1/chat/completions", api_key="my-provider-api-key", max_tokens=4096, ) @@ -161,7 +161,7 @@ def main(): # Later, when the tunnel URL changes: client.models.update_custom( result.model_id, - api_url="https://tunnel-2.example.com/v1", + api_url="https://tunnel-2.example.com/v1/chat/completions", ) # Run evaluations as usual — the model now points at the new endpoint. 
@@ -190,7 +190,7 @@ def main(): name="My Custom Model", key="my-org/custom-model-v2", description="Replacement after schema migration", - api_url="https://my-endpoint.example.com/v1", + api_url="https://my-endpoint.example.com/v1/chat/completions", api_key="my-provider-api-key", max_tokens=4096, ) diff --git a/samples/claude-code/skills/benchmark.md b/samples/claude-code/skills/benchmark.md index 7926da6a..8083727c 100644 --- a/samples/claude-code/skills/benchmark.md +++ b/samples/claude-code/skills/benchmark.md @@ -38,7 +38,7 @@ model = client.models.create_custom( name="My Custom Model", key="my-custom-model", description="Fine-tuned GPT for legal analysis", - api_url="https://api.example.com/v1/completions", + api_url="https://api.example.com/v1/chat/completions", max_tokens=4096, api_key="sk-...", ) diff --git a/samples/core/custom_model.py b/samples/core/custom_model.py index e660fb04..6ff83ec8 100644 --- a/samples/core/custom_model.py +++ b/samples/core/custom_model.py @@ -39,9 +39,14 @@ def main() -> None: name="My Custom Model", key="my-org/custom-model-v1", description="Custom fine-tuned model served via vLLM", - api_url="https://my-model-endpoint.example.com/v1", + api_url="https://my-model-endpoint.example.com/v1/chat/completions", api_key="my-provider-api-key", max_tokens=4096, + # Optional -- merged into every outgoing request body. Customer + # values win on conflict with our hardcoded defaults (we send + # `temperature: 0` for reproducible evals; override here for + # providers that reject it, e.g. {"temperature": 1}). 
+ extra_payload={"top_p": 0.9}, ) if result: @@ -69,7 +74,7 @@ def main() -> None: updated = client.models.update_custom( result.model_id, - api_url="https://my-new-endpoint.example.com/v1", + api_url="https://my-new-endpoint.example.com/v1/chat/completions", ) if updated: print(f"\nCustom model {result.model_id} api_url updated") diff --git a/src/layerlens/_version.py b/src/layerlens/_version.py index fb6b8f67..9ce8ca24 100644 --- a/src/layerlens/_version.py +++ b/src/layerlens/_version.py @@ -1,4 +1,4 @@ -__version__ = "1.6.1" +__version__ = "1.7.0" # Will be templated during the build __git_commit__ = "__GIT_COMMIT__" diff --git a/src/layerlens/resources/models/models.py b/src/layerlens/resources/models/models.py index 30ad5579..f29c2a97 100644 --- a/src/layerlens/resources/models/models.py +++ b/src/layerlens/resources/models/models.py @@ -220,6 +220,7 @@ def create_custom( api_url: str, max_tokens: int, api_key: Optional[str] = None, + extra_payload: Optional[Dict[str, Any]] = None, timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, ) -> Optional[CreateModelResponse]: """Create a custom model backed by an OpenAI-compatible API. @@ -231,6 +232,11 @@ def create_custom( - api_url: Base URL of the OpenAI-compatible API endpoint. + api_url: Full URL of the OpenAI-compatible chat completions endpoint. max_tokens: Maximum number of tokens the model supports. api_key: Optional API key for the model provider. + extra_payload: Optional JSON object merged into every outgoing + chat-completions request body. Customer values win on conflict + with our defaults (e.g. set ``{"temperature": 1}`` for + providers that reject ``temperature: 0``). Keys ``messages``, + ``model`` and ``stream`` are reserved. timeout: Request timeout override. 
Returns: @@ -246,6 +252,8 @@ def create_custom( } if api_key is not None: body["api_key"] = api_key + if extra_payload is not None: + body["extra_payload"] = extra_payload resp = self._post( f"{base}/custom-models", @@ -266,12 +274,13 @@ def update_custom( api_url: Optional[str] = None, api_key: Optional[str] = None, max_tokens: Optional[int] = None, + extra_payload: Optional[Dict[str, Any]] = None, timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, ) -> bool: """Update a custom model's mutable fields. - At least one of ``api_url``, ``api_key``, or ``max_tokens`` must be - provided. Returns ``True`` on success. + At least one of ``api_url``, ``api_key``, ``max_tokens`` or + ``extra_payload`` must be provided. Returns ``True`` on success. Primary use case: repointing ``api_url`` for ephemeral vLLM endpoints behind cloudflared tunnels whose URL changes between sessions. @@ -281,6 +290,9 @@ def update_custom( - api_url: New base URL for the OpenAI-compatible API endpoint. + api_url: New full URL of the OpenAI-compatible chat completions endpoint. api_key: New API key for the model provider. max_tokens: New maximum tokens value. + extra_payload: New JSON object merged into every outgoing request + body. Pass ``{}`` to clear the existing payload. See + ``create_custom`` for semantics. timeout: Request timeout override. """ url = ( @@ -293,6 +305,8 @@ def update_custom( body["api_key"] = api_key if max_tokens is not None: body["max_tokens"] = max_tokens + if extra_payload is not None: + body["extra_payload"] = extra_payload resp = self._patch(url, body=body, timeout=timeout, cast_to=dict) return isinstance(resp, dict) and "data" in resp @@ -477,6 +491,7 @@ async def create_custom( api_url: str, max_tokens: int, api_key: Optional[str] = None, + extra_payload: Optional[Dict[str, Any]] = None, timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, ) -> Optional[CreateModelResponse]: """Create a custom model backed by an OpenAI-compatible API. 
@@ -488,6 +503,11 @@ async def create_custom( - api_url: Base URL of the OpenAI-compatible API endpoint. + api_url: Full URL of the OpenAI-compatible chat completions endpoint. max_tokens: Maximum number of tokens the model supports. api_key: Optional API key for the model provider. + extra_payload: Optional JSON object merged into every outgoing + chat-completions request body. Customer values win on conflict + with our defaults (e.g. set ``{"temperature": 1}`` for + providers that reject ``temperature: 0``). Keys ``messages``, + ``model`` and ``stream`` are reserved. timeout: Request timeout override. Returns: @@ -503,6 +523,8 @@ async def create_custom( } if api_key is not None: body["api_key"] = api_key + if extra_payload is not None: + body["extra_payload"] = extra_payload resp = await self._post( f"{base}/custom-models", @@ -523,12 +545,23 @@ async def update_custom( api_url: Optional[str] = None, api_key: Optional[str] = None, max_tokens: Optional[int] = None, + extra_payload: Optional[Dict[str, Any]] = None, timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT, ) -> bool: """Update a custom model's mutable fields. - At least one of ``api_url``, ``api_key``, or ``max_tokens`` must be - provided. Returns ``True`` on success. + At least one of ``api_url``, ``api_key``, ``max_tokens`` or + ``extra_payload`` must be provided. Returns ``True`` on success. + + Args: + model_id: ID of the custom model to update. + api_url: New full URL of the OpenAI-compatible chat completions endpoint. + api_key: New API key for the model provider. + max_tokens: New maximum tokens value. + extra_payload: New JSON object merged into every outgoing request + body. Pass ``{}`` to clear the existing payload. See + ``create_custom`` for semantics. + timeout: Request timeout override. 
""" url = ( f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/custom-models/{model_id}" @@ -540,6 +573,8 @@ async def update_custom( body["api_key"] = api_key if max_tokens is not None: body["max_tokens"] = max_tokens + if extra_payload is not None: + body["extra_payload"] = extra_payload resp = await self._patch(url, body=body, timeout=timeout, cast_to=dict) return isinstance(resp, dict) and "data" in resp diff --git a/tests/resources/test_models_resource.py b/tests/resources/test_models_resource.py index 6fbd4895..5f4c90fb 100644 --- a/tests/resources/test_models_resource.py +++ b/tests/resources/test_models_resource.py @@ -798,6 +798,44 @@ def test_create_custom_omits_api_key_when_none(self, models_resource): call_body = models_resource._post.call_args.kwargs["body"] assert "api_key" not in call_body + def test_create_custom_includes_extra_payload_when_provided(self, models_resource): + """create_custom() forwards extra_payload verbatim.""" + models_resource._post.return_value = { + "status": "success", + "data": {"model_id": "x", "organization_id": "o", "project_id": "p"}, + } + + payload = {"top_p": 0.9, "provider": {"order": ["anthropic"]}} + models_resource.create_custom( + name="My Model", + key="my/model", + description="desc", + api_url="https://example.com/v1/chat/completions", + max_tokens=4096, + extra_payload=payload, + ) + + call_body = models_resource._post.call_args.kwargs["body"] + assert call_body["extra_payload"] == payload + + def test_create_custom_omits_extra_payload_when_none(self, models_resource): + """create_custom() does not include extra_payload when not provided.""" + models_resource._post.return_value = { + "status": "success", + "data": {"model_id": "x", "organization_id": "o", "project_id": "p"}, + } + + models_resource.create_custom( + name="My Model", + key="my/model", + description="desc", + api_url="https://example.com/v1/chat/completions", + max_tokens=4096, + ) + + call_body = 
models_resource._post.call_args.kwargs["body"] + assert "extra_payload" not in call_body + def test_create_custom_correct_url(self, models_resource): """create_custom() posts to the correct endpoint.""" models_resource._post.return_value = { @@ -1208,6 +1246,36 @@ def test_update_custom_max_tokens_only(self, models_resource): body = models_resource._patch.call_args.kwargs["body"] assert body == {"max_tokens": 8192} + def test_update_custom_extra_payload_only(self, models_resource): + """update_custom() supports extra_payload-only updates.""" + models_resource._patch.return_value = {"data": {"id": "model-1"}} + + payload = {"temperature": 1, "top_p": 0.9} + result = models_resource.update_custom("model-1", extra_payload=payload) + + assert result is True + body = models_resource._patch.call_args.kwargs["body"] + assert body == {"extra_payload": payload} + + def test_update_custom_extra_payload_empty_dict_clears_payload(self, models_resource): + """update_custom(extra_payload={}) sends the empty dict so the backend clears the stored payload.""" + models_resource._patch.return_value = {"data": {"id": "model-1"}} + + result = models_resource.update_custom("model-1", extra_payload={}) + + assert result is True + body = models_resource._patch.call_args.kwargs["body"] + assert body == {"extra_payload": {}} + + def test_update_custom_omits_extra_payload_when_none(self, models_resource): + """update_custom() does not include extra_payload when not provided.""" + models_resource._patch.return_value = {"data": {"id": "model-1"}} + + models_resource.update_custom("model-1", api_url="https://x.io") + + body = models_resource._patch.call_args.kwargs["body"] + assert "extra_payload" not in body + def test_update_custom_returns_false_on_error_envelope(self, models_resource): """update_custom() returns False when response has no data field.""" models_resource._patch.return_value = {"code": "NOT_FOUND", "message": "missing"}