diff --git a/.github/workflows/auto-release.yaml b/.github/workflows/auto-release.yaml
new file mode 100644
index 00000000..d089c21d
--- /dev/null
+++ b/.github/workflows/auto-release.yaml
@@ -0,0 +1,128 @@
+# Auto Release: on every push to `release`, read the package version, create
+# the matching `v<version>` tag if it does not exist yet, dispatch the AWS
+# publish workflow for that tag, and refresh the "Latest version" pointer in
+# CHANGELOG.md on `main`.
+name: Auto Release
+
+on:
+  push:
+    branches:
+      - release
+
+# Serialize runs so two quick pushes to `release` cannot race on creating the
+# same tag or on pushing the CHANGELOG commit to `main`.
+concurrency:
+  group: auto-release
+  cancel-in-progress: false
+
+jobs:
+  auto-tag-and-publish:
+    runs-on: ubuntu-latest
+    environment: production
+
+    permissions:
+      # Needed to push the tag and the CHANGELOG commit.
+      contents: write
+      # Needed for `gh workflow run` to dispatch publish-to-aws.yaml.
+      actions: write
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Fetch full history, including all branches and tags.
+          fetch-depth: 0
+
+      - name: Read version from _version.py
+        id: get_version
+        run: |
+          chmod +x ./scripts/get_version.sh
+          VERSION=$(./scripts/get_version.sh)
+          if [ -z "$VERSION" ]; then
+            echo "Error: Could not extract version"
+            exit 1
+          fi
+          TAG="v${VERSION}"
+          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
+          echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
+          echo "Resolved tag: ${TAG}"
+
+      - name: Check if tag already exists
+        id: check_tag
+        env:
+          # Step outputs reach the scripts through env vars instead of being
+          # interpolated into the script text, so an unexpected character in
+          # the version string cannot change the shell command that runs.
+          TAG: ${{ steps.get_version.outputs.tag }}
+        run: |
+          git fetch --tags --prune --force
+          if git rev-parse --verify "refs/tags/${TAG}" >/dev/null 2>&1; then
+            echo "Tag ${TAG} already exists on origin — nothing to do."
+            echo "exists=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "Tag ${TAG} does not exist — will create."
+            echo "exists=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Configure git
+        if: steps.check_tag.outputs.exists == 'false'
+        run: |
+          git config --global user.name "github-actions[bot]"
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+
+      - name: Create and push tag
+        if: steps.check_tag.outputs.exists == 'false'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          TAG: ${{ steps.get_version.outputs.tag }}
+        run: |
+          git tag "${TAG}" "${GITHUB_SHA}"
+          git push origin "${TAG}"
+          echo "Pushed tag ${TAG} at ${GITHUB_SHA}"
+
+      # The tag push above uses GITHUB_TOKEN, and pushes made with that token
+      # do not start other workflows — presumably why publishing is dispatched
+      # explicitly here (workflow_dispatch is exempt from that restriction).
+      - name: Dispatch publish-to-aws workflow
+        if: steps.check_tag.outputs.exists == 'false'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          TAG: ${{ steps.get_version.outputs.tag }}
+        run: |
+          gh workflow run publish-to-aws.yaml \
+            --ref "${TAG}" \
+            -f release_tag="${TAG}"
+          echo "Dispatched publish-to-aws.yaml for ${TAG}"
+
+      - name: Update latest version pointer in CHANGELOG.md on main
+        if: steps.check_tag.outputs.exists == 'false'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          VERSION: ${{ steps.get_version.outputs.version }}
+          TAG: ${{ steps.get_version.outputs.tag }}
+          # Derived from the workflow context so the release link follows the
+          # repository if it is renamed, transferred, or forked.
+          REPO_URL: ${{ github.server_url }}/${{ github.repository }}
+        run: |
+          DATE=$(date -u +%Y-%m-%d)
+          NEW_LINE="**Latest version:** [${VERSION}](${REPO_URL}/releases/tag/${TAG}) — ${DATE}"
+
+          git fetch origin main
+          git checkout main
+          git pull --ff-only origin main
+
+          if ! grep -q '^\*\*Latest version:\*\*' CHANGELOG.md; then
+            echo "Error: '**Latest version:**' line not found in CHANGELOG.md"
+            exit 1
+          fi
+
+          # `|` is the sed delimiter because the replacement contains `/`.
+          sed -i "s|^\*\*Latest version:\*\*.*|${NEW_LINE}|" CHANGELOG.md
+
+          if git diff --quiet CHANGELOG.md; then
+            echo "CHANGELOG.md already up to date for ${TAG}"
+            exit 0
+          fi
+
+          git add CHANGELOG.md
+          git commit -m "chore: update latest version pointer to ${TAG}"
+          git push origin main
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..8643f010
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,184 @@
+# Changelog
+
+All notable changes to the Stratix Python SDK will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+**Latest version:** [1.7.0](https://github.com/LayerLens/stratix-python/releases/tag/v1.7.0) — 2026-05-20
+
+## [Unreleased]
+
+Things we're actively working on. Want to help? Check the [issues](https://github.com/LayerLens/stratix-python/issues) or [discussions](https://github.com/LayerLens/stratix-python/discussions).
+
+### Added
+
+### Changed
+
+### Fixed
+
+### Deprecated
+
+### Removed
+
+## [1.7.0] - 2026-05-20
+
+### Added
+
+- `extra_payload` parameter on `models.create_custom` and `models.update_custom` (sync + async). Optional JSON object merged into every outgoing chat-completions request body; customer values win on conflict with our hardcoded defaults. Lets customers add provider-specific fields (`top_p`, `max_completion_tokens`) or override values like `temperature` for providers that reject our defaults.
+
+## [1.6.1] - 2026-05-15
+
+### Added
+
+- CLI authentication command (`layerlens auth`) (#72)
+- `models.update_custom(model_id, *, api_url, api_key, max_tokens)` (sync + async) — repoint a custom model's mutable fields without recreating it (#169)
+- `models.delete_custom(model_id)` (sync + async) — full teardown that disables the record, strips it from `Project.Models`, and releases the name for reuse (#169)
+- 70+ production-ready SDK samples across 12 categories: core, industry, cowork, modalities, integrations, cicd, cli, openclaw, mcp, copilotkit, claude-code, data (#73)
+- MCP server sample exposing LayerLens as tools
+- CopilotKit sample with LangGraph CoAgents, React components, and hooks
+- New trace samples (#144)
+
+### Changed
+
+- `models.add()` / `models.remove()` now operate on the full project model list (public + custom). The previous `type="public"` filter silently dropped custom-model IDs from `Project.Models` on every call (#169)
+- Expanded SDK documentation and README (#139, #167)
+
+### Fixed
+
+- Trace evaluations bug (#74)
+- CopilotKit evaluator graph now compiles with a checkpointer so `interrupt()` works over AG-UI. Includes a `RunIdPreservingAgent` workaround for the upstream `ag-ui-langgraph` runId-overwrite bug ([ag-ui-protocol/ag-ui#1582](https://github.com/ag-ui-protocol/ag-ui/issues/1582)) (#92)
+
+## [1.6.0] - 2026-03-25
+
+### Added
+
+- Prompts exposed on the private client (#70)
+
+## [1.5.0] - 2026-03-23
+
+### Added
+
+- Full-featured command-line interface via `layerlens` / `stratix`
+- `client.scorers` resource with full CRUD: create, get, list, update, delete
+- `client.evaluation_spaces` resource with get, list, create, update, delete
+- `client.integrations` resource with get, list, create, update, delete, and test
+- CLI getting started guide, command reference, and examples
+- Scorers API reference documentation
+
+### Changed
+
+- Updated evaluations, models & benchmarks, and public client docs with new parameters
+
+### Fixed
+
+- `filter` by categories/languages/companies/regions/licenses now returns correct results
+
+## [1.4.0] - 2026-03-17
+
+### Added
+
+- `unique` parameter on `evaluations.get_many()` and `public_evaluations.get_many()` that deduplicates results by model+dataset pair, keeping only the latest evaluation per pair
+
+### Fixed
+
+- Model comparison now passes `unique=True` when fetching evaluations, ensuring the correct (latest) evaluation is used for each model+benchmark pair instead of potentially picking up duplicates
+
+## [1.3.3] - 2026-03-17
+
+### Added
+
+- Missing methods on `benchmarks` and `models` resources
+
+### Fixed
+
+- Inconsistent API naming across the SDK now follows a unified convention. Affected resources: comparisons, evaluations, judges, results, trace evaluations, traces, public benchmarks/evaluations/models (#61)
+- `SUMMARY.md` structure and examples updated to match new naming
+
+## [1.3.2] - 2026-03-13
+
+### Added
+
+- Documentation pages for GitBook: getting-started, troubleshooting, security
+
+### Fixed
+
+- `trace_evaluations.get_results()` no longer returns empty/None results. The API returns evaluation data (score, passed, reasoning, steps) directly, but the SDK was looking for a non-existent results array. `TraceEvaluationResultsResponse` now correctly maps to the API response shape and inherits from `TraceEvaluationResult`
+- `TraceEvaluationStep` model now matches actual API fields (`tool`, `args`, `result`) instead of the incorrect (`step`, `reasoning`)
+
+## [1.3.1] - 2026-03-13
+
+### Added
+
+- Automatic retry with exponential backoff for transient errors (HTTP 429, 500, 502, 503, 504) in both sync and async clients (up to 2 retries, respects `Retry-After` header, max 8s delay)
+- Expanded documentation: updated README, examples for models/benchmarks, public API, and retrieving results
+
+## [1.3.0] - 2026-03-13
+
+### Changed
+
+- Expanded model and benchmark result models with additional fields
+
+### Fixed
+
+- CI/CD publish workflows
+
+## [1.2.0] - 2026-03-13
+
+### Added
+
+- `Stratix` / `AsyncStratix` clients (rebrand from Atlas)
+- Judges resource with full CRUD
+- Trace upload (JSON/JSONL up to 50 MB via presigned S3) and `trace_evaluations` resource
+- Judge optimizations resource for tuning judge configurations
+- `PublicClient` — a dedicated client for public endpoints (models, benchmarks, evaluations, comparisons), also accessible via `client.public`
+- `get_by_key`, `add`, `remove`, `create_custom`, `create_smart` methods on Model & Benchmark resources
+- `comparisons` resource for comparing evaluation results
+- Apache 2.0 license
+
+### Changed
+
+- Expanded benchmark and model models with additional fields
+
+### Deprecated
+
+- `Atlas` client name — use `Stratix` instead (legacy `Atlas` aliases kept for backward compatibility)
+
+### Fixed
+
+- Evaluation status enum values
+
+## [1.0.2] - 2026-03-13
+
+### Changed
+
+- Updated publish-to-AWS packaging job
+
+## [1.0.1] - 2026-03-13
+
+### Fixed
+
+- Version bump
+
+## [1.0.0] - 2026-03-13
+
+### Added
+
+- Initial release of the LayerLens evaluation SDK
+- Sync and async clients for the LayerLens evaluation API
+- `evaluations`, `results`, `models`, and `benchmarks` resources
+- Typed exception hierarchy for API errors
+
+[Unreleased]: https://github.com/LayerLens/stratix-python/compare/v1.7.0...HEAD
+[1.6.1]: https://github.com/LayerLens/stratix-python/compare/v1.6.0...v1.6.1
+[1.6.0]: https://github.com/LayerLens/stratix-python/compare/v1.5.0...v1.6.0
+[1.5.0]: https://github.com/LayerLens/stratix-python/compare/v1.4.0...v1.5.0
+[1.4.0]: https://github.com/LayerLens/stratix-python/compare/v1.3.3...v1.4.0
+[1.3.3]: https://github.com/LayerLens/stratix-python/compare/v1.3.2...v1.3.3
+[1.3.2]: https://github.com/LayerLens/stratix-python/compare/v1.3.1...v1.3.2
+[1.3.1]: https://github.com/LayerLens/stratix-python/compare/v1.3.0...v1.3.1
+[1.3.0]: https://github.com/LayerLens/stratix-python/compare/v1.2.0...v1.3.0
+[1.2.0]: https://github.com/LayerLens/stratix-python/compare/v1.0.2...v1.2.0
+[1.0.2]: https://github.com/LayerLens/stratix-python/compare/v1.0.1...v1.0.2
+[1.0.1]: https://github.com/LayerLens/stratix-python/compare/v1.0.0...v1.0.1
+[1.0.0]: https://github.com/LayerLens/stratix-python/releases/tag/v1.0.0
diff --git a/docs/README.md b/docs/README.md
index b3191dbd..9a0ca606 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -176,9 +176,12 @@ response = client.models.create_custom(
     name="My Fine-tuned Model",
     key="my-org/custom-model-v1",
     description="Fine-tuned GPT for medical Q&A",
-    api_url="https://my-api.example.com/v1",
+    api_url="https://my-api.example.com/v1/chat/completions",
     max_tokens=4096,
     api_key=os.environ.get("MY_PROVIDER_API_KEY"),  # optional
+    # Optional — merged into every request body. Useful for provider-specific
+    # fields or for overriding our defaults (e.g. {"temperature": 1}).
+    extra_payload={"top_p": 0.9},
 )
 print(f"Created model: {response.model_id}")
 ```
diff --git a/docs/api-reference/models-benchmarks.md b/docs/api-reference/models-benchmarks.md
index aeffd32e..22a773c3 100644
--- a/docs/api-reference/models-benchmarks.md
+++ b/docs/api-reference/models-benchmarks.md
@@ -143,21 +143,28 @@ client = Stratix()
 success = client.models.remove("model-id-1", "model-id-2")
 ```
 
-### `create_custom(name, key, description, api_url, max_tokens, api_key=None, timeout=None)`
+### `create_custom(name, key, description, api_url, max_tokens, api_key=None, extra_payload=None, timeout=None)`
 
 Creates a custom model backed by an OpenAI-compatible API endpoint. This allows you to evaluate any model accessible via a chat completions endpoint.
 
 #### Parameters
 
-| Parameter     | Type                             | Required | Description                                                                       |
-| ------------- | -------------------------------- | -------- | --------------------------------------------------------------------------------- |
-| `name`        | `str`                            | Yes      | Model name (max 256 characters)                                                   |
-| `key`         | `str`                            | Yes      | Unique model key, lowercase alphanumeric with dots/hyphens/slashes (max 256 chars)|
-| `description` | `str`                            | Yes      | Model description (max 500 characters)                                            |
-| `api_url`     | `str`                            | Yes      | Base URL of the OpenAI-compatible API endpoint                                    |
-| `max_tokens`  | `int`                            | Yes      | Maximum number of tokens the model supports                                       |
-| `api_key`     | `str \| None`                    | No       | API key for the model provider                                                    |
-| `timeout`     | `float \| httpx.Timeout \| None` | No       | Override request timeout                                                          |
+| Parameter       | Type                              | Required | Description                                                                       |
+| --------------- | --------------------------------- | -------- | --------------------------------------------------------------------------------- |
+| `name`          | `str`                             | Yes      | Model name (max 256 characters)                                                   |
+| `key`           | `str`                             | Yes      | Unique model key, lowercase alphanumeric with dots/hyphens/slashes (max 256 chars)|
+| `description`   | `str`                             | Yes      | Model description (max 500 characters)                                            |
+| `api_url`       | `str`                             | Yes      | Full URL of the OpenAI-compatible chat completions endpoint                       |
+| `max_tokens`    | `int`                             | Yes      | Maximum number of tokens the model supports                                       |
+| `api_key`       | `str \| None`                     | No       | API key for the model provider                                                    |
+| `extra_payload` | `Dict[str, Any] \| None`          | No       | JSON object merged into every outgoing chat-completions request body (see below)  |
+| `timeout`       | `float \| httpx.Timeout \| None`  | No       | Override request timeout                                                          |
+
+#### `extra_payload` semantics
+
+When set, the keys/values in `extra_payload` are deep-merged into every outgoing request body. Customer values **win on conflict** with our hardcoded defaults — use it to override `temperature` (we send `0` for reproducible evaluations) or to add provider-specific fields like `top_p`, `presence_penalty`, or `max_completion_tokens` (required by some OpenAI reasoning models that reject `max_tokens`).
+
+The keys `messages`, `model`, and `stream` are reserved and will be rejected.
 
 #### Returns
 
@@ -178,28 +185,31 @@ result = client.models.create_custom(
     name="My Custom Model",
     key="my-org/custom-model-v1",
     description="Custom fine-tuned model served via vLLM",
-    api_url="https://my-model-endpoint.example.com/v1",
+    api_url="https://my-model-endpoint.example.com/v1/chat/completions",
     api_key="my-provider-api-key",
     max_tokens=4096,
+    # Optional — provider-specific overrides merged into every request body.
+    extra_payload={"top_p": 0.9},
 )
 
 if result:
     print(f"Created model: {result.model_id}")
 ```
 
-### `update_custom(model_id, *, api_url=None, api_key=None, max_tokens=None, timeout=None)`
+### `update_custom(model_id, *, api_url=None, api_key=None, max_tokens=None, extra_payload=None, timeout=None)`
 
-Updates a custom model's mutable fields. At least one of `api_url`, `api_key`, or `max_tokens` must be provided. Primary use case: repointing `api_url` for ephemeral vLLM endpoints behind cloudflared tunnels whose URL changes between sessions.
+Updates a custom model's mutable fields. At least one of `api_url`, `api_key`, `max_tokens`, or `extra_payload` must be provided. Primary use case: repointing `api_url` for ephemeral vLLM endpoints behind cloudflared tunnels whose URL changes between sessions.
 
 #### Parameters
 
-| Parameter    | Type                             | Required | Description                                              |
-| ------------ | -------------------------------- | -------- | -------------------------------------------------------- |
-| `model_id`   | `str`                            | Yes      | ID of the custom model to update                         |
-| `api_url`    | `str \| None`                    | No       | New base URL for the OpenAI-compatible API endpoint      |
-| `api_key`    | `str \| None`                    | No       | New API key for the model provider                       |
-| `max_tokens` | `int \| None`                    | No       | New maximum tokens value                                 |
-| `timeout`    | `float \| httpx.Timeout \| None` | No       | Override request timeout                                 |
+| Parameter       | Type                              | Required | Description                                                                      |
+| --------------- | --------------------------------- | -------- | -------------------------------------------------------------------------------- |
+| `model_id`      | `str`                             | Yes      | ID of the custom model to update                                                 |
+| `api_url`       | `str \| None`                     | No       | New full URL of the OpenAI-compatible chat completions endpoint                  |
+| `api_key`       | `str \| None`                     | No       | New API key for the model provider                                               |
+| `max_tokens`    | `int \| None`                     | No       | New maximum tokens value                                                         |
+| `extra_payload` | `Dict[str, Any] \| None`          | No       | New JSON object merged into every outgoing request. Pass `{}` to clear it.       |
+| `timeout`       | `float \| httpx.Timeout \| None`  | No       | Override request timeout                                                         |
 
 #### Returns
 
@@ -213,7 +223,13 @@ client = Stratix()
 # Repoint the api_url without re-creating the model
 client.models.update_custom(
     "model-id-from-create-custom",
-    api_url="https://my-new-endpoint.example.com/v1",
+    api_url="https://my-new-endpoint.example.com/v1/chat/completions",
+)
+
+# Override request parameters for a model that doesn't accept temperature=0
+client.models.update_custom(
+    "model-id-from-create-custom",
+    extra_payload={"temperature": 1},
 )
 ```
 
diff --git a/docs/examples/models-and-benchmarks.md b/docs/examples/models-and-benchmarks.md
index e517344f..fa67154c 100644
--- a/docs/examples/models-and-benchmarks.md
+++ b/docs/examples/models-and-benchmarks.md
@@ -115,7 +115,7 @@ def main():
         name="My Custom Model",
         key="my-org/custom-model-v1",
         description="Custom fine-tuned model served via vLLM",
-        api_url="https://my-model-endpoint.example.com/v1",
+        api_url="https://my-model-endpoint.example.com/v1/chat/completions",
         api_key=os.environ["MY_PROVIDER_API_KEY"],
         max_tokens=4096,
     )
@@ -152,7 +152,7 @@ def main():
         name="My Tunnel-backed Model",
         key="my-org/tunnel-model-v1",
         description="vLLM served behind a cloudflared tunnel",
-        api_url="https://tunnel-1.example.com/v1",
+        api_url="https://tunnel-1.example.com/v1/chat/completions",
         api_key="my-provider-api-key",
         max_tokens=4096,
     )
@@ -161,7 +161,7 @@ def main():
     # Later, when the tunnel URL changes:
     client.models.update_custom(
         result.model_id,
-        api_url="https://tunnel-2.example.com/v1",
+        api_url="https://tunnel-2.example.com/v1/chat/completions",
     )
 
     # Run evaluations as usual — the model now points at the new endpoint.
@@ -190,7 +190,7 @@ def main():
         name="My Custom Model",
         key="my-org/custom-model-v2",
         description="Replacement after schema migration",
-        api_url="https://my-endpoint.example.com/v1",
+        api_url="https://my-endpoint.example.com/v1/chat/completions",
         api_key="my-provider-api-key",
         max_tokens=4096,
     )
diff --git a/samples/claude-code/skills/benchmark.md b/samples/claude-code/skills/benchmark.md
index 7926da6a..8083727c 100644
--- a/samples/claude-code/skills/benchmark.md
+++ b/samples/claude-code/skills/benchmark.md
@@ -38,7 +38,7 @@ model = client.models.create_custom(
     name="My Custom Model",
     key="my-custom-model",
     description="Fine-tuned GPT for legal analysis",
-    api_url="https://api.example.com/v1/completions",
+    api_url="https://api.example.com/v1/chat/completions",
     max_tokens=4096,
     api_key="sk-...",
 )
diff --git a/samples/core/custom_model.py b/samples/core/custom_model.py
index e660fb04..6ff83ec8 100644
--- a/samples/core/custom_model.py
+++ b/samples/core/custom_model.py
@@ -39,9 +39,14 @@ def main() -> None:
         name="My Custom Model",
         key="my-org/custom-model-v1",
         description="Custom fine-tuned model served via vLLM",
-        api_url="https://my-model-endpoint.example.com/v1",
+        api_url="https://my-model-endpoint.example.com/v1/chat/completions",
         api_key="my-provider-api-key",
         max_tokens=4096,
+        # Optional -- merged into every outgoing request body. Customer
+        # values win on conflict with our hardcoded defaults (we send
+        # `temperature: 0` for reproducible evals; override here for
+        # providers that reject it, e.g. {"temperature": 1}).
+        extra_payload={"top_p": 0.9},
     )
 
     if result:
@@ -69,7 +74,7 @@ def main() -> None:
 
     updated = client.models.update_custom(
         result.model_id,
-        api_url="https://my-new-endpoint.example.com/v1",
+        api_url="https://my-new-endpoint.example.com/v1/chat/completions",
     )
     if updated:
         print(f"\nCustom model {result.model_id} api_url updated")
diff --git a/src/layerlens/_version.py b/src/layerlens/_version.py
index fb6b8f67..9ce8ca24 100644
--- a/src/layerlens/_version.py
+++ b/src/layerlens/_version.py
@@ -1,4 +1,4 @@
-__version__ = "1.6.1"
+__version__ = "1.7.0"
 
 # Will be templated during the build
 __git_commit__ = "__GIT_COMMIT__"
diff --git a/src/layerlens/resources/models/models.py b/src/layerlens/resources/models/models.py
index 30ad5579..f29c2a97 100644
--- a/src/layerlens/resources/models/models.py
+++ b/src/layerlens/resources/models/models.py
@@ -220,6 +220,7 @@ def create_custom(
         api_url: str,
         max_tokens: int,
         api_key: Optional[str] = None,
+        extra_payload: Optional[Dict[str, Any]] = None,
         timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
     ) -> Optional[CreateModelResponse]:
         """Create a custom model backed by an OpenAI-compatible API.
@@ -231,6 +232,11 @@ def create_custom(
             api_url: Base URL of the OpenAI-compatible API endpoint.
             max_tokens: Maximum number of tokens the model supports.
             api_key: Optional API key for the model provider.
+            extra_payload: Optional JSON object merged into every outgoing
+                chat-completions request body. Customer values win on conflict
+                with our defaults (e.g. set ``{"temperature": 1}`` for
+                providers that reject ``temperature: 0``). Keys ``messages``,
+                ``model`` and ``stream`` are reserved.
             timeout: Request timeout override.
 
         Returns:
@@ -246,6 +252,8 @@ def create_custom(
         }
         if api_key is not None:
             body["api_key"] = api_key
+        if extra_payload is not None:
+            body["extra_payload"] = extra_payload
 
         resp = self._post(
             f"{base}/custom-models",
@@ -266,12 +274,13 @@ def update_custom(
         api_url: Optional[str] = None,
         api_key: Optional[str] = None,
         max_tokens: Optional[int] = None,
+        extra_payload: Optional[Dict[str, Any]] = None,
         timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
     ) -> bool:
         """Update a custom model's mutable fields.
 
-        At least one of ``api_url``, ``api_key``, or ``max_tokens`` must be
-        provided. Returns ``True`` on success.
+        At least one of ``api_url``, ``api_key``, ``max_tokens`` or
+        ``extra_payload`` must be provided. Returns ``True`` on success.
 
         Primary use case: repointing ``api_url`` for ephemeral vLLM endpoints
         behind cloudflared tunnels whose URL changes between sessions.
@@ -281,6 +290,9 @@ def update_custom(
             api_url: New base URL for the OpenAI-compatible API endpoint.
             api_key: New API key for the model provider.
             max_tokens: New maximum tokens value.
+            extra_payload: New JSON object merged into every outgoing request
+                body. Pass ``{}`` to clear the existing payload. See
+                ``create_custom`` for semantics.
             timeout: Request timeout override.
         """
         url = (
@@ -293,6 +305,8 @@ def update_custom(
             body["api_key"] = api_key
         if max_tokens is not None:
             body["max_tokens"] = max_tokens
+        if extra_payload is not None:
+            body["extra_payload"] = extra_payload
         resp = self._patch(url, body=body, timeout=timeout, cast_to=dict)
         return isinstance(resp, dict) and "data" in resp
 
@@ -477,6 +491,7 @@ async def create_custom(
         api_url: str,
         max_tokens: int,
         api_key: Optional[str] = None,
+        extra_payload: Optional[Dict[str, Any]] = None,
         timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
     ) -> Optional[CreateModelResponse]:
         """Create a custom model backed by an OpenAI-compatible API.
@@ -488,6 +503,11 @@ async def create_custom(
             api_url: Base URL of the OpenAI-compatible API endpoint.
             max_tokens: Maximum number of tokens the model supports.
             api_key: Optional API key for the model provider.
+            extra_payload: Optional JSON object merged into every outgoing
+                chat-completions request body. Customer values win on conflict
+                with our defaults (e.g. set ``{"temperature": 1}`` for
+                providers that reject ``temperature: 0``). Keys ``messages``,
+                ``model`` and ``stream`` are reserved.
             timeout: Request timeout override.
 
         Returns:
@@ -503,6 +523,8 @@ async def create_custom(
         }
         if api_key is not None:
             body["api_key"] = api_key
+        if extra_payload is not None:
+            body["extra_payload"] = extra_payload
 
         resp = await self._post(
             f"{base}/custom-models",
@@ -523,12 +545,23 @@ async def update_custom(
         api_url: Optional[str] = None,
         api_key: Optional[str] = None,
         max_tokens: Optional[int] = None,
+        extra_payload: Optional[Dict[str, Any]] = None,
         timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
     ) -> bool:
         """Update a custom model's mutable fields.
 
-        At least one of ``api_url``, ``api_key``, or ``max_tokens`` must be
-        provided. Returns ``True`` on success.
+        At least one of ``api_url``, ``api_key``, ``max_tokens`` or
+        ``extra_payload`` must be provided. Returns ``True`` on success.
+
+        Args:
+            model_id: ID of the custom model to update.
+            api_url: New full URL of the OpenAI-compatible chat completions endpoint.
+            api_key: New API key for the model provider.
+            max_tokens: New maximum tokens value.
+            extra_payload: New JSON object merged into every outgoing request
+                body. Pass ``{}`` to clear the existing payload. See
+                ``create_custom`` for semantics.
+            timeout: Request timeout override.
         """
         url = (
             f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/custom-models/{model_id}"
@@ -540,6 +573,8 @@ async def update_custom(
             body["api_key"] = api_key
         if max_tokens is not None:
             body["max_tokens"] = max_tokens
+        if extra_payload is not None:
+            body["extra_payload"] = extra_payload
         resp = await self._patch(url, body=body, timeout=timeout, cast_to=dict)
         return isinstance(resp, dict) and "data" in resp
 
diff --git a/tests/resources/test_models_resource.py b/tests/resources/test_models_resource.py
index 6fbd4895..5f4c90fb 100644
--- a/tests/resources/test_models_resource.py
+++ b/tests/resources/test_models_resource.py
@@ -798,6 +798,44 @@ def test_create_custom_omits_api_key_when_none(self, models_resource):
         call_body = models_resource._post.call_args.kwargs["body"]
         assert "api_key" not in call_body
 
+    def test_create_custom_includes_extra_payload_when_provided(self, models_resource):
+        """create_custom() forwards extra_payload verbatim."""
+        models_resource._post.return_value = {
+            "status": "success",
+            "data": {"model_id": "x", "organization_id": "o", "project_id": "p"},
+        }
+
+        payload = {"top_p": 0.9, "provider": {"order": ["anthropic"]}}
+        models_resource.create_custom(
+            name="My Model",
+            key="my/model",
+            description="desc",
+            api_url="https://example.com/v1/chat/completions",
+            max_tokens=4096,
+            extra_payload=payload,
+        )
+
+        call_body = models_resource._post.call_args.kwargs["body"]
+        assert call_body["extra_payload"] == payload
+
+    def test_create_custom_omits_extra_payload_when_none(self, models_resource):
+        """create_custom() does not include extra_payload when not provided."""
+        models_resource._post.return_value = {
+            "status": "success",
+            "data": {"model_id": "x", "organization_id": "o", "project_id": "p"},
+        }
+
+        models_resource.create_custom(
+            name="My Model",
+            key="my/model",
+            description="desc",
+            api_url="https://example.com/v1/chat/completions",
+            max_tokens=4096,
+        )
+
+        call_body = models_resource._post.call_args.kwargs["body"]
+        assert "extra_payload" not in call_body
+
     def test_create_custom_correct_url(self, models_resource):
         """create_custom() posts to the correct endpoint."""
         models_resource._post.return_value = {
@@ -1208,6 +1246,36 @@ def test_update_custom_max_tokens_only(self, models_resource):
         body = models_resource._patch.call_args.kwargs["body"]
         assert body == {"max_tokens": 8192}
 
+    def test_update_custom_extra_payload_only(self, models_resource):
+        """update_custom() supports extra_payload-only updates."""
+        models_resource._patch.return_value = {"data": {"id": "model-1"}}
+
+        payload = {"temperature": 1, "top_p": 0.9}
+        result = models_resource.update_custom("model-1", extra_payload=payload)
+
+        assert result is True
+        body = models_resource._patch.call_args.kwargs["body"]
+        assert body == {"extra_payload": payload}
+
+    def test_update_custom_extra_payload_empty_dict_clears_payload(self, models_resource):
+        """update_custom(extra_payload={}) sends the empty dict so the backend clears the stored payload."""
+        models_resource._patch.return_value = {"data": {"id": "model-1"}}
+
+        result = models_resource.update_custom("model-1", extra_payload={})
+
+        assert result is True
+        body = models_resource._patch.call_args.kwargs["body"]
+        assert body == {"extra_payload": {}}
+
+    def test_update_custom_omits_extra_payload_when_none(self, models_resource):
+        """update_custom() does not include extra_payload when not provided."""
+        models_resource._patch.return_value = {"data": {"id": "model-1"}}
+
+        models_resource.update_custom("model-1", api_url="https://x.io")
+
+        body = models_resource._patch.call_args.kwargs["body"]
+        assert "extra_payload" not in body
+
     def test_update_custom_returns_false_on_error_envelope(self, models_resource):
         """update_custom() returns False when response has no data field."""
         models_resource._patch.return_value = {"code": "NOT_FOUND", "message": "missing"}