LayerLens · m-peko · May 20, 2026 · May 19, 2026 · May 19, 2026 · May 20, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,7 +5,7 @@ All notable changes to the Stratix Python SDK will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-**Latest version:** [1.6.1](https://github.com/LayerLens/stratix-python/releases/tag/v1.6.1) — 2026-05-15
+**Latest version:** [1.7.0](https://github.com/LayerLens/stratix-python/releases/tag/v1.7.0) — 2026-05-20
 
 ## [Unreleased]
 
@@ -21,6 +21,12 @@ Things we're actively working on. Want to help? Check the [issues](https://githu
 
 ### Removed
 
+## [1.7.0] - 2026-05-20
+
+### Added
+
+- `extra_payload` parameter on `models.create_custom` and `models.update_custom` (sync + async). Optional JSON object merged into every outgoing chat-completions request body; customer values win on conflict with our hardcoded defaults. Lets customers add provider-specific fields (`top_p`, `max_completion_tokens`) or override values like `temperature` for providers that reject our defaults.
+
 ## [1.6.1] - 2026-05-15
 
 ### Added

diff --git a/docs/README.md b/docs/README.md
@@ -176,9 +176,12 @@ response = client.models.create_custom(
     name="My Fine-tuned Model",
     key="my-org/custom-model-v1",
     description="Fine-tuned GPT for medical Q&A",
-    api_url="https://my-api.example.com/v1",
+    api_url="https://my-api.example.com/v1/chat/completions",
     max_tokens=4096,
     api_key=os.environ.get("MY_PROVIDER_API_KEY"),  # optional
+    # Optional — merged into every request body. Useful for provider-specific
+    # fields or for overriding our defaults (e.g. {"temperature": 1}).
+    extra_payload={"top_p": 0.9},
 )
 print(f"Created model: {response.model_id}")
 ```

diff --git a/docs/api-reference/models-benchmarks.md b/docs/api-reference/models-benchmarks.md
@@ -143,21 +143,28 @@ client = Stratix()
 success = client.models.remove("model-id-1", "model-id-2")
 ```
 
-### `create_custom(name, key, description, api_url, max_tokens, api_key=None, timeout=None)`
+### `create_custom(name, key, description, api_url, max_tokens, api_key=None, extra_payload=None, timeout=None)`
 
 Creates a custom model backed by an OpenAI-compatible API endpoint. This allows you to evaluate any model accessible via a chat completions endpoint.
 
 #### Parameters
 
-| Parameter     | Type                             | Required | Description                                                                       |
-| ------------- | -------------------------------- | -------- | --------------------------------------------------------------------------------- |
-| `name`        | `str`                            | Yes      | Model name (max 256 characters)                                                   |
-| `key`         | `str`                            | Yes      | Unique model key, lowercase alphanumeric with dots/hyphens/slashes (max 256 chars)|
-| `description` | `str`                            | Yes      | Model description (max 500 characters)                                            |
-| `api_url`     | `str`                            | Yes      | Base URL of the OpenAI-compatible API endpoint                                    |
-| `max_tokens`  | `int`                            | Yes      | Maximum number of tokens the model supports                                       |
-| `api_key`     | `str \| None`                    | No       | API key for the model provider                                                    |
-| `timeout`     | `float \| httpx.Timeout \| None` | No       | Override request timeout                                                          |
+| Parameter       | Type                              | Required | Description                                                                       |
+| --------------- | --------------------------------- | -------- | --------------------------------------------------------------------------------- |
+| `name`          | `str`                             | Yes      | Model name (max 256 characters)                                                   |
+| `key`           | `str`                             | Yes      | Unique model key, lowercase alphanumeric with dots/hyphens/slashes (max 256 chars)|
+| `description`   | `str`                             | Yes      | Model description (max 500 characters)                                            |
+| `api_url`       | `str`                             | Yes      | Full URL of the OpenAI-compatible chat completions endpoint                       |
+| `max_tokens`    | `int`                             | Yes      | Maximum number of tokens the model supports                                       |
+| `api_key`       | `str \| None`                     | No       | API key for the model provider                                                    |
+| `extra_payload` | `Dict[str, Any] \| None`          | No       | JSON object merged into every outgoing chat-completions request body (see below)  |
+| `timeout`       | `float \| httpx.Timeout \| None`  | No       | Override request timeout                                                          |
+
+#### `extra_payload` semantics
+
+When set, the keys/values in `extra_payload` are deep-merged into every outgoing request body. Customer values **win on conflict** with our hardcoded defaults — use it to override `temperature` (we send `0` for reproducible evaluations) or to add provider-specific fields like `top_p`, `presence_penalty`, or `max_completion_tokens` (required by some OpenAI reasoning models that reject `max_tokens`).
+
+The keys `messages`, `model`, and `stream` are reserved and will be rejected.
 
 #### Returns
 
@@ -178,28 +185,31 @@ result = client.models.create_custom(
     name="My Custom Model",
     key="my-org/custom-model-v1",
     description="Custom fine-tuned model served via vLLM",
-    api_url="https://my-model-endpoint.example.com/v1",
+    api_url="https://my-model-endpoint.example.com/v1/chat/completions",
     api_key="my-provider-api-key",
     max_tokens=4096,
+    # Optional — provider-specific overrides merged into every request body.
+    extra_payload={"top_p": 0.9},
 )
 
 if result:
     print(f"Created model: {result.model_id}")
 ```
 
-### `update_custom(model_id, *, api_url=None, api_key=None, max_tokens=None, timeout=None)`
+### `update_custom(model_id, *, api_url=None, api_key=None, max_tokens=None, extra_payload=None, timeout=None)`
 
-Updates a custom model's mutable fields. At least one of `api_url`, `api_key`, or `max_tokens` must be provided. Primary use case: repointing `api_url` for ephemeral vLLM endpoints behind cloudflared tunnels whose URL changes between sessions.
+Updates a custom model's mutable fields. At least one of `api_url`, `api_key`, `max_tokens`, or `extra_payload` must be provided. Primary use case: repointing `api_url` for ephemeral vLLM endpoints behind cloudflared tunnels whose URL changes between sessions.
 
 #### Parameters
 
-| Parameter    | Type                             | Required | Description                                              |
-| ------------ | -------------------------------- | -------- | -------------------------------------------------------- |
-| `model_id`   | `str`                            | Yes      | ID of the custom model to update                         |
-| `api_url`    | `str \| None`                    | No       | New base URL for the OpenAI-compatible API endpoint      |
-| `api_key`    | `str \| None`                    | No       | New API key for the model provider                       |
-| `max_tokens` | `int \| None`                    | No       | New maximum tokens value                                 |
-| `timeout`    | `float \| httpx.Timeout \| None` | No       | Override request timeout                                 |
+| Parameter       | Type                              | Required | Description                                                                      |
+| --------------- | --------------------------------- | -------- | -------------------------------------------------------------------------------- |
+| `model_id`      | `str`                             | Yes      | ID of the custom model to update                                                 |
+| `api_url`       | `str \| None`                     | No       | New full URL of the OpenAI-compatible chat completions endpoint                  |
+| `api_key`       | `str \| None`                     | No       | New API key for the model provider                                               |
+| `max_tokens`    | `int \| None`                     | No       | New maximum tokens value                                                         |
+| `extra_payload` | `Dict[str, Any] \| None`          | No       | New JSON object merged into every outgoing request. Pass `{}` to clear it.       |
+| `timeout`       | `float \| httpx.Timeout \| None`  | No       | Override request timeout                                                         |
 
 #### Returns
 
@@ -213,7 +223,13 @@ client = Stratix()
 # Repoint the api_url without re-creating the model
 client.models.update_custom(
     "model-id-from-create-custom",
-    api_url="https://my-new-endpoint.example.com/v1",
+    api_url="https://my-new-endpoint.example.com/v1/chat/completions",
+)
+
+# Override request parameters for a model that doesn't accept temperature=0
+client.models.update_custom(
+    "model-id-from-create-custom",
+    extra_payload={"temperature": 1},
 )
 ```
 

diff --git a/docs/examples/models-and-benchmarks.md b/docs/examples/models-and-benchmarks.md
@@ -115,7 +115,7 @@ def main():
         name="My Custom Model",
         key="my-org/custom-model-v1",
         description="Custom fine-tuned model served via vLLM",
-        api_url="https://my-model-endpoint.example.com/v1",
+        api_url="https://my-model-endpoint.example.com/v1/chat/completions",
         api_key=os.environ["MY_PROVIDER_API_KEY"],
         max_tokens=4096,
     )
@@ -152,7 +152,7 @@ def main():
         name="My Tunnel-backed Model",
         key="my-org/tunnel-model-v1",
         description="vLLM served behind a cloudflared tunnel",
-        api_url="https://tunnel-1.example.com/v1",
+        api_url="https://tunnel-1.example.com/v1/chat/completions",
         api_key="my-provider-api-key",
         max_tokens=4096,
     )
@@ -161,7 +161,7 @@ def main():
     # Later, when the tunnel URL changes:
     client.models.update_custom(
         result.model_id,
-        api_url="https://tunnel-2.example.com/v1",
+        api_url="https://tunnel-2.example.com/v1/chat/completions",
     )
 
     # Run evaluations as usual — the model now points at the new endpoint.
@@ -190,7 +190,7 @@ def main():
         name="My Custom Model",
         key="my-org/custom-model-v2",
         description="Replacement after schema migration",
-        api_url="https://my-endpoint.example.com/v1",
+        api_url="https://my-endpoint.example.com/v1/chat/completions",
         api_key="my-provider-api-key",
         max_tokens=4096,
     )

diff --git a/samples/claude-code/skills/benchmark.md b/samples/claude-code/skills/benchmark.md
@@ -38,7 +38,7 @@ model = client.models.create_custom(
     name="My Custom Model",
     key="my-custom-model",
     description="Fine-tuned GPT for legal analysis",
-    api_url="https://api.example.com/v1/completions",
+    api_url="https://api.example.com/v1/chat/completions",
     max_tokens=4096,
     api_key="sk-...",
 )

diff --git a/samples/core/custom_model.py b/samples/core/custom_model.py
@@ -39,9 +39,14 @@ def main() -> None:
         name="My Custom Model",
         key="my-org/custom-model-v1",
         description="Custom fine-tuned model served via vLLM",
-        api_url="https://my-model-endpoint.example.com/v1",
+        api_url="https://my-model-endpoint.example.com/v1/chat/completions",
         api_key="my-provider-api-key",
         max_tokens=4096,
+        # Optional -- merged into every outgoing request body. Customer
+        # values win on conflict with our hardcoded defaults (we send
+        # `temperature: 0` for reproducible evals; override here for
+        # providers that reject it, e.g. {"temperature": 1}).
+        extra_payload={"top_p": 0.9},
     )
 
     if result:
@@ -69,7 +74,7 @@ def main() -> None:
 
     updated = client.models.update_custom(
         result.model_id,
-        api_url="https://my-new-endpoint.example.com/v1",
+        api_url="https://my-new-endpoint.example.com/v1/chat/completions",
     )
     if updated:
         print(f"\nCustom model {result.model_id} api_url updated")

diff --git a/src/layerlens/_version.py b/src/layerlens/_version.py
@@ -1,4 +1,4 @@
-__version__ = "1.6.1"
+__version__ = "1.7.0"
 
 # Will be templated during the build
 __git_commit__ = "__GIT_COMMIT__"
diff --git a/src/layerlens/resources/models/models.py b/src/layerlens/resources/models/models.py
@@ -220,6 +220,7 @@ def create_custom(
         api_url: str,
         max_tokens: int,
         api_key: Optional[str] = None,
+        extra_payload: Optional[Dict[str, Any]] = None,
         timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
     ) -> Optional[CreateModelResponse]:
         """Create a custom model backed by an OpenAI-compatible API.
@@ -231,6 +232,11 @@ def create_custom(
-            api_url: Base URL of the OpenAI-compatible API endpoint.
+            api_url: Full URL of the OpenAI-compatible chat completions endpoint.
             max_tokens: Maximum number of tokens the model supports.
             api_key: Optional API key for the model provider.
+            extra_payload: Optional JSON object merged into every outgoing
+                chat-completions request body. Customer values win on conflict
+                with our defaults (e.g. set ``{"temperature": 1}`` for
+                providers that reject ``temperature: 0``). Keys ``messages``,
+                ``model``, and ``stream`` are reserved and will be rejected.
             timeout: Request timeout override.
 
         Returns:
@@ -246,6 +252,8 @@ def create_custom(
         }
         if api_key is not None:
             body["api_key"] = api_key
+        if extra_payload is not None:
+            body["extra_payload"] = extra_payload
 
         resp = self._post(
             f"{base}/custom-models",
@@ -266,12 +274,13 @@ def update_custom(
         api_url: Optional[str] = None,
         api_key: Optional[str] = None,
         max_tokens: Optional[int] = None,
+        extra_payload: Optional[Dict[str, Any]] = None,
         timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
     ) -> bool:
         """Update a custom model's mutable fields.
 
-        At least one of ``api_url``, ``api_key``, or ``max_tokens`` must be
-        provided. Returns ``True`` on success.
+        At least one of ``api_url``, ``api_key``, ``max_tokens``, or
+        ``extra_payload`` must be provided. Returns ``True`` on success.
 
         Primary use case: repointing ``api_url`` for ephemeral vLLM endpoints
         behind cloudflared tunnels whose URL changes between sessions.
@@ -281,6 +290,9 @@ def update_custom(
-            api_url: New base URL for the OpenAI-compatible API endpoint.
+            api_url: New full URL of the OpenAI-compatible chat completions endpoint.
             api_key: New API key for the model provider.
             max_tokens: New maximum tokens value.
+            extra_payload: New JSON object merged into every outgoing request
+                body. Pass ``{}`` to clear the existing payload. See
+                ``create_custom`` for semantics.
             timeout: Request timeout override.
         """
         url = (
@@ -293,6 +305,8 @@ def update_custom(
             body["api_key"] = api_key
         if max_tokens is not None:
             body["max_tokens"] = max_tokens
+        if extra_payload is not None:
+            body["extra_payload"] = extra_payload
         resp = self._patch(url, body=body, timeout=timeout, cast_to=dict)
         return isinstance(resp, dict) and "data" in resp
 
@@ -477,6 +491,7 @@ async def create_custom(
         api_url: str,
         max_tokens: int,
         api_key: Optional[str] = None,
+        extra_payload: Optional[Dict[str, Any]] = None,
         timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
     ) -> Optional[CreateModelResponse]:
         """Create a custom model backed by an OpenAI-compatible API.
@@ -488,6 +503,11 @@ async def create_custom(
-            api_url: Base URL of the OpenAI-compatible API endpoint.
+            api_url: Full URL of the OpenAI-compatible chat completions endpoint.
             max_tokens: Maximum number of tokens the model supports.
             api_key: Optional API key for the model provider.
+            extra_payload: Optional JSON object merged into every outgoing
+                chat-completions request body. Customer values win on conflict
+                with our defaults (e.g. set ``{"temperature": 1}`` for
+                providers that reject ``temperature: 0``). Keys ``messages``,
+                ``model``, and ``stream`` are reserved and will be rejected.
             timeout: Request timeout override.
 
         Returns:
@@ -503,6 +523,8 @@ async def create_custom(
         }
         if api_key is not None:
             body["api_key"] = api_key
+        if extra_payload is not None:
+            body["extra_payload"] = extra_payload
 
         resp = await self._post(
             f"{base}/custom-models",
@@ -523,12 +545,23 @@ async def update_custom(
         api_url: Optional[str] = None,
         api_key: Optional[str] = None,
         max_tokens: Optional[int] = None,
+        extra_payload: Optional[Dict[str, Any]] = None,
         timeout: float | httpx.Timeout | None = DEFAULT_TIMEOUT,
     ) -> bool:
         """Update a custom model's mutable fields.
 
-        At least one of ``api_url``, ``api_key``, or ``max_tokens`` must be
-        provided. Returns ``True`` on success.
+        At least one of ``api_url``, ``api_key``, ``max_tokens``, or
+        ``extra_payload`` must be provided. Returns ``True`` on success.
+
+        Args:
+            model_id: ID of the custom model to update.
+            api_url: New full URL of the OpenAI-compatible chat completions endpoint.
+            api_key: New API key for the model provider.
+            max_tokens: New maximum tokens value.
+            extra_payload: New JSON object merged into every outgoing request
+                body. Pass ``{}`` to clear the existing payload. See
+                ``create_custom`` for semantics.
+            timeout: Request timeout override.
         """
         url = (
             f"/organizations/{self._client.organization_id}/projects/{self._client.project_id}/custom-models/{model_id}"
@@ -540,6 +573,8 @@ async def update_custom(
             body["api_key"] = api_key
         if max_tokens is not None:
             body["max_tokens"] = max_tokens
+        if extra_payload is not None:
+            body["extra_payload"] = extra_payload
         resp = await self._patch(url, body=body, timeout=timeout, cast_to=dict)
         return isinstance(resp, dict) and "data" in resp