Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/oss/src/utils/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ class ServicesCodeConfig(BaseModel):
sandbox_runner: str = (
os.getenv("AGENTA_SERVICES_CODE_SANDBOX_RUNNER")
or os.getenv("AGENTA_SERVICES_SANDBOX_RUNNER")
or "local"
or "restricted"
)

model_config = ConfigDict(extra="ignore")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ description: "Write custom evaluators in Python, JavaScript, or TypeScript with

Custom code evaluators let you write your own evaluation logic in Python, JavaScript, or TypeScript. Your code has access to the application inputs, outputs, and the full execution trace (spans, latency, token usage, costs).

:::warning Self-hosted deployments only
On self-hosted Agenta, custom evaluator code runs server-side. By default it runs in a restricted Python sandbox (no filesystem, network, or host access). The restricted sandbox executes Python evaluators only; JavaScript and TypeScript evaluators require the `daytona` runner. Operators can change the runner with the `AGENTA_SERVICES_CODE_SANDBOX_RUNNER` environment variable: `local` runs code with no sandbox (trusted authors only), `daytona` runs it in an isolated remote sandbox. See [environment configuration](/self-host/configuration). Agenta Cloud is unaffected — it isolates evaluator execution.
:::

## Function signature

Your code must define an `evaluate` function with the following signature:
Expand Down
10 changes: 10 additions & 0 deletions docs/docs/self-host/02-configuration.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,16 @@ This key has no env-var or `env.py` equivalent.
| `AGENTA_SERVICES_HOOK_ALLOW_INSECURE` | `agenta.services.hook.allow_insecure` | `agenta.services.hook.allowInsecure` |
| `AGENTA_SERVICES_MIDDLEWARE_CACHING_ENABLED` | `agenta.services.middleware.caching_enabled` | `agenta.services.middleware.cachingEnabled` |

:::warning Custom-code evaluator runner
`AGENTA_SERVICES_CODE_SANDBOX_RUNNER` selects how [custom-code evaluators](/evaluation/configure-evaluators/custom-evaluator) execute:

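- `restricted` (default) — in-process Python sandbox with limited builtins and an allowlist of pure-standard-library imports. No filesystem, network, or host access. Python evaluators only; JavaScript/TypeScript evaluators are rejected by this runner and need `daytona`.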
- `local` — raw execution in the services process with **no sandbox**. Any author who can create a custom-code evaluator can run arbitrary code on the host. Use only for trusted, single-tenant deployments.
- `daytona` — isolated remote sandbox (strongest). Recommended when evaluator authors are not fully trusted. Requires the [daytona](#daytona) credentials below.

The legacy `AGENTA_SERVICES_SANDBOX_RUNNER` is still accepted as a fallback.
:::

## Agenta — webhooks

| Env var | env.py path | values.yaml path |
Expand Down
4 changes: 3 additions & 1 deletion hosting/docker-compose/ee/env.ee.dev.example
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ AGENTA_CRYPT_KEY=replace-me
# ================================================================== #
# Agenta - Services (code/hook/middleware)
# ================================================================== #
# AGENTA_SERVICES_CODE_SANDBOX_RUNNER=local
# Custom-code evaluator runner: restricted (default, in-process sandbox) | local
# (no sandbox, raw exec — trusted/single-tenant only) | daytona (isolated remote sandbox)
# AGENTA_SERVICES_CODE_SANDBOX_RUNNER=restricted
# AGENTA_SERVICES_HOOK_ALLOW_INSECURE=true
# AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED=true
# AGENTA_SERVICES_MIDDLEWARE_CACHING_ENABLED=true
Expand Down
4 changes: 3 additions & 1 deletion hosting/docker-compose/ee/env.ee.gh.example
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ AGENTA_CRYPT_KEY=replace-me
# ================================================================== #
# Agenta - Services (code/hook/middleware)
# ================================================================== #
# AGENTA_SERVICES_CODE_SANDBOX_RUNNER=local
# Custom-code evaluator runner: restricted (default, in-process sandbox) | local
# (no sandbox, raw exec — trusted/single-tenant only) | daytona (isolated remote sandbox)
# AGENTA_SERVICES_CODE_SANDBOX_RUNNER=restricted
# AGENTA_SERVICES_HOOK_ALLOW_INSECURE=true
# AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED=true
# AGENTA_SERVICES_MIDDLEWARE_CACHING_ENABLED=true
Expand Down
4 changes: 3 additions & 1 deletion hosting/docker-compose/oss/env.oss.dev.example
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ AGENTA_CRYPT_KEY=replace-me
# ================================================================== #
# Agenta - Services (code/hook/middleware)
# ================================================================== #
# AGENTA_SERVICES_CODE_SANDBOX_RUNNER=local
# Custom-code evaluator runner: restricted (default, in-process sandbox) | local
# (no sandbox, raw exec — trusted/single-tenant only) | daytona (isolated remote sandbox)
# AGENTA_SERVICES_CODE_SANDBOX_RUNNER=restricted
# AGENTA_SERVICES_HOOK_ALLOW_INSECURE=true
# AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED=true
# AGENTA_SERVICES_MIDDLEWARE_CACHING_ENABLED=true
Expand Down
4 changes: 3 additions & 1 deletion hosting/docker-compose/oss/env.oss.gh.example
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ AGENTA_CRYPT_KEY=replace-me
# ================================================================== #
# Agenta - Services (code/hook/middleware)
# ================================================================== #
# AGENTA_SERVICES_CODE_SANDBOX_RUNNER=local
# Custom-code evaluator runner: restricted (default, in-process sandbox) | local
# (no sandbox, raw exec — trusted/single-tenant only) | daytona (isolated remote sandbox)
# AGENTA_SERVICES_CODE_SANDBOX_RUNNER=restricted
# AGENTA_SERVICES_HOOK_ALLOW_INSECURE=true
# AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED=true
# AGENTA_SERVICES_MIDDLEWARE_CACHING_ENABLED=true
Expand Down
4 changes: 2 additions & 2 deletions hosting/kubernetes/ee/values.ee.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ global:
# === agenta.services ===
# Consumed by the agenta SDK running inside services pods:
# - hook.allowInsecure → AGENTA_SERVICES_HOOK_ALLOW_INSECURE
# - code.sandboxRunner → AGENTA_SERVICES_CODE_SANDBOX_RUNNER (local|daytona)
# - code.sandboxRunner → AGENTA_SERVICES_CODE_SANDBOX_RUNNER (restricted|local|daytona; default restricted. local = no sandbox, trusted only)
# - middleware.authEnabled → AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED
# - middleware.cachingEnabled → AGENTA_SERVICES_MIDDLEWARE_CACHING_ENABLED
# ================================================================== #
Expand All @@ -122,7 +122,7 @@ global:
# hook:
# allowInsecure: false
# code:
# sandboxRunner: local
# sandboxRunner: restricted
# middleware:
# authEnabled: true
# cachingEnabled: true
Expand Down
4 changes: 2 additions & 2 deletions hosting/kubernetes/oss/values.oss.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ agenta:
# === agenta.services ===
# Consumed by the agenta SDK running inside services pods:
# - hook.allowInsecure → AGENTA_SERVICES_HOOK_ALLOW_INSECURE
# - code.sandboxRunner → AGENTA_SERVICES_CODE_SANDBOX_RUNNER (local|daytona)
# - code.sandboxRunner → AGENTA_SERVICES_CODE_SANDBOX_RUNNER (restricted|local|daytona; default restricted. local = no sandbox, trusted only)
# - middleware.authEnabled → AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED
# - middleware.cachingEnabled → AGENTA_SERVICES_MIDDLEWARE_CACHING_ENABLED
# ================================================================== #
Expand All @@ -109,7 +109,7 @@ agenta:
# hook:
# allowInsecure: false
# code:
# sandboxRunner: local
# sandboxRunner: restricted
# middleware:
# authEnabled: true
# cachingEnabled: true
Expand Down
34 changes: 24 additions & 10 deletions sdks/python/agenta/sdk/engines/running/runners/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@

from agenta.sdk.engines.running.runners.base import CodeRunner
from agenta.sdk.engines.running.runners.local import LocalRunner
from agenta.sdk.engines.running.runners.restricted import RestrictedRunner
from agenta.sdk.utils.logging import get_module_logger

if TYPE_CHECKING:
from agenta.sdk.engines.running.runners.daytona import DaytonaRunner

log = get_module_logger(__name__)


def _get_daytona_runner() -> "DaytonaRunner":
from agenta.sdk.engines.running.runners.daytona import DaytonaRunner
Expand All @@ -20,34 +24,44 @@ def get_runner() -> CodeRunner:

Reads AGENTA_SERVICES_CODE_SANDBOX_RUNNER (canonical, v0.100.3+) with a
fallback to the legacy AGENTA_SERVICES_SANDBOX_RUNNER.
- "local" (default): Uses current container for local execution
- "daytona": Uses Daytona remote sandbox
- "restricted" (default): In-process RestrictedPython sandbox (allowlisted imports).
- "local": Raw exec in the current process — no sandbox. Trusted deployments only.
- "daytona": Remote Daytona sandbox (strongest isolation).

Returns:
CodeRunner: An instance of LocalRunner or DaytonaRunner
CodeRunner: An instance of RestrictedRunner, LocalRunner, or DaytonaRunner

Raises:
ValueError: If Daytona runner is selected but required environment variables are missing
ValueError: If an unknown runner is selected, or Daytona is selected but its
required environment variables are missing.
"""
runner_type = (
os.getenv("AGENTA_SERVICES_CODE_SANDBOX_RUNNER")
or os.getenv("AGENTA_SERVICES_SANDBOX_RUNNER")
or "local"
or "restricted"
).lower()

if runner_type == "daytona":
if runner_type == "restricted":
return RestrictedRunner()
elif runner_type == "local":
log.warning(
"Custom-code evaluators are using the 'local' runner: user code runs with "
"raw exec() and no sandbox in this process. Use it only for trusted/"
"single-tenant deployments. Set AGENTA_SERVICES_CODE_SANDBOX_RUNNER=restricted "
"(default) or =daytona for untrusted evaluator authors."
)
return LocalRunner()
elif runner_type == "daytona":
try:
return _get_daytona_runner()
except ImportError as exc:
raise ValueError(
"Daytona runner requires the 'daytona' package. "
"Install optional dependencies or set "
"AGENTA_SERVICES_CODE_SANDBOX_RUNNER=local."
"AGENTA_SERVICES_CODE_SANDBOX_RUNNER=restricted."
) from exc
elif runner_type == "local":
return LocalRunner()
else:
raise ValueError(
f"Unknown AGENTA_SERVICES_CODE_SANDBOX_RUNNER value: {runner_type}. "
f"Supported values: 'local', 'daytona'"
f"Supported values: 'restricted', 'local', 'daytona'"
)
182 changes: 182 additions & 0 deletions sdks/python/agenta/sdk/engines/running/runners/restricted.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
import builtins as _py_builtins
from typing import Any, Dict, Union, Optional

from RestrictedPython import compile_restricted, safe_builtins, PrintCollector
from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
from RestrictedPython.Guards import (
safer_getattr,
guarded_iter_unpack_sequence,
full_write_guard,
)

from agenta.sdk.engines.running.runners.base import CodeRunner


# Pure data/iteration builtins that RestrictedPython's safe_builtins omits but
# evaluators routinely need. All operate on data only — none reach the host or
# the class graph, so adding them does not widen the sandbox (escapes go through
# attribute access, which safer_getattr blocks).
_SAFE_EXTRA_BUILTINS = (
"dict",
"list",
"set",
"frozenset",
"min",
"max",
"sum",
"enumerate",
"map",
"filter",
"reversed",
"all",
"any",
)


# Pure-computation stdlib modules only: no filesystem, network, or process reach.
# Deliberately strict — anything that can touch the host (os, subprocess, sys,
# pathlib, socket, importlib, io, shutil, ...) or the network (httpx, urllib,
# requests, ...) is excluded. Operators who need unrestricted execution must opt
# into the `local` runner; hostile multi-tenant should use `daytona`.
_ALLOWED_IMPORTS = frozenset(
{
"math",
"statistics",
"datetime",
"json",
"re",
"random",
"string",
"typing",
"collections",
"itertools",
"functools",
}
)


def _safe_import(name, globals=None, locals=None, fromlist=(), level=0):
"""Guarded ``__import__`` that only permits the pure-stdlib allowlist.

Replaces the real ``__import__`` inside the sandbox so user evaluator code
cannot import host-reaching modules. Relative imports (``level != 0``) are
rejected outright.
"""
root = name.split(".")[0]
if level != 0 or root not in _ALLOWED_IMPORTS:
raise ImportError(
f"Import of '{name}' is not allowed in the restricted evaluator sandbox. "
f"Allowed modules: {', '.join(sorted(_ALLOWED_IMPORTS))}. "
"To run unrestricted evaluator code set "
"AGENTA_SERVICES_CODE_SANDBOX_RUNNER=local (trusted deployments only)."
)
return __import__(name, globals, locals, fromlist, level)


def _build_restricted_globals() -> Dict[str, Any]:
    """Build the execution globals for RestrictedPython.

    Closes the two holes the previous sandbox had:
    1. it injected the real ``__import__`` (here: a guarded allowlist import), and
    2. it never set ``_getattr_`` (here: ``safer_getattr`` blocks dunder/underscore
       attribute access, which defeats the ``().__class__.__bases__`` gadget escape).

    It also supplies the two hooks RestrictedPython's compiler emits calls to for
    ordinary statements, so that evaluator code using them does not fail with a
    ``NameError``:

    * ``_unpack_sequence_`` — tuple-unpacking assignments (``a, b = pair``).
    * ``_inplacevar_`` — augmented assignment to a name (``total += score``).

    Returns:
        A fresh globals dict; a new one is built on every call so state never
        leaks between evaluator runs.
    """
    # Imported locally so this function does not depend on edits to the
    # module-level import block.
    import operator

    from RestrictedPython.Guards import guarded_unpack_sequence

    inplace_operators = {
        "+=": operator.iadd,
        "-=": operator.isub,
        "*=": operator.imul,
        "/=": operator.itruediv,
        "//=": operator.ifloordiv,
        "%=": operator.imod,
        "**=": operator.ipow,
        "<<=": operator.ilshift,
        ">>=": operator.irshift,
        "&=": operator.iand,
        "^=": operator.ixor,
        "|=": operator.ior,
        "@=": operator.imatmul,
    }

    def _inplacevar(op, target, operand):
        """Evaluate ``target <op> operand`` for a rewritten augmented assignment."""
        try:
            apply = inplace_operators[op]
        except KeyError:
            raise ValueError(f"Unsupported in-place operator: {op}") from None
        return apply(target, operand)

    sandbox_builtins = dict(safe_builtins)
    for name in _SAFE_EXTRA_BUILTINS:
        sandbox_builtins[name] = getattr(_py_builtins, name)
    sandbox_builtins["__import__"] = _safe_import

    return {
        "__builtins__": sandbox_builtins,
        "_getattr_": safer_getattr,
        "_getitem_": default_guarded_getitem,
        "_getiter_": default_guarded_getiter,
        "_iter_unpack_sequence_": guarded_iter_unpack_sequence,
        "_unpack_sequence_": guarded_unpack_sequence,
        "_inplacevar_": _inplacevar,
        "_write_": full_write_guard,
        # print() goes through PrintCollector (captured, not real stdout).
        "_print_": PrintCollector,
    }


class RestrictedRunner(CodeRunner):
    """Default code runner: executes evaluator code in an in-process RestrictedPython sandbox.

    NOTE(review): the code runs synchronously in the calling process and nothing
    in this class bounds its CPU time or memory use.
    """

    def run(
        self,
        code: str,
        app_params: Dict[str, Any],
        inputs: Dict[str, Any],
        output: Union[dict, str],
        correct_answer: Any,
        runtime: Optional[str] = None,
        templates: Optional[Dict[str, str]] = None,
        *,
        version: str = "1",
        trace: Optional[Dict[str, Any]] = None,
    ) -> Union[float, None]:
        """
        Execute provided Python code in a RestrictedPython sandbox.

        The code is compiled with ``compile_restricted``, executed against a fresh
        set of restricted globals, and its ``evaluate`` function is then called.

        Args:
            code: The Python code to be executed
            app_params: The parameters of the app variant (v1 only)
            inputs: Inputs to be used during code execution
            output: The output of the app variant after being called
            correct_answer: The correct answer (or target) for comparison (v1 only)
            runtime: Runtime environment (only "python" is supported; None means python)
            templates: Wrapper templates keyed by runtime (unused for in-process runners).
            version: Evaluator interface version ("1" = legacy, "2" = new)
            trace: Full trace data (v2 only)

        Returns:
            The value returned by ``evaluate`` converted to ``float``. Failures
            are reported by raising, not by returning ``None``.

        Raises:
            ValueError: If ``runtime`` is anything other than python.
            SyntaxError: If the code does not compile under RestrictedPython.
            KeyError: If the code defines no ``evaluate`` function, or a
                ``KeyError`` escapes from the evaluator code itself.
            RuntimeError: For any other failure while executing the code,
                including a result that cannot be converted to ``float``.
        """
        # Normalize runtime: None means python
        runtime = runtime or "python"

        # The restricted sandbox runs in-process and only supports Python.
        if runtime != "python":
            raise ValueError(
                f"RestrictedRunner only supports 'python' runtime, got: {runtime}. "
                "Use the Daytona runner for javascript/typescript."
            )

        try:
            byte_code = compile_restricted(code, filename="<inline>", mode="exec")
        except SyntaxError as e:
            raise SyntaxError(f"Syntax error in provided code: {e}") from e

        environment = _build_restricted_globals()

        try:
            # Running the module body defines `evaluate` inside `environment`.
            exec(byte_code, environment)

            fn = environment["evaluate"]

            if version == "2":
                result = fn(inputs, output, trace)
            else:
                result = fn(app_params, inputs, output, correct_answer)

            # Attempt to convert result to float
            if isinstance(result, (float, int, str)):
                try:
                    result = float(result)
                except ValueError as e:
                    raise ValueError(
                        f"Result cannot be converted to float: {e}"
                    ) from e

            if not isinstance(result, float):
                raise TypeError(
                    f"Result is not a float after conversion: {type(result)}"
                )

            return result

        # The handlers below keep the original exception types and messages and
        # attach the underlying error as __cause__ so the real traceback survives.
        except KeyError as e:
            raise KeyError(f"Missing expected key in environment: {e}") from e

        except SyntaxError as e:
            raise SyntaxError(f"Syntax error in provided code: {e}") from e

        except Exception as e:
            # Also wraps the ValueError/TypeError raised by the conversion above.
            raise RuntimeError(f"Error during code execution: {e}") from e
17 changes: 0 additions & 17 deletions sdks/python/agenta/sdk/engines/running/sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,6 @@
_runner = None


def is_import_safe(python_code: Text) -> bool:
    """Report whether the code is free of the blocked system-level module names.

    This is a plain substring test over the whole source text, not an import
    parser: a blocked name appearing anywhere — inside an identifier, a string,
    or a comment — makes the code count as unsafe.

    Args:
        python_code (str): The Python code to be executed

    Returns:
        bool - True when none of the blocked names occurs in the code,
        False as soon as one of them does
    """
    blocked_names = ("os", "subprocess", "threading", "multiprocessing")
    return not any(blocked in python_code for blocked in blocked_names)


def execute_code_safely(
app_params: Dict[str, Any],
inputs: Dict[str, Any],
Expand Down
Loading
Loading