From 42a49eeb510b0ed62b76bd76aedee0ad497e946a Mon Sep 17 00:00:00 2001 From: jrob5756 Date: Fri, 5 Jun 2026 09:21:25 -0400 Subject: [PATCH 1/2] feat: opt-in skills for Conductor-aware agents (#180) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a generalized skills capability so provider-backed agents can opt into bundled, reusable knowledge or capabilities via a single named list. Phase 1 ships one built-in skill — `conductor` — sourced from plugins/conductor/skills/conductor/ (the same canonical content the Copilot CLI plugin consumes; no duplication). New code - src/conductor/skills/ — registry (built-in name → directory), loader (SKILL.md + references/*.md → eager preamble), public surface - examples/skills-self-improving-workflow.yaml — end-to-end example - tests/test_skills/ — 51 tests covering registry, loader, schema field validation, and executor integration on both provider variants Schema - AgentDef.skills: list[str] | None — tri-state per-agent opt-in (omitted = inherit; [] = explicit none; [name, …] = explicit set). Forbidden on script / human_gate / workflow / wait / set / terminate. - RuntimeConfig.skills: list[str] — workflow-wide default for every provider-backed agent. - field_validator on both sites resolves every name through the registry so unknown skills fail at `conductor validate` time. Provider parity (the user-facing contract is identical — "the agent has access to the named skill" — but the mechanism differs): - Copilot: supports_native_skills=True. Executor forwards resolved directories on execute(skill_directories=…); the provider passes them through to session_kwargs["skill_directories"] for native progressive disclosure via SKILL.md frontmatter. - Claude: supports_native_skills=False (default). Executor reads SKILL.md + references/*.md and eager-injects them into the rendered prompt inside . The provider accepts the skill_directories kwarg for signature parity and immediately discards it (documented in the docstring). - Claude Agent SDK: same eager-inject path as Claude — the upstream claude-agent-sdk surfaces no skill kwarg today. If/when it does, flip supports_native_skills to True. ProviderCapabilities - Adds a skills: bool field to the descriptor (defaults to False so unaudited providers fail validation loudly rather than silently). - All three providers declare skills=True. - declared_limitations() lists "no skills support" for providers that opt out. Wheel packaging - pyproject.toml force-includes plugins/conductor/skills/conductor under the conductor namespace so the bundled skill ships with the installed wheel, not just the source checkout. The registry probes both layouts (editable install + wheel install) before falling back to a deterministic SkillNotFoundError. Closes #180 (Phase 1). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- AGENTS.md | 7 + examples/skills-self-improving-workflow.yaml | 161 ++++++++++++++++++ plugins/conductor/skills/conductor/SKILL.md | 2 +- .../skills/conductor/references/authoring.md | 54 ++++-- .../conductor/references/yaml-schema.md | 41 ++--- pyproject.toml | 3 + src/conductor/config/schema.py | 113 ++++++++++++ src/conductor/engine/workflow.py | 7 + src/conductor/executor/agent.py | 82 ++++++++- src/conductor/providers/base.py | 33 ++++ src/conductor/providers/capabilities.py | 24 +++ src/conductor/providers/claude.py | 11 ++ src/conductor/providers/claude_agent_sdk.py | 12 ++ src/conductor/providers/copilot.py | 37 ++++ src/conductor/skills/__init__.py | 46 +++++ src/conductor/skills/loader.py | 97 +++++++++++ src/conductor/skills/registry.py | 131 ++++++++++++++ tests/test_engine/test_workflow.py | 1 + tests/test_engine/test_workflow_interrupt.py | 1 + .../test_existing_workflows_integration.py | 2 +- .../test_integration/test_mixed_providers.py | 3 +- tests/test_providers/test_capabilities.py | 3 + tests/test_providers/test_copilot_skills.py | 91 ++++++++++ tests/test_providers/test_registry.py | 1 + tests/test_skills/__init__.py | 0 .../test_skills/test_executor_integration.py | 155 +++++++++++++++++ tests/test_skills/test_loader.py | 55 ++++++ tests/test_skills/test_registry.py | 62 +++++++ tests/test_skills/test_schema.py | 93 ++++++++++ 29 files changed, 1288 insertions(+), 40 deletions(-) create mode 100644 examples/skills-self-improving-workflow.yaml create mode 100644 src/conductor/skills/__init__.py create mode 100644 src/conductor/skills/loader.py create mode 100644 src/conductor/skills/registry.py create mode 100644 tests/test_providers/test_copilot_skills.py create mode 100644 tests/test_skills/__init__.py create mode 100644 tests/test_skills/test_executor_integration.py create mode 100644 tests/test_skills/test_loader.py create mode 100644 tests/test_skills/test_registry.py create mode 100644 tests/test_skills/test_schema.py diff --git a/AGENTS.md b/AGENTS.md index 56ce1a33..08b047c6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -76,6 +76,11 @@ make validate-examples # validate all examples - `loader.py` - YAML parsing with environment variable resolution (${VAR:-default}) and `!file` tag support - `validator.py` - Cross-reference validation (agent names, routes, parallel groups) +- **skills/**: Skill registry and loader (opt-in, bundled skill content) + - `registry.py` - Resolves built-in skill names to on-disk directories (probes editable-install + wheel-install layouts) + - `loader.py` - Reads `SKILL.md` + `references/*.md` for providers that require eager preamble injection; wraps each skill in `` tags inside a `` envelope + - Built-in skills live under `plugins/conductor/skills//` (bundled into the wheel via hatchling `force-include`) + - **engine/**: Workflow execution orchestration - `workflow.py` - Main `WorkflowEngine` class that orchestrates agent execution, parallel groups, for-each groups, and routing - `context.py` - `WorkflowContext` manages accumulated agent outputs with three modes: accumulate, last_only, explicit @@ -135,6 +140,7 @@ make validate-examples # validate all examples - **Tool resolution**: `null` = all workflow tools, `[]` = none, `[list]` = subset - **Set step typing**: `output_type` defaults to `auto` (safe YAML parse with `_to_json_safe` normalisation — `datetime`/`date`/`time` → ISO 8601, non-string dict keys and other non-JSON-safe values raise `ExecutionError`). Explicit `string`/`number`/`integer`/`boolean`/`list`/`dict` only valid on single `value:`. `WorkflowContext.store` accepts any JSON-safe value (scalars/lists from `set` steps in addition to the dicts produced by LLM / script / gate / parallel-group outputs); `_add_agent_input` returns the scalar verbatim for `step.output` and raises a clear `KeyError` for `step.output.field` shorthand on non-dict outputs. - **Reasoning effort**: `runtime.default_reasoning_effort` sets a workflow-wide default; per-agent `reasoning.effort` overrides it. Allowed values: `low`, `medium`, `high`, `xhigh`. Each provider translates the unified value to its native API (Copilot: `reasoning_effort` on the session, validated against the model's `supported_reasoning_efforts`; Claude: extended thinking with budget mapping low=2048, medium=8192, high=16384, xhigh=32768 tokens, with `temperature` coerced to 1.0 and `max_tokens` bumped to fit the budget). See `examples/reasoning-effort.yaml`. +- **Skills**: `runtime.skills: [name, ...]` sets a workflow-wide default list of skills enabled for every provider-backed agent; per-agent `skills: [name, ...]` overrides it (tri-state via list presence: omitted = inherit, `skills: []` = explicit opt-out, `skills: [name, ...]` = explicit set). Skill names must resolve to a registered built-in (currently just `conductor`). The observable contract is the same across providers — *"the agent has access to the named skill"* — but the mechanism differs by provider via `AgentProvider.supports_native_skills`: **Copilot** (`True`) registers the skill directory on the SDK session via `skill_directories`, so the agent discovers and loads skill content natively (progressive disclosure via `SKILL.md` frontmatter); **Claude** and **Claude Agent SDK** (`False`) eagerly inject every enabled skill's `SKILL.md` plus `references/*.md` into the agent's rendered prompt inside `...` tags. Providers also declare `skills: bool` on their `ProviderCapabilities` descriptor so `conductor validate` can catch skills-against-unsupported-provider mismatches. Built-in skills live under `plugins/conductor/skills//` and are bundled into the wheel via the hatchling `force-include` entry in `pyproject.toml`. Skills are rejected on non-provider-backed step types (script, wait, set, terminate, workflow, human_gate). See `examples/skills-self-improving-workflow.yaml`. - **Terminate steps** (`type: terminate`): explicit terminal step with `status` (`success` | `failed`), Jinja2 `reason`, and optional `output_template` (a `dict[str, str]` that replaces `workflow.output:` when set; each value is rendered then passed through `_maybe_parse_json` so `"true"` becomes `True`, `"42"` becomes `42`, JSON literals are parsed). Reaching a terminate step ends the workflow immediately (no routes evaluated after). `success` → CLI exit 0, dashboard ✅, `workflow_completed { termination_reason, terminated_by, is_explicit: true, status }`; runs `on_complete` hook. `failed` → CLI exit 1 (with rendered output JSON still printed to stdout for downstream tooling), dashboard ❌, raises `WorkflowTerminated` (subclass of `ExecutionError`), emits `workflow_failed { error_type: "WorkflowTerminated", is_explicit: true, status, output }`, runs `on_error` hook, and **does not** save an on-failure checkpoint (explicit terminations are intentionally non-resumable). Terminate steps cannot have `routes`, `tools`, `output`, `prompt`, `model`, etc.; cannot be used as parallel-group members or as a for_each inline agent (route to one from those groups' `routes:` instead). Inside a sub-workflow, a `status: failed` terminate is downgraded at the parent boundary to `SubworkflowTerminatedError` (also a subclass of `ExecutionError`) preserving the child's rendered `terminated_output` / `terminated_reason` / `terminated_by` as structured attributes — the parent treats it as a normal sub-workflow failure (its own `workflow_failed` does NOT inherit `is_explicit: true`). For more detail see `examples/terminate.yaml`, `docs/workflow-syntax.md` (Terminate Steps section), and `plugins/conductor/skills/conductor/references/authoring.md`. - **Structured `runtime.provider` (Copilot custom routing)**: `runtime.provider` accepts either the bare string shorthand (`provider: copilot`) or a structured `ProviderSettings` object that routes the Copilot SDK at OpenAI-compatible / Azure / Anthropic endpoints (Ollama, vLLM, LM Studio, Azure OpenAI, etc.). Object fields: `name` (defaults to `copilot`), `type` (`openai`|`azure`|`anthropic`), `wire_api` (`completions`|`responses`), `base_url`, `api_key`, `bearer_token`, `headers`, `azure.api_version`. `api_key` and `bearer_token` are `SecretStr` (redacted in `model_dump` / dashboard / event logs). The model is frozen after construction. Custom routing activates only when at least one non-`name` field is set in YAML — ambient `OPENAI_*` env vars never divert default routing on their own. Once activated, missing fields fall back from env vars in this order: `base_url` ← `COPILOT_PROVIDER_BASE_URL` → `OPENAI_BASE_URL`; `api_key` ← `COPILOT_PROVIDER_API_KEY` (only — ambient `OPENAI_API_KEY` is intentionally NOT a fallback to avoid credential leaks); `bearer_token` ← `COPILOT_PROVIDER_BEARER_TOKEN`. The schema rejects every non-`name` field when `name != "copilot"` (structured config for other providers is a follow-up). It also rejects anchorless / broken combinations that would silently no-op at the SDK boundary: `wire_api` / `type` / `headers` / `azure` cannot stand alone without `base_url` / `api_key` / `bearer_token`; empty `headers`, empty `SecretStr`, and `azure: {api_version: null}` are rejected. The resolver raises `ProviderError` when custom routing is activated but every resolved field is falsy (e.g. expected env vars all unset). Custom routing applies to both agent execution and dialog turns so all sessions hit the same endpoint. `--provider ` CLI override replaces the whole `ProviderSettings` (logs a notice when YAML had structured fields). See `examples/copilot-local-llm.yaml`. @@ -170,6 +176,7 @@ Tests mirror source structure in `tests/`: - `test_providers/` - Provider implementation tests - `test_integration/` - Full workflow execution tests - `test_gates/` - Human gate tests +- `test_skills/` - Skill registry, loader, schema field, and executor-integration tests Use `pytest.mark.performance` for performance tests (exclude with `-m "not performance"`). diff --git a/examples/skills-self-improving-workflow.yaml b/examples/skills-self-improving-workflow.yaml new file mode 100644 index 00000000..df49dd43 --- /dev/null +++ b/examples/skills-self-improving-workflow.yaml @@ -0,0 +1,161 @@ +# Self-improving workflow using the `conductor` skill +# +# This example shows how the generalized `skills:` capability gives an +# agent reusable, version-accurate knowledge of Conductor's YAML +# schema, execution model, and authoring patterns. It's the building +# block that a future `conductor watch` (issue #181) would chain into a +# closed-loop generate → review → fix cycle. The example demonstrates +# all three sides of that loop in one pass. +# +# Provider mechanism — same observable contract, different mechanism: +# * Copilot: the skill directory is registered on the SDK session via +# `skill_directories`. The agent discovers and loads SKILL.md and +# references on demand (progressive disclosure). +# * Claude: SKILL.md + references/*.md are eagerly prepended to the +# agent's rendered prompt inside +# ... tags. +# +# Tri-state opt-in via list presence: +# * Omit `skills:` → inherit `runtime.skills` +# * `skills: []` → explicit opt-out (e.g. the `formatter` agent +# below, which doesn't need workflow knowledge) +# * `skills: [name]` → explicit set, replaces the workflow default +# +# Usage: +# conductor run examples/skills-self-improving-workflow.yaml \ +# --input task="Write a workflow that summarises a GitHub issue." + +workflow: + name: skills-self-improving-workflow + description: | + Generate a Conductor workflow, review it for correctness against + the bundled `conductor` skill, then revise it based on the review. + Demonstrates the building-block pattern for future iterative + fix-validate loops (see issue #181). + version: "1.0.0" + entry_point: author + + runtime: + provider: copilot + skills: [conductor] # every provider-backed agent inherits this + + input: + task: + type: string + required: true + description: A short description of the workflow the author should generate. + +agents: + - name: author + description: Drafts an initial Conductor workflow for the given task. + prompt: | + You are authoring a brand-new Conductor workflow. + + Task: {{ workflow.input.task }} + + Produce a complete, runnable workflow YAML that solves the task. + Use the `conductor` skill for schema details, allowed step types, + routing patterns, and naming conventions. Prefer a minimal, + idiomatic design over an exhaustive one. + output: + workflow_yaml: + type: string + description: The drafted workflow YAML. + notes: + type: string + description: Author's notes on design choices. + routes: + - to: reviewer + + - name: reviewer + description: Reviews the drafted workflow for schema and design issues. + prompt: | + Review the following Conductor workflow YAML. + + ```yaml + {{ author.output.workflow_yaml }} + ``` + + Use the `conductor` skill to verify: + * Schema correctness (field names, types, required fields) + * Routing soundness (no orphan agents, no unreachable routes, + terminating routes present) + * Sensible context mode and failure-mode choices + * Appropriate use of step types (agent / script / set / wait / + terminate / parallel / for_each / human_gate / workflow) + + List blocking issues separately from suggestions. If the workflow + is already correct and idiomatic, say so plainly and return an + empty `issues` list. + output: + issues: + type: string + description: Blocking schema or correctness issues. Empty if none. + suggestions: + type: string + description: Non-blocking improvements. + verdict: + type: string + description: One of "approved" or "needs_revision". + routes: + - to: fixer + when: "{{ reviewer.output.verdict == 'needs_revision' }}" + - to: formatter + + - name: fixer + description: Applies the reviewer's findings and produces a revised workflow. + prompt: | + The reviewer flagged issues with the drafted workflow. + + Original: + ```yaml + {{ author.output.workflow_yaml }} + ``` + + Review findings: + - Issues: {{ reviewer.output.issues }} + - Suggestions: {{ reviewer.output.suggestions }} + + Use the `conductor` skill to produce a corrected, complete + workflow YAML that addresses every blocking issue. Adopt + suggestions where they materially improve the design. Output the + final YAML and a brief changelog. + output: + workflow_yaml: + type: string + description: The revised workflow YAML. + changelog: + type: string + description: Short summary of what changed and why. + routes: + - to: formatter + + - name: formatter + description: Renders the final workflow + report. Doesn't need the skill. + # Explicit opt-out: this agent only formats existing strings. No + # need to bloat its prompt with the full skill content. + skills: [] + prompt: | + Produce a Markdown report containing two sections: + + ## Workflow + + ```yaml + {{ fixer.output.workflow_yaml | default(author.output.workflow_yaml) }} + ``` + + ## Notes + + Original notes: {{ author.output.notes }} + Review verdict: {{ reviewer.output.verdict }} + {% if fixer.output.changelog is defined %} + Changelog: {{ fixer.output.changelog }} + {% endif %} + output: + report: + type: string + description: The final Markdown report. + +output: + report: "{{ formatter.output.report }}" + verdict: "{{ reviewer.output.verdict }}" diff --git a/plugins/conductor/skills/conductor/SKILL.md b/plugins/conductor/skills/conductor/SKILL.md index db30c5ee..0aa2f07d 100644 --- a/plugins/conductor/skills/conductor/SKILL.md +++ b/plugins/conductor/skills/conductor/SKILL.md @@ -5,7 +5,7 @@ description: Validate, run, and execute workflows; creating new workflows when e # Conductor -CLI tool for defining and running multi-agent workflows with the GitHub Copilot SDK, Anthropic Claude, or Claude Agent SDK. +CLI tool for defining and running multi-agent workflows with the GitHub Copilot SDK or Anthropic Claude. > **DO NOT create new workflow files unless the user explicitly asks you to create one.** Default to running, validating, or debugging existing workflows. If the user's request is ambiguous, assume they want to run or modify an existing workflow rather than create a new one. diff --git a/plugins/conductor/skills/conductor/references/authoring.md b/plugins/conductor/skills/conductor/references/authoring.md index 517da2c6..7fc51fc4 100644 --- a/plugins/conductor/skills/conductor/references/authoring.md +++ b/plugins/conductor/skills/conductor/references/authoring.md @@ -20,6 +20,7 @@ workflow: max_agent_iterations: 50 # Max tool-use roundtrips per agent (1-500, optional) max_session_seconds: 120 # Wall-clock timeout per agent session (optional) default_reasoning_effort: medium # Workflow-wide reasoning effort: low, medium, high, xhigh (optional) + skills: [conductor] # Skills available to every provider-backed agent (optional) input: # Define workflow inputs param_name: @@ -116,6 +117,11 @@ agents: reasoning: # Override runtime.default_reasoning_effort (optional) effort: high # low, medium, high, or xhigh + skills: [conductor] # Skills this agent has access to (optional, tri-state) + # Omit = inherit runtime.skills; [] = explicit opt-out; + # [name, ...] = explicit set. Not allowed on + # script/human_gate/workflow/wait/set/terminate agents. + routes: # Where to go next - to: next_agent ``` @@ -148,6 +154,40 @@ agents: See `examples/reasoning-effort.yaml` for a complete example. +### Skills + +`skills` enables reusable knowledge or capability bundles for provider-backed agents. The Conductor distribution ships one built-in skill — `conductor` — which packages the YAML schema, execution model, and authoring patterns (the same content this reference doc covers) so an agent can evaluate, improve, debug, or generate Conductor workflows. + +**Tri-state per-agent field (resolved via list presence):** +- Omit `skills:` — inherit from `runtime.skills` +- `skills: []` — explicit opt-out (no skills for this agent, regardless of workflow default) +- `skills: [name, ...]` — explicit set, replaces the workflow default + +**Workflow-wide default:** `runtime.skills: [conductor]` enables it for every provider-backed agent. Individual agents can override. + +**Provider mechanism (same observable contract — "the agent has access to the named skill"):** +- **Copilot** — the resolved skill directory is registered on the SDK session via `skill_directories`, so the agent discovers and loads skill content natively (progressive disclosure via `SKILL.md` frontmatter). This is more token-efficient than eager injection. +- **Claude** — the loader reads `SKILL.md` plus every `references/*.md` file in the skill directory and prepends them to the agent's rendered prompt inside `...` tags. Inserted between workspace instructions and the user prompt. + +Not allowed on `script`, `human_gate`, `workflow`, `wait`, `set`, or `terminate` agent types. Unknown skill names fail at workflow validation time. + +```yaml +workflow: + runtime: + skills: [conductor] # all provider-backed agents get the conductor skill + +agents: + - name: workflow_reviewer + skills: [conductor] # per-agent opt-in (redundant here, kept for clarity) + prompt: "Review this workflow for correctness..." + + - name: simple_agent + skills: [] # opt out even when runtime default is set + prompt: "Do something simple." +``` + +See `examples/skills-self-improving-workflow.yaml` for a complete example. + ## Routing Patterns ### Linear @@ -696,18 +736,8 @@ agents: ### Gate Output Human gates automatically capture: -- `output.selected` — the `value` of the chosen option. -- `output.additional_input` — dict of values collected from `prompt_for` fields. - Always present; `{}` when no `prompt_for` was specified or the selected option - has no `prompt_for`. Access individual fields via templates as - `{{ .output.additional_input. }}` (for example - `{{ approval_gate.output.additional_input.feedback }}` when an option declares - `prompt_for: feedback`). - -> **`context: explicit` mode note.** `input:` declarations support -> `.output.additional_input` (the whole dict) but not the dotted shorthand -> `.output.additional_input.`. Declare the parent key and read -> individual fields via Jinja2 in the agent's prompt or output template. +- `output.selected` - the `value` of the chosen option +- `output.feedback` - text input from `prompt_for` (if specified) ## Context Modes diff --git a/plugins/conductor/skills/conductor/references/yaml-schema.md b/plugins/conductor/skills/conductor/references/yaml-schema.md index 75b9d5c4..a89ce2e6 100644 --- a/plugins/conductor/skills/conductor/references/yaml-schema.md +++ b/plugins/conductor/skills/conductor/references/yaml-schema.md @@ -6,7 +6,7 @@ Complete reference for all YAML configuration options. Derived from the Pydantic ```yaml workflow: WorkflowDef # Required: workflow configuration -tools: [string] # Optional: workflow-level tool names (ignored by claude-agent-sdk — uses CLI config) +tools: [string] # Optional: workflow-level tool names agents: [AgentDef] # Required: agent definitions parallel: [ParallelGroup] # Optional: static parallel groups for_each: [ForEachDef] # Optional: dynamic parallel groups @@ -27,16 +27,20 @@ workflow: # Runtime configuration runtime: - provider: string | object # "copilot" (default), "claude", "claude-agent-sdk", or "openai-agents" + provider: string | object # "copilot" (default), "claude", or "openai-agents" # — or a ProviderSettings object (see below) default_model: string # Default model for all agents - temperature: float # 0.0-1.0, controls randomness (optional, copilot/claude only) - max_tokens: integer # Max OUTPUT tokens per response, 1-200000 (optional, copilot/claude only) - timeout: float # Per-request timeout in seconds (optional, default: 600, copilot/claude only) + temperature: float # 0.0-1.0, controls randomness (optional) + max_tokens: integer # Max OUTPUT tokens per response, 1-200000 (optional) + timeout: float # Per-request timeout in seconds (optional, default: 600) max_agent_iterations: integer # Max tool-use roundtrips per agent (1-500, optional) max_session_seconds: float # Wall-clock timeout per agent session in seconds (optional) default_reasoning_effort: string # Workflow-wide reasoning/thinking effort: low, medium, high, xhigh (optional) - mcp_servers: # MCP server configurations (ignored by claude-agent-sdk — uses CLI config) + skills: [string] # Skills enabled for every provider-backed agent (default: []) + # Currently registered built-ins: "conductor" + # Copilot loads natively via `skill_directories`; + # Claude eagerly injects SKILL.md + references/*.md into the prompt. + mcp_servers: # MCP server configurations : type: string # "stdio" (default), "http", or "sse" command: string # Command to run (required for stdio) @@ -107,7 +111,7 @@ agents: type: string # "agent" (default), "human_gate", "script", "workflow", "wait", or "terminate" description: string # What this agent does model: string # Override default_model - provider: string # Per-agent provider override ("copilot", "claude", or "claude-agent-sdk") + provider: string # Per-agent provider override ("copilot" or "claude") # Input specification (for explicit context mode) input: @@ -153,6 +157,13 @@ agents: reasoning: effort: string # low, medium, high, or xhigh + # Skills (optional, only on provider-backed agents) + # Tri-state via list presence: + # - omit: inherit runtime.skills + # - skills: [] explicit opt-out (no skills for this agent) + # - skills: [a, b] explicit set, replaces the workflow default + skills: [string] # Skill names enabled for this agent (built-ins: "conductor") + # Per-agent retry policy (optional, not allowed for script, human_gate, workflow, or wait agents) retry: max_attempts: integer # Max attempts including first (1-10, default: 1 = no retry) @@ -390,19 +401,11 @@ agents: route: string # Agent to route to when selected prompt_for: string # Optional: field name to collect text input from user - output: # Captured automatically (do not declare in YAML) - selected: # The chosen option's `value` + output: # Captured automatically + selected: # The selected option value + type: string + feedback: # Text from prompt_for (if used) type: string - additional_input: # Dict of values collected from `prompt_for` fields. - type: dict # Always present; `{}` when no `prompt_for` is set - # or when the selected option has no `prompt_for`. - # Access fields via templates as: - # {{ .output.additional_input. }} - # In `context: explicit` mode, `input:` declarations - # support `.output.additional_input` (the whole - # dict) but not the dotted shorthand - # `.output.additional_input.` — declare - # the parent and traverse in Jinja2. ``` ## Parallel Group Schema diff --git a/pyproject.toml b/pyproject.toml index 7b8f87bd..ba02cced 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,9 @@ exclude = [ "src/conductor/web/frontend", ] +[tool.hatch.build.targets.wheel.force-include] +"plugins/conductor/skills/conductor" = "plugins/conductor/skills/conductor" + [dependency-groups] dev = [ "pytest>=9.0.3", diff --git a/src/conductor/config/schema.py b/src/conductor/config/schema.py index a2ea6300..efea6c7d 100644 --- a/src/conductor/config/schema.py +++ b/src/conductor/config/schema.py @@ -779,6 +779,46 @@ class AgentDef(BaseModel): effort: high """ + skills: list[str] | None = None + """Opt this agent into a list of named built-in skills. + + Each entry is a skill name registered in :mod:`conductor.skills`. + The agent receives that skill's content via whichever mechanism the + provider supports natively: + + * **Copilot** — skill directories are passed to the SDK session via + ``skill_directories``; the model discovers and loads skill content + as relevant (progressive disclosure, token-efficient). + * **Claude / Claude Agent SDK** — ``SKILL.md`` plus + ``references/*.md`` is eagerly injected into the agent's rendered + prompt, wrapped in ```` tags. There is no native + skill surface on the Anthropic API without adopting the + container/code-execution beta. + + Tri-state semantics via list presence: + + * ``None`` (omitted): inherit from ``workflow.runtime.skills`` + * ``[]`` (empty list): explicit none — overrides any workflow + default + * ``[name, ...]``: explicit set — overrides any workflow default + + Skills built into Conductor today: + + * ``conductor`` — comprehensive knowledge of Conductor's YAML + schema, execution model, authoring patterns, and CLI commands. + Enables agents to evaluate, improve, debug, or generate Conductor + workflows. + + Only applies to provider-backed agents (type='agent' or None). + + Example YAML:: + + agents: + - name: workflow_reviewer + skills: [conductor] + prompt: "Review this workflow for correctness..." + """ + status: Literal["success", "failed"] | None = None """Outcome status for ``type: terminate`` steps. @@ -845,6 +885,28 @@ def validate_timeout(cls, v: int | None) -> int | None: raise ValueError("timeout must be a positive integer") return v + @field_validator("skills") + @classmethod + def validate_skills(cls, v: list[str] | None) -> list[str] | None: + """Ensure every skill name resolves to a known built-in. + + Validates at load time so unknown skill names surface in + ``conductor validate`` and ``conductor run`` startup rather than + at execute time. Empty lists are allowed (explicit opt-out). + """ + if v is None: + return v + from conductor.skills import SkillNotFoundError, get_skill_directory + + for name in v: + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"skills entries must be non-empty strings, got {name!r}") + try: + get_skill_directory(name) + except SkillNotFoundError as exc: + raise ValueError(str(exc)) from exc + return v + @field_validator("duration", mode="before") @classmethod def reject_bool_duration(cls, v: Any) -> Any: @@ -892,6 +954,8 @@ def validate_agent_type(self) -> AgentDef: raise ValueError("human_gate agents cannot have 'max_depth'") if self.reasoning is not None: raise ValueError("human_gate agents cannot have 'reasoning'") + if self.skills is not None: + raise ValueError("human_gate agents cannot have 'skills'") if self.timeout_seconds is not None: raise ValueError("human_gate agents cannot have 'timeout_seconds'") if self.value is not None: @@ -931,6 +995,8 @@ def validate_agent_type(self) -> AgentDef: raise ValueError("script agents cannot have 'max_depth'") if self.reasoning is not None: raise ValueError("script agents cannot have 'reasoning'") + if self.skills is not None: + raise ValueError("script agents cannot have 'skills'") if self.timeout_seconds is not None: raise ValueError( "script agents cannot have 'timeout_seconds' " @@ -1016,6 +1082,8 @@ def validate_agent_type(self) -> AgentDef: raise ValueError("wait agents cannot have 'dialog'") if self.reasoning is not None: raise ValueError("wait agents cannot have 'reasoning'") + if self.skills is not None: + raise ValueError("wait agents cannot have 'skills'") if self.timeout_seconds is not None: raise ValueError("wait agents cannot have 'timeout_seconds'") if self.output is not None: @@ -1075,6 +1143,8 @@ def validate_agent_type(self) -> AgentDef: raise ValueError("set agents cannot have 'dialog'") if self.reasoning is not None: raise ValueError("set agents cannot have 'reasoning'") + if self.skills is not None: + raise ValueError("set agents cannot have 'skills'") if self.timeout_seconds is not None: raise ValueError("set agents cannot have 'timeout_seconds'") if self.duration is not None: @@ -1133,6 +1203,8 @@ def validate_agent_type(self) -> AgentDef: raise ValueError("terminate agents cannot have 'dialog'") if self.reasoning is not None: raise ValueError("terminate agents cannot have 'reasoning'") + if self.skills is not None: + raise ValueError("terminate agents cannot have 'skills'") if self.workflow: raise ValueError("terminate agents cannot have 'workflow'") if self.input_mapping is not None: @@ -1183,6 +1255,8 @@ def validate_agent_type(self) -> AgentDef: ) if self.type == "workflow" and self.reasoning is not None: raise ValueError("workflow agents cannot have 'reasoning'") + if self.type == "workflow" and self.skills is not None: + raise ValueError("workflow agents cannot have 'skills'") # Wait-only fields are forbidden on every other type. ``reason`` is # shared with ``type: terminate`` (which has its own required-non- @@ -1565,6 +1639,45 @@ def _coerce_provider(cls, value: Any) -> Any: the request through to the SDK. """ + skills: list[str] = Field(default_factory=list) + """Workflow-wide default skills for every provider-backed agent. + + Each entry is a skill name registered in :mod:`conductor.skills` (e.g. + ``conductor``). Every provider-backed agent inherits this list as its + default; individual agents override by setting their own ``skills:`` + field (use ``skills: []`` for explicit opt-out). + + Skill content reaches the model differently per provider: + + * **Copilot** — registered on the SDK session via ``skill_directories`` + * **Claude / Claude Agent SDK** — eagerly injected into the rendered + prompt inside ``...`` tags + + Defaults to an empty list (no skills). Phase 1 ships one built-in + skill (``conductor``); user-defined skill directories will be added + in a follow-up. + + Example YAML:: + + runtime: + skills: [conductor] + """ + + @field_validator("skills") + @classmethod + def validate_skills(cls, v: list[str]) -> list[str]: + """Ensure every workflow-default skill name resolves to a known built-in.""" + from conductor.skills import SkillNotFoundError, get_skill_directory + + for name in v: + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"skills entries must be non-empty strings, got {name!r}") + try: + get_skill_directory(name) + except SkillNotFoundError as exc: + raise ValueError(str(exc)) from exc + return v + class WorkflowDef(BaseModel): """Top-level workflow configuration.""" diff --git a/src/conductor/engine/workflow.py b/src/conductor/engine/workflow.py index bd7b85b7..394dbaec 100644 --- a/src/conductor/engine/workflow.py +++ b/src/conductor/engine/workflow.py @@ -382,6 +382,11 @@ def __init__( # Workspace instructions preamble (inherited by sub-workflows) self._instructions_preamble = instructions_preamble + # Workflow-level default skills (runtime.skills) — inherited by + # every provider-backed agent unless the agent overrides with + # its own ``skills`` field. + self._workflow_skills = list(config.workflow.runtime.skills) + # For backward compatibility, create a default executor with single provider # This is used when registry is None if provider is not None: @@ -389,6 +394,7 @@ def __init__( provider, workflow_tools=config.tools, instructions_preamble=self._instructions_preamble, + workflow_skills=self._workflow_skills, ) self.provider = provider # Keep for backward compatibility else: @@ -804,6 +810,7 @@ async def _get_executor_for_agent(self, agent: AgentDef) -> AgentExecutor: provider, workflow_tools=self.config.tools, instructions_preamble=self._instructions_preamble, + workflow_skills=self._workflow_skills, ) elif self.executor is not None: # Single provider mode (backward compatibility) diff --git a/src/conductor/executor/agent.py b/src/conductor/executor/agent.py index a0cc8ccf..196dcfa2 100644 --- a/src/conductor/executor/agent.py +++ b/src/conductor/executor/agent.py @@ -98,6 +98,7 @@ def __init__( provider: AgentProvider, workflow_tools: list[str] | None = None, instructions_preamble: str | None = None, + workflow_skills: list[str] | None = None, ) -> None: """Initialize the AgentExecutor. @@ -106,10 +107,15 @@ def __init__( workflow_tools: Tools defined at workflow level. Defaults to empty list. instructions_preamble: Optional workspace instructions text to prepend to every agent's rendered prompt. + workflow_skills: Workflow-level default skills (from + ``runtime.skills``). Agents inherit this list unless they + set their own ``skills:`` field — ``[]`` opts out + explicitly, ``[name, ...]`` overrides the default. """ self.provider = provider self.workflow_tools = workflow_tools or [] self.instructions_preamble = instructions_preamble + self._workflow_skills: list[str] = list(workflow_skills or []) self.renderer = TemplateRenderer() async def execute( @@ -157,9 +163,10 @@ async def execute( # Render prompt with context rendered_prompt = self.renderer.render(agent.prompt, context) - # Prepend workspace instructions preamble if available - if self.instructions_preamble: - rendered_prompt = self.instructions_preamble + rendered_prompt + # Prepend prompt prefix (workspace instructions + optional skills) + prefix = self._build_prompt_prefix(agent) + if prefix: + rendered_prompt = prefix + rendered_prompt # Append user guidance section if provided if guidance_section: @@ -200,6 +207,18 @@ async def execute( if resolved_tools: _verbose_log(f" Tools: {resolved_tools}") + # Resolve skill directories for providers with native skill support + # (Copilot passes these on session_kwargs; Claude has already had + # the skill content eager-injected into rendered_prompt above and + # ignores this). + skill_dirs: list[str] | None = None + if getattr(self.provider, "supports_native_skills", False): + skill_names = self._resolve_skills_for_agent(agent) + if skill_names: + from conductor.skills import resolve_skill_directories + + skill_dirs = [str(p) for p in resolve_skill_directories(skill_names)] + # Execute via provider output = await self.provider.execute( agent=agent, @@ -208,6 +227,7 @@ async def execute( tools=resolved_tools, interrupt_signal=interrupt_signal, event_callback=event_callback, + skill_directories=skill_dirs, ) # Ensure output.content is a dict @@ -245,12 +265,62 @@ def render_prompt(self, agent: AgentDef, context: dict[str, Any]) -> str: context: Context for prompt rendering. Returns: - Rendered prompt string with workspace instructions prepended if configured. + Rendered prompt string with workspace instructions and optional + skill content prepended if configured. Raises: TemplateError: If prompt rendering fails. """ rendered = self.renderer.render(agent.prompt, context) - if self.instructions_preamble: - rendered = self.instructions_preamble + rendered + prefix = self._build_prompt_prefix(agent) + if prefix: + rendered = prefix + rendered return rendered + + def _resolve_skills_for_agent(self, agent: AgentDef) -> list[str]: + """Resolve the effective skill list for an agent. + + Resolution order: + - If the agent explicitly sets ``skills`` (including ``[]``), + that value wins. + - Otherwise, inherit the workflow-level default + (``runtime.skills``). + + Returns an empty list when no skills are enabled, or when the + agent is not a provider-backed type (script / wait / set / + terminate / human_gate / workflow — schema rejects ``skills`` + on these so this is defensive only). + """ + if agent.type not in (None, "agent"): + return [] + if agent.skills is not None: + return list(agent.skills) + return list(self._workflow_skills) + + def _build_prompt_prefix(self, agent: AgentDef) -> str: + """Build the prefix to prepend before an agent's rendered prompt. + + Combines workspace instructions and (on providers that lack + native skill support) eager skill-content injection into a + single prefix string. Shared by :meth:`execute` and + :meth:`render_prompt` so the rendered prompts match the prompts + sent to the provider. + + On providers that support native skill loading + (:attr:`AgentProvider.supports_native_skills`), the skill + directories are passed to the SDK on the provider side and we + skip preamble injection to avoid double-loading. + """ + parts: list[str] = [] + if self.instructions_preamble: + parts.append(self.instructions_preamble) + if not getattr(self.provider, "supports_native_skills", False): + skill_names = self._resolve_skills_for_agent(agent) + if skill_names: + from conductor.skills import load_skill_content, resolve_skill_directories + + dirs = resolve_skill_directories(skill_names) + content = load_skill_content(list(zip(skill_names, dirs, strict=True))) + if content: + parts.append(content) + return "".join(parts) diff --git a/src/conductor/providers/base.py b/src/conductor/providers/base.py index af778b10..41194a61 100644 --- a/src/conductor/providers/base.py +++ b/src/conductor/providers/base.py @@ -160,6 +160,30 @@ class AgentProvider(ABC): # subclass at import time. CAPABILITIES: ClassVar[ProviderCapabilities | None] = None + @property + def supports_native_skills(self) -> bool: + """Whether the provider loads skill content natively. + + When ``True``, the :class:`~conductor.executor.agent.AgentExecutor` + passes resolved skill directories to :meth:`execute` via + ``skill_directories`` and skips eager preamble injection — the + provider's SDK is expected to discover and load skill content + itself (e.g. Copilot's session-level ``skill_directories``). + + When ``False`` (default), the executor eagerly injects the full + ``SKILL.md`` plus ``references/*.md`` content into the agent's + rendered prompt — appropriate for providers like Claude where + there is no server-side skill surface without adopting the + container/code-execution beta. + + Providers MUST also declare ``skills=True`` on their + :class:`ProviderCapabilities` descriptor regardless of which + mechanism they use — the capability flag asserts the user-facing + contract ("the agent has access to the named skill"), this + property selects the mechanism. + """ + return False + def __init_subclass__(cls, *, abstract: bool = False, **kwargs: Any) -> None: """Enforce that every production subclass declares ``CAPABILITIES``. @@ -193,6 +217,7 @@ async def execute( tools: list[str] | None = None, interrupt_signal: asyncio.Event | None = None, event_callback: EventCallback | None = None, + skill_directories: list[str] | None = None, ) -> AgentOutput: """Execute an agent and return normalized output. @@ -210,6 +235,14 @@ async def execute( event_callback: Optional callback for streaming SDK events upstream (reasoning, tool calls, messages). Called with (event_type, data_dict) for each interesting SDK event. + skill_directories: Optional skill directories resolved from + the agent's effective ``skills`` list. Providers that + set :attr:`supports_native_skills` to ``True`` should + forward these to their SDK's native skill-loading + mechanism. Providers that do not support native skills + may ignore this parameter (the executor will have + eager-injected the skill content into + ``rendered_prompt`` instead). Returns: Normalized AgentOutput with structured content. diff --git a/src/conductor/providers/capabilities.py b/src/conductor/providers/capabilities.py index 2b578341..13de1928 100644 --- a/src/conductor/providers/capabilities.py +++ b/src/conductor/providers/capabilities.py @@ -121,6 +121,27 @@ class ProviderCapabilities(BaseModel): :class:`ParallelGroup`, and may only appear in a :class:`ForEachDef` whose ``max_concurrent`` is 1.""" + skills: bool = False + """``True`` when the provider exposes :mod:`conductor.skills` content + to the agent. The user-facing contract is the same regardless of + mechanism — *"the agent has access to the named skill"* — but how + the content reaches the model is selected by + :attr:`AgentProvider.supports_native_skills`: + + * ``supports_native_skills=True`` — resolved skill directories are + passed to the SDK on the ``skill_directories`` kwarg of + :meth:`AgentProvider.execute` and the SDK loads skill content + itself (progressive disclosure via ``SKILL.md`` frontmatter). + * ``supports_native_skills=False`` — :class:`AgentExecutor` reads + every enabled skill's ``SKILL.md`` plus ``references/*.md`` and + eagerly prepends them to ``rendered_prompt`` inside + ``...`` tags. + + Workflows that declare ``runtime.skills`` or per-agent ``skills:`` + against a provider with ``skills=False`` fail validation. Defaults + to ``False`` so providers that have not been audited for skills + surface the mismatch loudly.""" + upstream_pin: str | None = None """Upstream package pin surfaced in the experimental banner, e.g. ``"claude-agent-sdk>=0.1.0"``. ``None`` for providers that have no @@ -189,6 +210,8 @@ def declared_limitations(self) -> list[str]: items.append("no usage tracking") if not self.concurrent_safe: items.append("not safe to run in parallel") + if not self.skills: + items.append("no skills support") return items @@ -239,6 +262,7 @@ def _build_unimplemented_placeholder() -> ProviderCapabilities: checkpoint_resume=True, usage_tracking=True, concurrent_safe=True, + skills=True, upstream_pin=None, maintainer="(not yet implemented)", ) diff --git a/src/conductor/providers/claude.py b/src/conductor/providers/claude.py index 7b36622c..3dd643b0 100644 --- a/src/conductor/providers/claude.py +++ b/src/conductor/providers/claude.py @@ -156,6 +156,10 @@ class ClaudeProvider(AgentProvider): usage_tracking=True, # No global mutable state — safe to run N parallel agents. concurrent_safe=True, + # Skill content is eagerly injected into the rendered prompt by + # AgentExecutor (Claude's Messages API has no server-side skill + # surface without adopting the container/code-execution beta). + skills=True, upstream_pin=None, maintainer="@microsoft/conductor", ) @@ -664,6 +668,7 @@ async def execute( tools: list[str] | None = None, interrupt_signal: asyncio.Event | None = None, event_callback: EventCallback | None = None, + skill_directories: list[str] | None = None, ) -> AgentOutput: """Execute an agent using the Claude SDK. @@ -676,6 +681,11 @@ async def execute( When set during the agentic loop, Claude is asked to emit partial output via the ``emit_output`` tool, and the result is returned with ``partial=True``. + skill_directories: Ignored. Claude has no server-side skill + surface without adopting the container/code-execution + beta, so :class:`AgentExecutor` has already eager-injected + the skill content into ``rendered_prompt`` for this + provider (see :attr:`AgentProvider.supports_native_skills`). Returns: Normalized AgentOutput with structured content. @@ -684,6 +694,7 @@ async def execute( ProviderError: If SDK execution fails. ValidationError: If output doesn't match schema. """ + del skill_directories # Claude relies on eager preamble injection (see docstring). # Use retry logic wrapper for execution return await self._execute_with_retry( agent, diff --git a/src/conductor/providers/claude_agent_sdk.py b/src/conductor/providers/claude_agent_sdk.py index fb06b44b..d36b9be5 100644 --- a/src/conductor/providers/claude_agent_sdk.py +++ b/src/conductor/providers/claude_agent_sdk.py @@ -156,6 +156,11 @@ class ClaudeAgentSdkProvider(AgentProvider): # No global mutable state shared across calls — the SDK spawns # an independent subprocess per query() invocation. concurrent_safe=True, + # Skill content is eagerly injected into the rendered prompt by + # AgentExecutor (the claude-agent-sdk surfaces no + # ``skill_directories`` kwarg today; once it does we can flip + # to native via ``supports_native_skills``). + skills=True, upstream_pin="claude-agent-sdk>=0.1.0", maintainer="@lesandiz (best-effort)", ) @@ -184,7 +189,14 @@ async def execute( tools: list[str] | None = None, interrupt_signal: asyncio.Event | None = None, event_callback: EventCallback | None = None, + skill_directories: list[str] | None = None, ) -> AgentOutput: + # Skill content is eager-injected by AgentExecutor for this + # provider — ``claude-agent-sdk`` exposes no skill kwarg today. + # If/when the upstream SDK gains one, flip + # ``supports_native_skills`` to True and forward this arg. + del skill_directories + if query is None or ClaudeAgentOptions is None: raise ProviderError("Claude Agent SDK not available") diff --git a/src/conductor/providers/copilot.py b/src/conductor/providers/copilot.py index a51825ff..011c8cdb 100644 --- a/src/conductor/providers/copilot.py +++ b/src/conductor/providers/copilot.py @@ -190,10 +190,26 @@ class CopilotProvider(AgentProvider): usage_tracking=True, # No global mutable state — safe to run N parallel agents. concurrent_safe=True, + # Skill directories are registered on the SDK session via + # ``session_kwargs["skill_directories"]`` and the agent discovers + # skill content natively (progressive disclosure). + skills=True, upstream_pin=None, maintainer="@microsoft/conductor", ) + @property + def supports_native_skills(self) -> bool: + """Copilot SDK loads skills natively via ``skill_directories``. + + When :class:`AgentExecutor` resolves an agent's effective + skills, it forwards the resolved directories on the + :meth:`execute` ``skill_directories`` kwarg and skips eager + preamble injection — the SDK is expected to discover skills via + their ``SKILL.md`` frontmatter (progressive disclosure). + """ + return True + def __init__( self, mock_handler: Callable[[AgentDef, str, dict[str, Any]], dict[str, Any]] | None = None, @@ -426,6 +442,7 @@ async def execute( tools: list[str] | None = None, interrupt_signal: asyncio.Event | None = None, event_callback: EventCallback | None = None, + skill_directories: list[str] | None = None, ) -> AgentOutput: """Execute an agent using the Copilot SDK. @@ -441,6 +458,10 @@ async def execute( When set during execution, the provider will attempt to abort the current session and return partial output. event_callback: Optional callback for streaming SDK events upstream. + skill_directories: Optional directories to register as + ``skill_directories`` on the SDK session so the Copilot + agent can discover and load skills natively (progressive + disclosure). Returns: Normalized AgentOutput with structured content. @@ -471,6 +492,7 @@ async def execute( tools, interrupt_signal=interrupt_signal, event_callback=event_callback, + skill_directories=skill_directories, ) def _resolve_retry_config(self, agent: AgentDef) -> RetryConfig: @@ -509,6 +531,7 @@ async def _execute_with_retry( tools: list[str] | None = None, interrupt_signal: asyncio.Event | None = None, event_callback: EventCallback | None = None, + skill_directories: list[str] | None = None, ) -> AgentOutput: """Execute with exponential backoff retry logic. @@ -522,6 +545,8 @@ async def _execute_with_retry( tools: List of tool names available to this agent. interrupt_signal: Optional event for mid-agent interrupt signaling. event_callback: Optional callback for streaming SDK events upstream. + skill_directories: Optional skill directories forwarded to the + SDK session for native skill discovery. Returns: Normalized AgentOutput with structured content. @@ -541,6 +566,7 @@ async def _execute_with_retry( tools, interrupt_signal=interrupt_signal, event_callback=event_callback, + skill_directories=skill_directories, ) # Extract usage data from SDK response if available input_tokens = sdk_response.input_tokens if sdk_response else None @@ -678,6 +704,7 @@ async def _execute_sdk_call( tools: list[str] | None = None, interrupt_signal: asyncio.Event | None = None, event_callback: EventCallback | None = None, + skill_directories: list[str] | None = None, ) -> tuple[dict[str, Any], SDKResponse | None]: """Execute the actual SDK call or mock handler. @@ -688,6 +715,11 @@ async def _execute_sdk_call( tools: List of tool names available to this agent. interrupt_signal: Optional event for mid-agent interrupt signaling. event_callback: Optional callback for streaming SDK events upstream. + skill_directories: Optional skill directories registered on the + SDK session via ``session_kwargs["skill_directories"]`` so + the Copilot agent can discover and load bundled skills + natively (progressive disclosure via ``SKILL.md`` + frontmatter). Returns: Tuple of (content dict, SDKResponse with usage data or None for mock). @@ -763,6 +795,11 @@ async def _execute_sdk_call( if self._mcp_servers: session_kwargs["mcp_servers"] = self._mcp_servers + # Register skill directories so the SDK agent can discover + # bundled skills (progressive disclosure via SKILL.md frontmatter). + if skill_directories: + session_kwargs["skill_directories"] = list(skill_directories) + # Apply custom provider routing (Ollama / vLLM / Azure / etc.) # when runtime.provider opted into it. self._apply_provider_config(session_kwargs) diff --git a/src/conductor/skills/__init__.py b/src/conductor/skills/__init__.py new file mode 100644 index 00000000..c24408a3 --- /dev/null +++ b/src/conductor/skills/__init__.py @@ -0,0 +1,46 @@ +"""Conductor Skills — generalized opt-in capabilities for Conductor agents. + +The skills system lets agents opt into bundled, reusable knowledge or +capabilities (e.g. the Conductor knowledge base, code-review rules, +domain-specific guidance) via the ``skills:`` field on ``AgentDef`` or +``RuntimeConfig``. + +Skills follow the cross-cutting skill format used by GitHub Copilot CLI +and Anthropic Claude Code (a directory containing ``SKILL.md`` plus +optional ``references/*.md``). Conductor is a *consumer* of that format +alongside the Copilot CLI plugin — there is one canonical source of +truth per skill (no duplicated docs). + +Provider-parity contract: + *"The agent has access to the named skill."* Mechanism differs by + provider (same pattern as MCP): + + * **Copilot** — native ``skill_directories`` on the SDK session. + Skill becomes discoverable; the model loads it as relevant + (progressive disclosure, token-efficient). + * **Claude** — eager preamble injection of ``SKILL.md`` plus + ``references/*.md`` into the agent's rendered prompt. The + Anthropic API has no server-side skill surface without adopting + the container/code-execution beta. + +Phase 1 ships one built-in skill: ``conductor``, sourced from +``plugins/conductor/skills/conductor/``. Future phases will add +user-defined skill directories, executable skill resources, and +progressive disclosure via MCP. +""" + +from conductor.skills.loader import load_skill_content +from conductor.skills.registry import ( + SkillNotFoundError, + get_skill_directory, + list_builtin_skills, + resolve_skill_directories, +) + +__all__ = [ + "SkillNotFoundError", + "get_skill_directory", + "list_builtin_skills", + "load_skill_content", + "resolve_skill_directories", +] diff --git a/src/conductor/skills/loader.py b/src/conductor/skills/loader.py new file mode 100644 index 00000000..a02d4f32 --- /dev/null +++ b/src/conductor/skills/loader.py @@ -0,0 +1,97 @@ +"""Load skill content for eager preamble injection (Claude-path mechanism). + +On providers that lack a native skill surface (Claude, today), Conductor +loads the full ``SKILL.md`` plus every ``references/*.md`` file from +each enabled skill's directory and prepends them to the agent's rendered +prompt, wrapped in ```` tags. On providers with native +support (Copilot's ``skill_directories``), eager injection is skipped +and the SDK handles discovery natively — the model loads skill content +only when relevant, which is more token-efficient. + +The loader is the *content* side of the skill abstraction. The +:mod:`conductor.skills.registry` module is the *resolution* side. +Results are cached per-directory for the lifetime of the process — skill +content is bundled and immutable. +""" + +from __future__ import annotations + +import functools +import logging +from pathlib import Path + +logger = logging.getLogger(__name__) + +_HEADER = ( + "The following content describes skills available to this agent. " + "Each skill provides reusable knowledge or capabilities — consult " + "the relevant skill when its description matches the task at hand." +) + + +def _read_skill_dir(skill_dir: Path) -> str: + """Read ``SKILL.md`` plus all ``references/*.md`` files in order. + + Returns the concatenated text, with each file preceded by a heading + divider. Returns an empty string if the directory has no readable + content. + """ + sections: list[str] = [] + + skill_md = skill_dir / "SKILL.md" + if skill_md.is_file(): + try: + text = skill_md.read_text(encoding="utf-8").strip() + except (OSError, UnicodeDecodeError) as exc: + logger.warning("Failed to read SKILL.md at %s: %s", skill_md, exc) + text = "" + if text: + sections.append(f"# SKILL.md\n\n{text}") + + references_dir = skill_dir / "references" + if references_dir.is_dir(): + for ref in sorted(references_dir.glob("*.md")): + try: + text = ref.read_text(encoding="utf-8").strip() + except (OSError, UnicodeDecodeError) as exc: + logger.warning("Failed to read %s: %s", ref, exc) + continue + if text: + sections.append(f"# references/{ref.name}\n\n{text}") + + return "\n\n---\n\n".join(sections) + + +@functools.lru_cache(maxsize=32) +def _cached_skill_payload(skill_dir_str: str, name: str) -> str: + skill_dir = Path(skill_dir_str) + body = _read_skill_dir(skill_dir) + if not body: + return "" + size_kb = len(body.encode("utf-8")) / 1024 + logger.info("Loaded skill %r from %s (%.1fKB)", name, skill_dir, size_kb) + return f'\n{body}\n' + + +def load_skill_content(skills: list[tuple[str, Path]]) -> str: + """Load and concatenate skill content for eager preamble injection. + + Args: + skills: List of ``(skill_name, skill_dir)`` tuples in + presentation order. + + Returns: + A single string containing every skill's ``SKILL.md`` plus + ``references/*.md`` content wrapped in ```` + tags and prefaced with a header describing the section. Returns + an empty string when no skills produce any content. + """ + payloads = [ + payload + for name, skill_dir in skills + if (payload := _cached_skill_payload(str(skill_dir), name)) + ] + if not payloads: + return "" + body = "\n\n".join(payloads) + return f"\n{_HEADER}\n\n{body}\n\n\n" diff --git a/src/conductor/skills/registry.py b/src/conductor/skills/registry.py new file mode 100644 index 00000000..107f8ae4 --- /dev/null +++ b/src/conductor/skills/registry.py @@ -0,0 +1,131 @@ +"""Built-in skill registry for Conductor. + +Phase 1 ships a single built-in skill — ``conductor`` — that points at +the existing ``plugins/conductor/skills/conductor/`` directory inside the +wheel. The skill directory follows the Copilot/Claude-Code skill format: +``SKILL.md`` plus an optional ``references/`` subdirectory of supporting +docs. + +The plugins directory is bundled as wheel package data via the +``[tool.hatch.build.targets.wheel] artifacts`` entry in +``pyproject.toml``. Resolution prefers a package-relative location so +installed wheels work; it falls back to a source-checkout location for +editable installs and tests. + +Follow-up issues will add user-defined skill directories with a trust / +allowlist model; for now the registry only accepts built-in skill names. +""" + +from __future__ import annotations + +import logging +from functools import lru_cache +from pathlib import Path + +logger = logging.getLogger(__name__) + + +class SkillNotFoundError(ValueError): + """Raised when a skill name is not found in the registry.""" + + +# Built-in skills. Maps the user-facing skill name (the string that +# appears in ``skills: [...]``) to a relative path from the repository +# root / wheel root where the skill directory lives. +_BUILTIN_SKILLS: dict[str, str] = { + "conductor": "plugins/conductor/skills/conductor", +} + + +@lru_cache(maxsize=1) +def _repo_or_wheel_root() -> Path: + """Locate the directory that contains the ``plugins/`` tree. + + Two layouts are supported: + + * **Editable install / source checkout** — ``plugins/`` lives at the + repository root, three directories above this file + (``src/conductor/skills/registry.py``). + * **Wheel install** — ``plugins/`` is bundled as package data via + hatchling's ``artifacts`` entry and lands alongside the + ``conductor/`` package directory inside ``site-packages``. + + We probe both. The first hit wins. + """ + here = Path(__file__).resolve() + + # Editable install / source checkout. + repo_root = here.parents[3] + if (repo_root / "plugins" / "conductor" / "skills").is_dir(): + return repo_root + + # Wheel install: artifacts land alongside the package itself + # (site-packages/plugins next to site-packages/conductor). + wheel_root = here.parents[2] + if (wheel_root / "plugins" / "conductor" / "skills").is_dir(): + return wheel_root + + # Fall back to repo_root so an eventual SkillNotFoundError surfaces a + # sensible path; callers will raise when the dir doesn't exist. + return repo_root + + +def list_builtin_skills() -> list[str]: + """Return the names of every built-in skill known to the registry.""" + return sorted(_BUILTIN_SKILLS.keys()) + + +def get_skill_directory(skill: str) -> Path: + """Resolve a built-in skill name to its on-disk directory. + + Args: + skill: The skill name as it appears in ``skills: [...]`` (e.g. + ``"conductor"``). + + Returns: + Absolute path to the skill directory. + + Raises: + SkillNotFoundError: If the skill name is not a known built-in + or the resolved directory does not exist on disk. + """ + rel = _BUILTIN_SKILLS.get(skill) + if rel is None: + available = ", ".join(list_builtin_skills()) or "(none)" + raise SkillNotFoundError( + f"Unknown skill {skill!r}. Available built-in skills: {available}. " + "User-defined skill directories are not yet supported." + ) + path = (_repo_or_wheel_root() / rel).resolve() + if not path.is_dir(): + raise SkillNotFoundError( + f"Built-in skill {skill!r} resolved to {path!s}, which does not " + "exist. This usually indicates a broken install; try reinstalling " + "conductor. If running from a source checkout, ensure the " + "plugins/ directory is present." + ) + return path + + +def resolve_skill_directories(skills: list[str]) -> list[Path]: + """Resolve a list of skill names to their on-disk directories. + + Args: + skills: List of built-in skill names. + + Returns: + List of absolute paths in the same order, with duplicates removed + (preserving first occurrence). + + Raises: + SkillNotFoundError: If any skill name is unknown. + """ + seen: set[Path] = set() + out: list[Path] = [] + for name in skills: + path = get_skill_directory(name) + if path in seen: + continue + seen.add(path) + out.append(path) + return out diff --git a/tests/test_engine/test_workflow.py b/tests/test_engine/test_workflow.py index 62ef35f8..0d7444fa 100644 --- a/tests/test_engine/test_workflow.py +++ b/tests/test_engine/test_workflow.py @@ -2625,6 +2625,7 @@ async def execute( tools=None, interrupt_signal=None, event_callback=None, + skill_directories=None, ): from conductor.providers.base import AgentOutput from conductor.providers.reasoning import resolve_reasoning_effort diff --git a/tests/test_engine/test_workflow_interrupt.py b/tests/test_engine/test_workflow_interrupt.py index 3efa8923..75c63217 100644 --- a/tests/test_engine/test_workflow_interrupt.py +++ b/tests/test_engine/test_workflow_interrupt.py @@ -919,6 +919,7 @@ async def execute( tools: list[str] | None = None, interrupt_signal: asyncio.Event | None = None, event_callback=None, + skill_directories: list[str] | None = None, ) -> AgentOutput: return AgentOutput(content={"result": "mock"}, raw_response="mock") diff --git a/tests/test_integration/test_existing_workflows_integration.py b/tests/test_integration/test_existing_workflows_integration.py index 261e2062..2076b7fe 100644 --- a/tests/test_integration/test_existing_workflows_integration.py +++ b/tests/test_integration/test_existing_workflows_integration.py @@ -181,7 +181,7 @@ async def test_schema_changes_dont_affect_copilot_provider(): # Serialization excludes None values dumped = runtime.model_dump(exclude_none=True) - assert dumped == {"provider": "copilot", "mcp_servers": {}} + assert dumped == {"provider": "copilot", "mcp_servers": {}, "skills": []} # Verify provider can be instantiated provider = CopilotProvider() diff --git a/tests/test_integration/test_mixed_providers.py b/tests/test_integration/test_mixed_providers.py index ccf7f66d..3f0f6c04 100644 --- a/tests/test_integration/test_mixed_providers.py +++ b/tests/test_integration/test_mixed_providers.py @@ -89,7 +89,7 @@ def test_claude_fields_ignored_by_copilot_provider(self, tmp_path): # Serialization excludes None values dumped = runtime.model_dump(exclude_none=True) - assert dumped == {"provider": "copilot", "mcp_servers": {}} + assert dumped == {"provider": "copilot", "mcp_servers": {}, "skills": []} def test_provider_parameter_isolation(self, tmp_path): """Test that provider-specific parameters don't interfere. @@ -188,6 +188,7 @@ async def execute( tools: list[str] | None = None, interrupt_signal: asyncio.Event | None = None, event_callback=None, + skill_directories: list[str] | None = None, ) -> AgentOutput: self.executed_agents.append(agent.name) return AgentOutput( diff --git a/tests/test_providers/test_capabilities.py b/tests/test_providers/test_capabilities.py index a87fe91b..a264c69b 100644 --- a/tests/test_providers/test_capabilities.py +++ b/tests/test_providers/test_capabilities.py @@ -26,6 +26,7 @@ def _stable_capabilities(**overrides: object) -> ProviderCapabilities: "checkpoint_resume": True, "usage_tracking": True, "concurrent_safe": True, + "skills": True, "upstream_pin": None, "maintainer": None, } @@ -121,6 +122,7 @@ def test_each_false_flag_produces_a_limitation(self) -> None: checkpoint_resume=False, usage_tracking=False, concurrent_safe=False, + skills=False, ) lims = caps.declared_limitations() # Every "off" capability shows up in the human-readable list. @@ -135,6 +137,7 @@ def test_each_false_flag_produces_a_limitation(self) -> None: assert "no checkpoint resume" in lims assert "no usage tracking" in lims assert "not safe to run in parallel" in lims + assert "no skills support" in lims def test_prompt_injection_structured_output_listed_as_limitation(self) -> None: caps = _stable_capabilities( diff --git a/tests/test_providers/test_copilot_skills.py b/tests/test_providers/test_copilot_skills.py new file mode 100644 index 00000000..e7712c90 --- /dev/null +++ b/tests/test_providers/test_copilot_skills.py @@ -0,0 +1,91 @@ +"""End-to-end: ``skill_directories`` reaches ``create_session`` on Copilot.""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any +from unittest.mock import AsyncMock + +import pytest + +from conductor.config.schema import AgentDef +from conductor.providers.copilot import CopilotProvider + + +def _build_mocked_provider(captured: dict[str, Any]) -> CopilotProvider: + provider = CopilotProvider(model="custom-model") + provider._started = True + + session = AsyncMock() + captured_callback: dict[str, Any] = {} + + def on_event(callback: Any) -> None: + captured_callback["cb"] = callback + + session.on = on_event + + async def send(prompt: str) -> None: + cb = captured_callback["cb"] + + def make_event(t: str, content: str = "") -> Any: + ev = SimpleNamespace() + ev.type = SimpleNamespace(value=t) + ev.data = SimpleNamespace(message=content, content=content) + return ev + + cb(make_event("assistant.message", "ok")) + cb(make_event("session.idle")) + + session.send = send + session.destroy = AsyncMock() + + async def create_session(**kwargs: Any) -> Any: + captured["create_session_kwargs"] = kwargs + return session + + client = AsyncMock() + client.create_session = create_session + provider._client = client + return provider + + +@pytest.mark.asyncio +async def test_skill_directories_passed_to_create_session() -> None: + captured: dict[str, Any] = {} + provider = _build_mocked_provider(captured) + agent = AgentDef(name="a", model="custom-model", prompt="hi") + + await provider.execute( + agent, + context={}, + rendered_prompt="hi", + skill_directories=["/path/to/skill-a", "/path/to/skill-b"], + ) + + kwargs = captured["create_session_kwargs"] + assert kwargs["skill_directories"] == ["/path/to/skill-a", "/path/to/skill-b"] + + +@pytest.mark.asyncio +async def test_skill_directories_absent_when_not_supplied() -> None: + captured: dict[str, Any] = {} + provider = _build_mocked_provider(captured) + agent = AgentDef(name="a", model="custom-model", prompt="hi") + + await provider.execute(agent, context={}, rendered_prompt="hi") + + kwargs = captured["create_session_kwargs"] + assert "skill_directories" not in kwargs + + +@pytest.mark.asyncio +async def test_empty_skill_directories_not_passed() -> None: + """Empty list shouldn't set the kwarg — keeps default SDK behaviour.""" + captured: dict[str, Any] = {} + provider = _build_mocked_provider(captured) + agent = AgentDef(name="a", model="custom-model", prompt="hi") + + await provider.execute(agent, context={}, rendered_prompt="hi", skill_directories=[]) + + kwargs = captured["create_session_kwargs"] + assert "skill_directories" not in kwargs diff --git a/tests/test_providers/test_registry.py b/tests/test_providers/test_registry.py index 60003a8b..d6db3e61 100644 --- a/tests/test_providers/test_registry.py +++ b/tests/test_providers/test_registry.py @@ -26,6 +26,7 @@ async def execute( tools: list[str] | None = None, interrupt_signal: asyncio.Event | None = None, event_callback=None, + skill_directories: list[str] | None = None, ) -> AgentOutput: return AgentOutput(content={"result": "mock"}, raw_response="mock") diff --git a/tests/test_skills/__init__.py b/tests/test_skills/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_skills/test_executor_integration.py b/tests/test_skills/test_executor_integration.py new file mode 100644 index 00000000..62fdc0d5 --- /dev/null +++ b/tests/test_skills/test_executor_integration.py @@ -0,0 +1,155 @@ +"""Tests for skill resolution inside :class:`AgentExecutor`. + +Covers both provider variants of the parity contract: + +* Native-skill providers (``supports_native_skills = True``) skip eager + preamble injection — skill content is loaded by the SDK. +* Non-native providers eagerly inject ``SKILL.md`` + ``references/*.md`` + into the rendered prompt. +""" + +from __future__ import annotations + +import asyncio +from typing import Any + +from conductor.config.schema import AgentDef +from conductor.executor.agent import AgentExecutor +from conductor.providers.base import AgentOutput, AgentProvider, EventCallback +from conductor.providers.copilot import CopilotProvider +from conductor.skills.loader import _cached_skill_payload + + +class _StubNonNativeProvider(AgentProvider, abstract=True): + """Provider stub that does NOT support native skill loading. + + Exercises the eager preamble injection path the same way Claude + does today. Uses ``abstract=True`` to opt out of the + :class:`ProviderCapabilities` declaration enforced on production + providers — this is a test fake, not a real provider. + """ + + @property + def supports_native_skills(self) -> bool: + return False + + async def execute( + self, + agent: AgentDef, + context: dict[str, Any], + rendered_prompt: str, + tools: list[str] | None = None, + interrupt_signal: asyncio.Event | None = None, + event_callback: EventCallback | None = None, + skill_directories: list[str] | None = None, + ) -> AgentOutput: + return AgentOutput(content={"echo": rendered_prompt}) + + async def validate_connection(self) -> bool: + return True + + async def close(self) -> None: + return None + + +class TestCopilotProviderNativeSkills: + """Copilot owns native ``skill_directories``; preamble is NOT injected.""" + + def setup_method(self) -> None: + _cached_skill_payload.cache_clear() + + def test_no_skill_content_in_rendered_prompt(self) -> None: + provider = CopilotProvider() + executor = AgentExecutor(provider, workflow_skills=["conductor"]) + agent = AgentDef(name="a", model="gpt-4", prompt="Hello world") + rendered = executor.render_prompt(agent, {}) + assert "" not in rendered + assert '' not in rendered + assert "Hello world" in rendered + + def test_provider_advertises_native_support(self) -> None: + assert CopilotProvider().supports_native_skills is True + + +class TestNonNativeProviderEagerInjection: + """Non-native providers receive skill content via the rendered prompt.""" + + def setup_method(self) -> None: + _cached_skill_payload.cache_clear() + + def test_not_injected_when_no_skills(self) -> None: + executor = AgentExecutor(_StubNonNativeProvider()) + agent = AgentDef(name="a", model="gpt-4", prompt="Hello world") + rendered = executor.render_prompt(agent, {}) + assert "" not in rendered + assert "Hello world" in rendered + + def test_injected_when_agent_lists_skill(self) -> None: + executor = AgentExecutor(_StubNonNativeProvider()) + agent = AgentDef(name="a", model="gpt-4", prompt="Hello world", skills=["conductor"]) + rendered = executor.render_prompt(agent, {}) + assert "" in rendered + assert '' in rendered + assert "Hello world" in rendered + + def test_injected_when_workflow_default(self) -> None: + executor = AgentExecutor(_StubNonNativeProvider(), workflow_skills=["conductor"]) + agent = AgentDef(name="a", model="gpt-4", prompt="Hello world") + rendered = executor.render_prompt(agent, {}) + assert "" in rendered + + def test_agent_empty_list_opts_out_of_workflow_default(self) -> None: + executor = AgentExecutor(_StubNonNativeProvider(), workflow_skills=["conductor"]) + agent = AgentDef(name="a", model="gpt-4", prompt="Hello world", skills=[]) + rendered = executor.render_prompt(agent, {}) + assert "" not in rendered + + def test_agent_overrides_workflow_default(self) -> None: + executor = AgentExecutor(_StubNonNativeProvider(), workflow_skills=[]) + agent = AgentDef(name="a", model="gpt-4", prompt="Hello world", skills=["conductor"]) + rendered = executor.render_prompt(agent, {}) + assert "" in rendered + + def test_skills_appear_before_prompt(self) -> None: + executor = AgentExecutor(_StubNonNativeProvider(), workflow_skills=["conductor"]) + agent = AgentDef(name="a", model="gpt-4", prompt="MY_PROMPT_HERE") + rendered = executor.render_prompt(agent, {}) + assert rendered.index("") < rendered.index("MY_PROMPT_HERE") + + def test_skills_appear_after_instructions_preamble(self) -> None: + preamble = "\nFollow conventions.\n\n\n" + executor = AgentExecutor( + _StubNonNativeProvider(), + instructions_preamble=preamble, + workflow_skills=["conductor"], + ) + agent = AgentDef(name="a", model="gpt-4", prompt="MY_PROMPT_HERE") + rendered = executor.render_prompt(agent, {}) + instr = rendered.index("") + skills = rendered.index("") + prompt = rendered.index("MY_PROMPT_HERE") + assert instr < skills < prompt + + +class TestResolveSkillsForAgent: + """Tri-state resolution: agent overrides workflow default.""" + + def test_agent_none_inherits_workflow(self) -> None: + executor = AgentExecutor(CopilotProvider(), workflow_skills=["conductor"]) + agent = AgentDef(name="a", model="gpt-4", prompt="p") + assert executor._resolve_skills_for_agent(agent) == ["conductor"] + + def test_agent_list_overrides_workflow(self) -> None: + executor = AgentExecutor(CopilotProvider(), workflow_skills=[]) + agent = AgentDef(name="a", model="gpt-4", prompt="p", skills=["conductor"]) + assert executor._resolve_skills_for_agent(agent) == ["conductor"] + + def test_agent_empty_list_opts_out(self) -> None: + executor = AgentExecutor(CopilotProvider(), workflow_skills=["conductor"]) + agent = AgentDef(name="a", model="gpt-4", prompt="p", skills=[]) + assert executor._resolve_skills_for_agent(agent) == [] + + def test_default_when_nothing_set(self) -> None: + executor = AgentExecutor(CopilotProvider()) + agent = AgentDef(name="a", model="gpt-4", prompt="p") + assert executor._resolve_skills_for_agent(agent) == [] diff --git a/tests/test_skills/test_loader.py b/tests/test_skills/test_loader.py new file mode 100644 index 00000000..e9cdfd6f --- /dev/null +++ b/tests/test_skills/test_loader.py @@ -0,0 +1,55 @@ +"""Tests for the skill content loader (eager preamble injection path).""" + +from __future__ import annotations + +from pathlib import Path + +from conductor.skills import get_skill_directory, load_skill_content +from conductor.skills.loader import _cached_skill_payload + + +class TestLoadSkillContent: + def setup_method(self) -> None: + _cached_skill_payload.cache_clear() + + def test_empty_skills_returns_empty(self) -> None: + assert load_skill_content([]) == "" + + def test_wraps_in_skills_tag(self) -> None: + d = get_skill_directory("conductor") + result = load_skill_content([("conductor", d)]) + assert result.startswith("\n") + assert "\n\n" in result + + def test_wraps_each_skill_in_named_tag(self) -> None: + d = get_skill_directory("conductor") + result = load_skill_content([("conductor", d)]) + assert '' in result + assert "" in result + + def test_includes_skill_md_content(self) -> None: + d = get_skill_directory("conductor") + result = load_skill_content([("conductor", d)]) + assert "# SKILL.md" in result + + def test_includes_references(self) -> None: + d = get_skill_directory("conductor") + result = load_skill_content([("conductor", d)]) + # yaml-schema.md is a known reference in the conductor skill. + assert "# references/yaml-schema.md" in result + + def test_substantial_content(self) -> None: + d = get_skill_directory("conductor") + result = load_skill_content([("conductor", d)]) + size_kb = len(result.encode("utf-8")) / 1024 + assert size_kb > 50, f"Expected >50KB, got {size_kb:.1f}KB" + + def test_caches_per_dir(self) -> None: + d = get_skill_directory("conductor") + first = _cached_skill_payload(str(d), "conductor") + second = _cached_skill_payload(str(d), "conductor") + assert first is second + + def test_empty_dir_returns_empty(self, tmp_path: Path) -> None: + # No SKILL.md, no references/. + assert load_skill_content([("empty", tmp_path)]) == "" diff --git a/tests/test_skills/test_registry.py b/tests/test_skills/test_registry.py new file mode 100644 index 00000000..5709866b --- /dev/null +++ b/tests/test_skills/test_registry.py @@ -0,0 +1,62 @@ +"""Tests for the built-in skill registry.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from conductor.skills import ( + SkillNotFoundError, + get_skill_directory, + list_builtin_skills, + resolve_skill_directories, +) + + +class TestListBuiltinSkills: + def test_includes_conductor(self) -> None: + names = list_builtin_skills() + assert "conductor" in names + + def test_returns_sorted(self) -> None: + names = list_builtin_skills() + assert names == sorted(names) + + +class TestGetSkillDirectory: + def test_returns_existing_directory(self) -> None: + path = get_skill_directory("conductor") + assert isinstance(path, Path) + assert path.is_dir() + assert (path / "SKILL.md").is_file() + + def test_returns_absolute_path(self) -> None: + path = get_skill_directory("conductor") + assert path.is_absolute() + + def test_unknown_skill_raises(self) -> None: + with pytest.raises(SkillNotFoundError, match="Unknown skill"): + get_skill_directory("does-not-exist") + + def test_unknown_skill_lists_available(self) -> None: + with pytest.raises(SkillNotFoundError, match="conductor"): + get_skill_directory("does-not-exist") + + +class TestResolveSkillDirectories: + def test_empty_input_returns_empty(self) -> None: + assert resolve_skill_directories([]) == [] + + def test_single_skill(self) -> None: + dirs = resolve_skill_directories(["conductor"]) + assert len(dirs) == 1 + assert dirs[0].is_dir() + + def test_deduplicates(self) -> None: + dirs = resolve_skill_directories(["conductor", "conductor"]) + assert len(dirs) == 1 + + def test_unknown_raises(self) -> None: + with pytest.raises(SkillNotFoundError): + resolve_skill_directories(["conductor", "nope"]) diff --git a/tests/test_skills/test_schema.py b/tests/test_skills/test_schema.py new file mode 100644 index 00000000..11184ecb --- /dev/null +++ b/tests/test_skills/test_schema.py @@ -0,0 +1,93 @@ +"""Tests for the ``skills`` field on :class:`AgentDef` and :class:`RuntimeConfig`.""" + +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from conductor.config.schema import AgentDef, GateOption, RuntimeConfig + + +class TestAgentDefSkills: + def test_defaults_to_none(self) -> None: + agent = AgentDef(name="a", model="gpt-4", prompt="Hello") + assert agent.skills is None + + def test_empty_list_means_opt_out(self) -> None: + agent = AgentDef(name="a", model="gpt-4", prompt="Hello", skills=[]) + assert agent.skills == [] + + def test_explicit_list(self) -> None: + agent = AgentDef(name="a", model="gpt-4", prompt="Hello", skills=["conductor"]) + assert agent.skills == ["conductor"] + + def test_unknown_skill_rejected(self) -> None: + with pytest.raises(ValidationError, match="Unknown skill"): + AgentDef(name="a", model="gpt-4", prompt="Hello", skills=["does-not-exist"]) + + def test_empty_string_rejected(self) -> None: + with pytest.raises(ValidationError, match="non-empty strings"): + AgentDef(name="a", model="gpt-4", prompt="Hello", skills=[""]) + + def test_forbidden_on_script_agent(self) -> None: + with pytest.raises(ValidationError, match="script agents cannot have 'skills'"): + AgentDef(name="s", type="script", command="echo hi", skills=["conductor"]) + + def test_forbidden_on_workflow_agent(self) -> None: + with pytest.raises(ValidationError, match="workflow agents cannot have 'skills'"): + AgentDef(name="w", type="workflow", workflow="sub.yaml", skills=["conductor"]) + + def test_forbidden_on_human_gate(self) -> None: + with pytest.raises(ValidationError, match="human_gate agents cannot have 'skills'"): + AgentDef( + name="g", + type="human_gate", + prompt="Choose:", + options=[GateOption(label="Yes", value="y", route="next")], + skills=["conductor"], + ) + + def test_forbidden_on_wait_agent(self) -> None: + with pytest.raises(ValidationError, match="wait agents cannot have 'skills'"): + AgentDef(name="w", type="wait", duration="1s", skills=["conductor"]) + + def test_forbidden_on_set_agent(self) -> None: + with pytest.raises(ValidationError, match="set agents cannot have 'skills'"): + AgentDef(name="s", type="set", value="hello", skills=["conductor"]) + + def test_forbidden_on_terminate_agent(self) -> None: + with pytest.raises(ValidationError, match="terminate agents cannot have 'skills'"): + AgentDef( + name="t", + type="terminate", + status="success", + reason="done", + skills=["conductor"], + ) + + def test_allowed_on_default_type_agent(self) -> None: + agent = AgentDef(name="r", model="gpt-4", prompt="p", skills=["conductor"]) + assert agent.skills == ["conductor"] + assert agent.type is None + + def test_allowed_on_explicit_agent_type(self) -> None: + agent = AgentDef(name="r", type="agent", model="gpt-4", prompt="p", skills=["conductor"]) + assert agent.skills == ["conductor"] + + +class TestRuntimeConfigSkills: + def test_defaults_to_empty_list(self) -> None: + config = RuntimeConfig() + assert config.skills == [] + + def test_can_be_set(self) -> None: + config = RuntimeConfig(skills=["conductor"]) + assert config.skills == ["conductor"] + + def test_unknown_skill_rejected(self) -> None: + with pytest.raises(ValidationError, match="Unknown skill"): + RuntimeConfig(skills=["does-not-exist"]) + + def test_empty_string_rejected(self) -> None: + with pytest.raises(ValidationError, match="non-empty strings"): + RuntimeConfig(skills=[""]) From 7d0b668c29980c218724615c369c9883ba26c4dd Mon Sep 17 00:00:00 2001 From: Brent Rusinow Date: Fri, 5 Jun 2026 07:34:33 -0700 Subject: [PATCH 2/2] fix(types): satisfy ty checks for _MockProvider and dialog_evaluator - Add skill_directories param to _MockProvider.execute for LSP compliance - Assert agent.dialog is not None in _run_evaluator (guarded by caller) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/conductor/cli/run.py | 1 + src/conductor/engine/dialog_evaluator.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/conductor/cli/run.py b/src/conductor/cli/run.py index e167d30a..7f60cf30 100644 --- a/src/conductor/cli/run.py +++ b/src/conductor/cli/run.py @@ -1748,6 +1748,7 @@ async def execute( tools: list[str] | None = None, interrupt_signal: asyncio.Event | None = None, event_callback: Any = None, + skill_directories: list[str] | None = None, ) -> AgentOutput: return AgentOutput(content={}, raw_response="") diff --git a/src/conductor/engine/dialog_evaluator.py b/src/conductor/engine/dialog_evaluator.py index c5544166..f7a0d1fa 100644 --- a/src/conductor/engine/dialog_evaluator.py +++ b/src/conductor/engine/dialog_evaluator.py @@ -127,6 +127,8 @@ async def _run_evaluator( except (TypeError, ValueError): output_str = str(output) + assert agent.dialog is not None, "_run_evaluator requires dialog config" + system_prompt = EVALUATOR_SYSTEM_PROMPT.format( trigger_prompt=agent.dialog.trigger_prompt, )