diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 992265080..4a1ceaab5 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -152,3 +152,34 @@ jobs:
 
       - name: Run package install test
         run: bash scripts/test_package_install.sh
+
+  docs:  # Regenerate reference docs, require them committed, then build the docs.
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.14"  # quoted so YAML keeps it a string, not a float
+
+      - name: Install the project
+        run: |
+          uv sync --locked --group dev
+
+      - name: Generate reference docs
+        run: uv run scripts/docs.py generate
+
+      - name: Fail on partial docs generation  # the warnings file must be absent
+        run: test ! -f docs/docs/_generated/_generation_warnings.md
+
+      - name: Check generated docs are committed  # intent-to-add exposes untracked files to git diff
+        run: git add --intent-to-add -- docs/docs/_generated && git diff --exit-code -- docs/docs/_generated
+
+      - name: Build docs
+        run: uv run scripts/docs.py build
diff --git a/docs/docs/_generated/model_aliases_google.md b/docs/docs/_generated/model_aliases_google.md
index 3a9073282..8c2d57d4c 100644
--- a/docs/docs/_generated/model_aliases_google.md
+++ b/docs/docs/_generated/model_aliases_google.md
@@ -1,10 +1,12 @@
 | Model Alias | Maps to | Model Alias | Maps to |
 | --- | --- | --- | --- |
-| `gemini` | `gemini-3.1-pro-preview` | `gemini2` | `gemini-2.0-flash` |
-| `gemini-2.0-flash` | `gemini-2.0-flash` | `gemini25` | `gemini-2.5-flash` |
-| `gemini-2.5-flash` | `gemini-2.5-flash` | `gemini25pro` | `gemini-2.5-pro` |
-| `gemini-2.5-pro` | `gemini-2.5-pro` | `gemini3` | `gemini-3-pro-preview` |
-| `gemini-3-flash-preview` | `gemini-3-flash-preview` | `gemini3.1` | `gemini-3.1-pro-preview` |
-| `gemini-3-pro-preview` | `gemini-3-pro-preview` | `gemini3.1flashlite` | `gemini-3.1-flash-lite-preview` |
+| `gemini` | `gemini-3.1-pro-preview` | `gemini25` | `gemini-2.5-flash` |
+| `gemini-2.0-flash` | `gemini-2.0-flash` | `gemini25pro` | `gemini-2.5-pro` |
+| `gemini-2.5-flash` | `gemini-2.5-flash` | `gemini3` | `gemini-3-pro-preview` |
+| `gemini-2.5-pro` | `gemini-2.5-pro` | `gemini3.1` | `gemini-3.1-pro-preview` |
+| `gemini-3-flash-preview` | `gemini-3-flash-preview` | `gemini3.1flashlite` | `gemini-3.1-flash-lite-preview` |
+| `gemini-3-pro-preview` | `gemini-3-pro-preview` | `gemini3.5flash` | `gemini-3.5-flash` |
 | `gemini-3.1-flash-lite-preview` | `gemini-3.1-flash-lite-preview` | `gemini31pro` | `gemini-3.1-pro-preview` |
-| `gemini-3.1-pro-preview` | `gemini-3.1-pro-preview` | `gemini3flash` | `gemini-3-flash-preview` |
+| `gemini-3.1-pro-preview` | `gemini-3.1-pro-preview` | `gemini35` | `gemini-3.5-flash` |
+| `gemini-3.5-flash` | `gemini-3.5-flash` | `gemini35flash` | `gemini-3.5-flash` |
+| `gemini2` | `gemini-2.0-flash` | `gemini3flash` | `gemini-3-flash-preview` |
diff --git a/docs/docs/_generated/model_aliases_hf.md b/docs/docs/_generated/model_aliases_hf.md
index ac1304094..927128ece 100644
--- a/docs/docs/_generated/model_aliases_hf.md
+++ b/docs/docs/_generated/model_aliases_hf.md
@@ -1,7 +1,7 @@
 | Model Alias | Maps to |
 | --- | --- |
-| `deepseek-hf` | `hf.deepseek-ai/DeepSeek-V4-Pro:together` |
 | `deepseek-ai/deepseek-v4-pro` | `deepseek-ai/deepseek-v4-pro` |
+| `deepseek-hf` | `hf.deepseek-ai/DeepSeek-V4-Pro:together` |
 | `deepseek32` | `hf.deepseek-ai/DeepSeek-V3.2:fireworks-ai` |
 | `deepseek4-hf` | `hf.deepseek-ai/DeepSeek-V4-Pro:together` |
 | `deepseek4pro-hf` | `hf.deepseek-ai/DeepSeek-V4-Pro:together` |
diff --git a/docs/docs/_generated/models_reference.md b/docs/docs/_generated/models_reference.md
index b1fea0426..9b1528c86 100644
--- a/docs/docs/_generated/models_reference.md
+++ b/docs/docs/_generated/models_reference.md
@@ -27,11 +27,11 @@
 | `opus46` | `anthropic` | Text, Vision, Document | `json` (schema) | effort: `auto`, `low`, `medium`, `high`, `max`, `off`<br>Example: `opus46.auto` | — | `web_search` (web_search_20260209)<br>`web_fetch` (web_fetch_20260209)<br>beta: `code-execution-web-tools-2026-02-09` |
 | `opus` | `anthropic` | Text, Vision, Document | `json` (schema) | effort: `auto`, `low`, `medium`, `high`, `xhigh`, `max`, `off`<br>Example: `opus.auto` | — | `web_search` (web_search_20260209)<br>`web_fetch` (web_fetch_20260209)<br>beta: `code-execution-web-tools-2026-02-09` |
 | `codexspark` | `codexresponses` | Text | `json` (schema) | — | — | — |
-| `deepseek` | `deepseek` | Text | `json` (schema) | effort: `high`, `max`, `off`<br>Example: `deepseek.high` | — | — |
 | `deepseek-reasoner` | `deepseek` | Text | `json` (schema) | effort: `high`, `max`, `off`<br>Example: `deepseek-reasoner.high` | — | — |
 | `deepseek.deepseek-ai/deepseek-v3.1` | `deepseek` | Text | `json` (schema) | — | — | — |
 | `deepseek3` | `deepseek` | Text | `json` (schema) | — | — | — |
 | `deepseek4flash` | `deepseek` | Text | `json` (schema) | effort: `high`, `max`, `off`<br>Example: `deepseek4flash.high` | — | — |
+| `deepseek` | `deepseek` | Text | `json` (schema) | effort: `high`, `max`, `off`<br>Example: `deepseek.high` | — | — |
 | `passthrough` | `fast-agent` | Text | `json` (schema) | — | — | — |
 | `playback` | `fast-agent` | Text | `json` (schema) | — | — | — |
 | `silent` | `fast-agent` | Text | `json` (schema) | — | — | — |
@@ -39,17 +39,18 @@
 | `gemini25` | `google` | Text, Vision, Document, Audio, Video | `json` (schema) | effort: `auto`, `minimal`, `low`, `medium`, `high`, `off`<br>Example: `gemini25.auto` | — | — |
 | `gemini25pro` | `google` | Text, Vision, Document, Audio, Video | `json` (schema) | effort: `auto`, `minimal`, `low`, `medium`, `high`, `off`<br>Example: `gemini25pro.auto` | — | — |
 | `gemini2` | `google` | Text, Vision, Document, Audio, Video | `json` (schema) | — | — | — |
-| `gemini3.1flashlite` | `google` | Text, Vision, Document, Audio, Video | `json` (schema) | effort: `auto`, `minimal`, `low`, `medium`, `high`, `off`<br>Example: `gemini3.1flashlite.auto` | — | — |
-| `gemini3` | `google` | Text, Vision, Document, Audio, Video | `json` (schema) | effort: `auto`, `minimal`, `low`, `medium`, `high`, `off`<br>Example: `gemini3.auto` | — | — |
-| `gemini3flash` | `google` | Text, Vision, Document, Audio, Video | `json` (schema) | effort: `auto`, `minimal`, `low`, `medium`, `high`, `off`<br>Example: `gemini3flash.auto` | — | — |
-| `gemini` | `google` | Text, Vision, Document, Audio, Video | `json` (schema) | effort: `auto`, `minimal`, `low`, `medium`, `high`, `off`<br>Example: `gemini.auto` | — | — |
+| `gemini3.1flashlite` | `google` | Text, Vision, Document, Audio, Video | `json` (schema) | effort: `minimal`, `low`, `medium`, `high`<br>Example: `gemini3.1flashlite.medium` | — | — |
+| `gemini35` | `google` | Text, Vision, Document, Audio, Video | `json` (schema) | effort: `minimal`, `low`, `medium`, `high`<br>Example: `gemini35.medium` | — | — |
+| `gemini3` | `google` | Text, Vision, Document, Audio, Video | `json` (schema) | effort: `minimal`, `low`, `medium`, `high`<br>Example: `gemini3.medium` | — | — |
+| `gemini3flash` | `google` | Text, Vision, Document, Audio, Video | `json` (schema) | effort: `minimal`, `low`, `medium`, `high`<br>Example: `gemini3flash.medium` | — | — |
+| `gemini` | `google` | Text, Vision, Document, Audio, Video | `json` (schema) | effort: `minimal`, `low`, `medium`, `high`<br>Example: `gemini.medium` | — | — |
 | `groq.deepseek-r1-distill-llama-70b` | `groq` | Text | `json` (object) | — | — | — |
 | `groq.qwen/qwen3-32b` | `groq` | Text | `json` (object) | — | — | — |
 | `moonshotai/kimi-k2-instruct-0905` | `groq` | Text | `json` (schema) | — | — | — |
 | `moonshotai/kimi-k2-thinking` | `groq` | Text | `json` (schema) | — | — | — |
 | `moonshotai/kimi-k2` | `groq` | Text | `json` (schema) | — | — | — |
-| `deepseek32` | `hf` | Text | `json` (schema) | — | — | — |
 | `deepseek-hf` | `hf` | Text | `json` (schema) | — | — | — |
+| `deepseek32` | `hf` | Text | `json` (schema) | — | — | — |
 | `glm47` | `hf` | Text | `json` (schema) | toggle: `on`, `off`<br>Example: `glm47?reasoning=off` | — | — |
 | `glm5` | `hf` | Text | `json` (schema) | toggle: `on`, `off`<br>Example: `glm5?reasoning=off` | — | — |
 | `glm` | `hf` | Text | `json` (schema) | toggle: `on`, `off`<br>Example: `glm?reasoning=off` | — | — |
@@ -108,8 +109,8 @@
 | `grok-4-1-fast-reasoning` | `xai` | Text, Vision | `json` (schema) | — | — | — |
 | `grok-4-fast-reasoning` | `xai` | Text, Vision | `json` (schema) | — | — | — |
 | `grok-4-fast` | `xai` | Text, Vision | `json` (schema) | — | — | — |
-| `grok-4-latest` | `xai` | Text | `json` (schema) | effort: `none`, `low`, `medium`, `high`, `off`<br>Example: `grok-4-latest.low` | — | — |
-| `grok-4.3-latest` | `xai` | Text | `json` (schema) | effort: `none`, `low`, `medium`, `high`, `off`<br>Example: `grok-4.3-latest.low` | — | — |
-| `grok-4` | `xai` | Text | `json` (schema) | effort: `none`, `low`, `medium`, `high`, `off`<br>Example: `grok-4.low` | — | — |
-| `grok` | `xai` | Text | `json` (schema) | effort: `none`, `low`, `medium`, `high`, `off`<br>Example: `grok.low` | — | — |
-| `grok` | `xai` | Text | `json` (schema) | effort: `none`, `low`, `medium`, `high`, `off`<br>Example: `grok.low` | — | — |
+| `grok-4-latest` | `xai` | Text, Vision | `json` (schema) | effort: `none`, `low`, `medium`, `high`, `off`<br>Example: `grok-4-latest.low` | — | — |
+| `grok-4.3-latest` | `xai` | Text, Vision | `json` (schema) | effort: `none`, `low`, `medium`, `high`, `off`<br>Example: `grok-4.3-latest.low` | — | — |
+| `grok-4` | `xai` | Text, Vision | `json` (schema) | effort: `none`, `low`, `medium`, `high`, `off`<br>Example: `grok-4.low` | — | — |
+| `grok` | `xai` | Text, Vision | `json` (schema) | effort: `none`, `low`, `medium`, `high`, `off`<br>Example: `grok.low` | — | — |
+| `grok` | `xai` | Text, Vision | `json` (schema) | effort: `none`, `low`, `medium`, `high`, `off`<br>Example: `grok.low` | — | — |
diff --git a/docs/docs/_generated/request_params_reference.md b/docs/docs/_generated/request_params_reference.md
index 891de97ec..3eb8c7a50 100644
--- a/docs/docs/_generated/request_params_reference.md
+++ b/docs/docs/_generated/request_params_reference.md
@@ -9,28 +9,28 @@
 | --- | --- | --- | --- |
 | `task` | `mcp.types.TaskMetadata | None` | `None` |  |
 | `meta` | `mcp.types.RequestParams.Meta | None` | `None` |  |
-| `messages` | `list` | `[]` |  |
+| `messages` | `list[mcp.types.SamplingMessage]` | `[]` |  |
 | `modelPreferences` | `mcp.types.ModelPreferences | None` | `None` |  |
 | `systemPrompt` | `str | None` | `None` |  |
-| `includeContext` | `Optional` | `None` |  |
+| `includeContext` | `Literal['none', 'thisServer', 'allServers'] | None` | `None` |  |
 | `temperature` | `float | None` | `None` |  |
 | `maxTokens` | `int` | `2048` |  |
 | `stopSequences` | `list[str] | None` | `None` |  |
-| `metadata` | `dict[str, typing.Any] | None` | `None` |  |
+| `metadata` | `dict[str, Any] | None` | `None` |  |
 | `tools` | `list[mcp.types.Tool] | None` | `None` |  |
 | `toolChoice` | `mcp.types.ToolChoice | None` | `None` |  |
 | `model` | `str | None` | `None` |  |
 | `use_history` | `bool` | `True` |  |
-| `max_iterations` | `int` | `99` |  |
+| `max_iterations` | `int` | `199` |  |
 | `parallel_tool_calls` | `bool` | `True` |  |
-| `response_format` | `typing.Any | None` | `None` |  |
-| `structured_schema` | `dict[str, typing.Any] | None` | `None` |  |
-| `structured_tool_policy` | `Literal` | `'auto'` |  |
-| `template_vars` | `dict` | `PydanticUndefined` |  |
-| `mcp_metadata` | `dict[str, typing.Any] | None` | `None` |  |
-| `tool_execution_handler` | `typing.Any | None` | `None` |  |
+| `response_format` | `Any | None` | `None` |  |
+| `structured_schema` | `dict[str, Any] | None` | `None` |  |
+| `structured_tool_policy` | `Literal['auto', 'always', 'defer', 'no_tools']` | `'auto'` |  |
+| `template_vars` | `dict[str, Any]` | `PydanticUndefined` |  |
+| `mcp_metadata` | `dict[str, Any] | None` | `None` |  |
+| `tool_execution_handler` | `Any | None` | `None` |  |
 | `emit_loop_progress` | `bool` | `False` |  |
-| `tool_result_mode` | `Literal` | `'postprocess'` |  |
+| `tool_result_mode` | `Literal['postprocess', 'passthrough', 'selectable']` | `'postprocess'` |  |
 | `batch_context` | `fast_agent.llm.request_params.BatchRequestContext | None` | `None` |  |
 | `streaming_timeout` | `float | None` | `300.0` |  |
 | `top_p` | `float | None` | `None` |  |
@@ -39,4 +39,4 @@
 | `presence_penalty` | `float | None` | `None` |  |
 | `frequency_penalty` | `float | None` | `None` |  |
 | `repetition_penalty` | `float | None` | `None` |  |
-| `service_tier` | `Optional` | `None` |  |
+| `service_tier` | `Literal['fast', 'flex'] | None` | `None` |  |
diff --git a/docs/docs/_generated/workflows_reference.md b/docs/docs/_generated/workflows_reference.md
index 1f4da0770..7e11317c3 100644
--- a/docs/docs/_generated/workflows_reference.md
+++ b/docs/docs/_generated/workflows_reference.md
@@ -10,35 +10,35 @@ These signatures are generated from the installed `fast_agent` package to preven
 ### `chain`
 
 ```python
-fast.chain(name: str, *, sequence: list[str], instruction: str | pathlib._local.Path | pydantic.networks.AnyUrl | None = None, cumulative: bool = False, default: bool = False) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
+fast.chain(name: str, *, sequence: list[str], instruction: str | pathlib.Path | pydantic.networks.AnyUrl | None = None, cumulative: bool = False, default: bool = False) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
 ```
 ### `parallel`
 
 ```python
-fast.parallel(name: str, *, fan_out: list[str], fan_in: str | None = None, instruction: str | pathlib._local.Path | pydantic.networks.AnyUrl | None = None, include_request: bool = True, default: bool = False) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
+fast.parallel(name: str, *, fan_out: list[str], fan_in: str | None = None, instruction: str | pathlib.Path | pydantic.networks.AnyUrl | None = None, include_request: bool = True, default: bool = False) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
 ```
 ### `evaluator_optimizer`
 
 ```python
-fast.evaluator_optimizer(name: str, *, generator: str, evaluator: str, instruction: str | pathlib._local.Path | pydantic.networks.AnyUrl | None = None, min_rating: str = 'GOOD', max_refinements: int = 3, refinement_instruction: str | None = None, default: bool = False) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
+fast.evaluator_optimizer(name: str, *, generator: str, evaluator: str, instruction: str | pathlib.Path | pydantic.networks.AnyUrl | None = None, min_rating: str = 'GOOD', max_refinements: int = 3, refinement_instruction: str | None = None, default: bool = False) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
 ```
 ### `router`
 
 ```python
-fast.router(name: str, *, agents: list[str], instruction: str | pathlib._local.Path | pydantic.networks.AnyUrl | None = None, servers: list[str] = [], tools: dict[str, list[str]] | None = None, resources: dict[str, list[str]] | None = None, prompts: dict[str, list[str]] | None = None, model: str | None = None, use_history: bool = False, request_params: fast_agent.llm.request_params.RequestParams | None = None, human_input: bool = False, default: bool = False, elicitation_handler: mcp.client.session.ElicitationFnT | None = None, api_key: str | None = None) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
+fast.router(name: str, *, agents: list[str], instruction: str | pathlib.Path | pydantic.networks.AnyUrl | None = None, servers: list[str] = [], tools: dict[str, list[str]] | None = None, resources: dict[str, list[str]] | None = None, prompts: dict[str, list[str]] | None = None, model: str | None = None, use_history: bool = False, request_params: fast_agent.llm.request_params.RequestParams | None = None, human_input: bool = False, default: bool = False, elicitation_handler: mcp.client.session.ElicitationFnT | None = None, api_key: str | None = None) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
 ```
 ### `orchestrator`
 
 ```python
-fast.orchestrator(name: str, *, agents: list[str], instruction: str | pathlib._local.Path | pydantic.networks.AnyUrl = '\n    You are an expert planner. Given an objective task and a list of Agents\n    (which are collections of capabilities), your job is to break down the objective\n    into a series of steps, which can be performed by these agents.\n    ', model: str | None = None, request_params: fast_agent.llm.request_params.RequestParams | None = None, use_history: bool = False, human_input: bool = False, plan_type: Literal['full', 'iterative'] = 'full', plan_iterations: int = 5, default: bool = False, api_key: str | None = None) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
+fast.orchestrator(name: str, *, agents: list[str], instruction: str | pathlib.Path | pydantic.networks.AnyUrl = '\n    You are an expert planner. Given an objective task and a list of Agents\n    (which are collections of capabilities), your job is to break down the objective\n    into a series of steps, which can be performed by these agents.\n    ', model: str | None = None, request_params: fast_agent.llm.request_params.RequestParams | None = None, use_history: bool = False, human_input: bool = False, plan_type: Literal['full', 'iterative'] = 'full', plan_iterations: int = 5, default: bool = False, api_key: str | None = None) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
 ```
 ### `iterative_planner`
 
 ```python
-fast.iterative_planner(name: str, *, agents: list[str], instruction: str | pathlib._local.Path | pydantic.networks.AnyUrl = "\nYou are an expert planner, able to Orchestrate complex tasks by breaking them down in to\nmanageable steps, and delegating tasks to Agents.\n\nYou work iteratively - given an Objective, you consider the current state of the plan,\ndecide the next step towards the goal. You document those steps and create clear instructions\nfor execution by the Agents, being specific about what you need to know to assess task completion. \n\nNOTE: A 'Planning Step' has a description, and a list of tasks that can be delegated \nand executed in parallel.\n\nAgents have a 'description' describing their primary function, and a set of 'skills' that\nrepresent Tools they can use in completing their function.\n\nThe following Agents are available to you:\n\n{{agents}}\n\nYou must specify the Agent name precisely when generating a Planning Step. \n\n", model: str | None = None, request_params: fast_agent.llm.request_params.RequestParams | None = None, plan_iterations: int = -1, default: bool = False, api_key: str | None = None) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
+fast.iterative_planner(name: str, *, agents: list[str], instruction: str | pathlib.Path | pydantic.networks.AnyUrl = "\nYou are an expert planner, able to Orchestrate complex tasks by breaking them down in to\nmanageable steps, and delegating tasks to Agents.\n\nYou work iteratively - given an Objective, you consider the current state of the plan,\ndecide the next step towards the goal. You document those steps and create clear instructions\nfor execution by the Agents, being specific about what you need to know to assess task completion. \n\nNOTE: A 'Planning Step' has a description, and a list of tasks that can be delegated \nand executed in parallel.\n\nAgents have a 'description' describing their primary function, and a set of 'skills' that\nrepresent Tools they can use in completing their function.\n\nThe following Agents are available to you:\n\n{{agents}}\n\nYou must specify the Agent name precisely when generating a Planning Step. \n\n", model: str | None = None, request_params: fast_agent.llm.request_params.RequestParams | None = None, plan_iterations: int = -1, default: bool = False, api_key: str | None = None) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
 ```
 ### `maker`
 
 ```python
-fast.maker(name: str, *, worker: str, k: int = 3, max_samples: int = 50, match_strategy: str = 'exact', red_flag_max_length: int | None = None, instruction: str | pathlib._local.Path | pydantic.networks.AnyUrl | None = None, default: bool = False) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
+fast.maker(name: str, *, worker: str, k: int = 3, max_samples: int = 50, match_strategy: str = 'exact', red_flag_max_length: int | None = None, instruction: str | pathlib.Path | pydantic.networks.AnyUrl | None = None, default: bool = False) -> Callable[[Callable[~P, collections.abc.Coroutine[Any, Any, +R]]], Callable[~P, collections.abc.Coroutine[Any, Any, +R]]]
 ```
diff --git a/docs/docs/a2a/api.md b/docs/docs/a2a/api.md
new file mode 100644
index 000000000..53f160722
--- /dev/null
+++ b/docs/docs/a2a/api.md
@@ -0,0 +1,227 @@
+---
+title: A2A API
+description: Use fast-agent A2A client and server support from Python and raw A2A HTTP APIs.
+---
+
+# A2A API
+
+The fast-agent A2A integration is designed to feel like working with normal
+fast-agent agents. The local API surface uses `PromptMessageExtended`, stream
+listeners, and normal fast-agent history behavior.
+
+## Client API
+
+Create an `A2ARemoteAgent` directly when you want a remote A2A server behind the
+fast-agent `AgentProtocol` interface:
+
+```python
+from mcp.types import TextContent
+
+from fast_agent.a2a.config import A2AAgentConfig
+from fast_agent.a2a.remote_agent import A2ARemoteAgent
+from fast_agent.agents.agent_types import AgentConfig, AgentType
+from fast_agent.mcp.prompt import Prompt
+from fast_agent.types import PromptMessageExtended
+
+agent = A2ARemoteAgent(
+    config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=True),
+    a2a_config=A2AAgentConfig(
+        url="http://127.0.0.1:41242",
+        transport="JSONRPC",
+    ),
+)
+
+await agent.initialize()
+try:
+    response = await agent.generate_impl(
+        [
+            PromptMessageExtended(
+                role="user",
+                content=[TextContent(type="text", text="hello")],
+            )
+        ]
+    )
+    print(response.all_text())
+finally:
+    await agent.shutdown()
+```
+
+`A2AAgentConfig` supports:
+
+```python
+A2AAgentConfig(
+    url="https://agent.example.com",
+    transport="JSONRPC",
+    streaming=True,
+    polling=False,
+    accepted_output_modes=["text/plain", "application/json", "image/*"],
+    headers={"Authorization": "Bearer ..."},
+    relative_card_path="/.well-known/agent-card.json",
+    request_timeout_seconds=120,
+)
+```
+
+## Client Streaming API
+
+Register a normal fast-agent stream listener before calling `generate_impl`:
+
+```python
+chunks: list[str] = []
+
+remove_listener = agent.add_stream_listener(lambda chunk: chunks.append(chunk.text))
+try:
+    response = await agent.generate_impl([message])
+finally:
+    remove_listener()
+```
+
+For A2A streaming, `chunk.text` contains text from message events and artifact
+updates. Artifact updates are also assembled into the returned
+`PromptMessageExtended`.
+
+## Client `INPUT_REQUIRED`
+
+When the remote server returns `TASK_STATE_INPUT_REQUIRED`, the response has:
+
+```python
+response.stop_reason == LlmStopReason.PAUSE
+```
+
+The same `A2ARemoteAgent` instance keeps the pending remote task id. The next
+`generate_impl` call sends the follow-up message to that task:
+
+```python
+first = await agent.generate_impl([Prompt.user("need input")])
+assert first.stop_reason == LlmStopReason.PAUSE
+
+second = await agent.generate_impl([Prompt.user("blue")])
+```
+
+Use `agent.reset_a2a_state()` to clear the pending task and start a new remote
+context.
+
+## Server API
+
+Most deployments should use:
+
+```bash
+uv run fast-agent serve a2a --agent-cards ./agents
+```
+
+If you are embedding the server in Python, use `AgentA2AServer` with an existing
+fast-agent `AgentInstance` factory:
+
+```python
+from fast_agent.a2a.server import AgentA2AServer
+
+server = AgentA2AServer(
+    primary_instance=bootstrap_instance,
+    create_instance=create_instance,
+    dispose_instance=dispose_instance,
+    server_name="research agents",
+    host="127.0.0.1",
+    port=41241,
+    instance_scope="connection",
+)
+
+app = server.asgi_app()
+```
+
+`instance_scope` accepts the same scope values as the `fast-agent serve` command:
+
+| Scope | Server API behavior |
+|---|---|
+| `shared` | Reuse `primary_instance` for all A2A messages. |
+| `connection` | Call `create_instance` for each new A2A `context_id` and reuse that instance for later messages in the same context. |
+| `request` | Call `create_instance` and `dispose_instance` for each A2A message. |
+
+Each served agent's `use_history` setting still controls whether prior turns are
+included in model calls inside the selected instance scope.
+
+## Raw A2A JSON-RPC
+
+External clients can call the served fast-agent endpoint directly:
+
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "method": "SendStreamingMessage",
+  "params": {
+    "message": {
+      "role": "ROLE_USER",
+      "messageId": "msg-1",
+      "parts": [
+        {"text": "hello"}
+      ]
+    }
+  }
+}
+```
+
+The endpoint behaves as one A2A remote agent. Requests route to the fast-agent
+default agent, which can orchestrate or delegate internally. fast-agent servers
+also accept a non-portable metadata routing extension, such as
+`{"agent": "researcher"}`, for fast-agent-to-fast-agent integrations.
+
+To continue a session, include the returned `contextId`. To continue an
+`INPUT_REQUIRED` task, include both the returned `contextId` and `taskId`.
+
+## Raw HTTP+JSON
+
+The REST binding is exposed under `/a2a/rest`. For example:
+
+```http
+POST /a2a/rest/message:stream HTTP/1.1
+Content-Type: application/json
+
+{
+  "message": {
+    "role": "ROLE_USER",
+    "messageId": "msg-1",
+    "parts": [{"text": "hello"}]
+  }
+}
+```
+
+Responses are A2A stream response objects containing exactly one of `task`,
+`message`, `statusUpdate`, or `artifactUpdate`.
+
+## Content Mapping
+
+Inbound A2A parts are converted to fast-agent prompt content:
+
+| A2A part | fast-agent prompt content |
+|---|---|
+| `text` | `TextContent` |
+| `url` | `ResourceLink` when valid, otherwise Markdown link text |
+| `raw` image bytes | `ImageContent` |
+| `raw` non-image bytes | `EmbeddedResource` with `BlobResourceContents` |
+| `data` | formatted JSON text |
+
+fast-agent responses are converted back to A2A artifact parts using the content
+types available in `PromptMessageExtended`.
+
+For structured JSON, A2A supports JSON-compatible `data` parts and also permits
+JSON returned as text artifacts. fast-agent keeps model text as text, but maps an
+`EmbeddedResource` containing `TextResourceContents` with
+`mimeType="application/json"` to an A2A `data` part:
+
+```python
+from mcp.types import EmbeddedResource, TextResourceContents
+from pydantic import AnyUrl
+
+PromptMessageExtended(
+    role="assistant",
+    content=[
+        EmbeddedResource(
+            type="resource",
+            resource=TextResourceContents(
+                uri=AnyUrl("resource:///tickets.json"),
+                mimeType="application/json",
+                text='{"tickets": [{"id": "REQ123", "status": "open"}]}',
+            ),
+        )
+    ],
+)
+```
diff --git a/docs/docs/a2a/client.md b/docs/docs/a2a/client.md
new file mode 100644
index 000000000..c00d28181
--- /dev/null
+++ b/docs/docs/a2a/client.md
@@ -0,0 +1,338 @@
+---
+title: A2A Client
+description: Use fast-agent as a client for remote Agent2Agent (A2A) agents.
+---
+
+# A2A Client
+
+fast-agent can connect to remote A2A agents as normal fast-agent agents. A
+remote A2A agent can be used from the CLI, TUI, AgentCards, or the Python API.
+
+## CLI
+
+Use `--a2a` for an ad hoc remote agent:
+
+```bash
+uv run fast-agent -x \
+  --a2a http://127.0.0.1:41242 \
+  --a2a-transport JSONRPC \
+  --message "hello"
+```
+
+`--a2a` points at the remote agent base URL. fast-agent resolves the AgentCard
+from `/.well-known/agent-card.json`, selects a supported transport, and sends the
+message through the A2A SDK client.
+
+When no transport is specified, fast-agent asks the SDK to use either supported
+HTTP binding: `JSONRPC` or `HTTP+JSON`. Set `--a2a-transport` only when you want
+to force one binding.
+
+Use `--a2a-oauth` or `--no-a2a-oauth` to force or disable browser OAuth for an
+ad hoc remote agent:
+
+```bash
+uv run fast-agent -x \
+  --a2a https://research.example.com \
+  --a2a-oauth \
+  --message "hello"
+```
+
+Use `--auth` when the remote A2A endpoint itself expects bearer auth. This uses
+the standard `Authorization` header, including for Hugging Face Space endpoints:
+
+```bash
+uv run fast-agent -x \
+  --a2a https://agent-demo.hf.space \
+  --auth "$HF_TOKEN" \
+  --message "hello"
+```
+
+Supported HTTP transports:
+
+| Canonical | Useful aliases |
+|---|---|
+| `JSONRPC` | `jsonrpc`, `json-rpc`, `rpc` |
+| `HTTP+JSON` | `http`, `http+json`, `rest` |
+
+gRPC is not part of fast-agent's A2A support target.
+
+### CLI Recording
+
+This recording shows the expected shape of a streamed remote A2A response from
+the deterministic fake server.
+
+<div class="a2a-terminal-demo">
+  <link rel="stylesheet" href="../../assets/vendor/asciinema-player/asciinema-player.css">
+  <link rel="stylesheet" href="../../assets/vendor/asciinema-player/catppuccin.css">
+  <div id="a2a-client-cli-player"></div>
+</div>
+
+<script src="../../assets/vendor/asciinema-player/asciinema-player.min.js"></script>
+<script>
+  (function () {
+    function renderClientCliCast() {
+      var target = document.getElementById("a2a-client-cli-player");
+      if (!target || !window.AsciinemaPlayer || target.dataset.loaded === "true") {
+        return;
+      }
+      target.dataset.loaded = "true";
+      window.AsciinemaPlayer.create("../../assets/a2a/a2a-client-cli.cast", target, {
+        cols: 96,
+        rows: 18,
+        preload: true,
+        speed: 1,
+        idleTimeLimit: 1,
+        fit: "width",
+        theme: "fast-agent-dark"
+      });
+    }
+    if (document.readyState === "loading") {
+      document.addEventListener("DOMContentLoaded", renderClientCliCast);
+    } else {
+      renderClientCliCast();
+    }
+    if (window.document$ && window.document$.subscribe) {
+      window.document$.subscribe(renderClientCliCast);
+    }
+  })();
+</script>
+
+## AgentCard
+
+Use a checked-in AgentCard when the remote A2A agent should be reusable:
+
+```yaml
+type: a2a
+name: research_remote
+url: https://research.example.com
+transport: JSONRPC
+```
+
+Then run:
+
+```bash
+uv run fast-agent -x --agent-cards ./agents --agent research_remote
+```
+
+A2A cards also support:
+
+```yaml
+streaming: true
+polling: false
+accepted_output_modes:
+  - text/plain
+  - application/json
+  - image/*
+request_timeout_seconds: 120
+headers:
+  Authorization: "Bearer ${A2A_TOKEN}"
+auth:
+  oauth: true
+  persist: keyring
+relative_card_path: "/.well-known/agent-card.json"
+```
+
+For Hugging Face URLs (`hf.co`, `huggingface.co`, and `*.hf.space`),
+fast-agent has two different auth policies:
+
+- Ambient Hugging Face auth discovers `HF_TOKEN` or the local Hub login and adds
+  it only to Hugging Face URLs. It uses `Authorization` for `hf.co` and
+  `huggingface.co`, and `X-HF-Authorization` for `*.hf.space`. This is intended
+  for ordinary HF MCP calls and Space apps that consume the caller's HF token
+  without taking over app-level `Authorization`.
+- Explicit endpoint auth uses `Authorization: Bearer ...`, including for
+  `*.hf.space`. This is the policy behind `--auth`, checked-in `headers:
+  Authorization: ...`, and OAuth-managed A2A/MCP servers.
+
+For `*.hf.space` A2A URLs, fast-agent first fetches the public AgentCard. If the
+card advertises HTTP bearer security and no explicit headers were configured,
+the client treats the Space as a protected endpoint: a discovered local
+`HF_TOKEN`/Hub login is sent as `Authorization`, not `X-HF-Authorization`. If no
+local token is available and OAuth is allowed, the client uses the OAuth flow.
+
+When a remote AgentCard advertises OAuth2 or OpenID Connect security schemes,
+fast-agent can reuse the existing browser OAuth flow. If `auth` is omitted, the
+A2A client enables that flow only for OAuth/OIDC cards. Set `auth.oauth: false`
+to disable browser OAuth, or `auth.oauth: true` to allow OAuth challenge handling
+even before the card requires it. The flow uses the same local callback,
+paste-URL fallback, client metadata URL, and keyring storage behavior as MCP URL
+connections.
+
+## TUI
+
+Inside the interactive prompt, connect a remote A2A agent at runtime:
+
+```text
+/a2a connect http://127.0.0.1:41242 --transport JSONRPC --name research_remote
+```
+
+Use `--oauth` or `--no-oauth` to force or disable browser OAuth for a runtime
+connection:
+
+```text
+/a2a connect https://research.example.com --oauth --name research_remote
+```
+
+## Python API
+
+Use `A2ARemoteAgent` directly when constructing agents in code:
+
+```python
+from fast_agent.a2a.config import A2AAgentConfig
+from fast_agent.a2a.remote_agent import A2ARemoteAgent
+from fast_agent.agents.agent_types import AgentConfig, AgentType
+from fast_agent.config import MCPServerAuthSettings
+
+remote_agent = A2ARemoteAgent(
+    config=AgentConfig(name="research_remote", agent_type=AgentType.A2A),
+    a2a_config=A2AAgentConfig(
+        url="https://research.example.com",
+        transport="JSONRPC",
+        auth=MCPServerAuthSettings(oauth=True),
+    ),
+)
+await remote_agent.initialize()
+```
+
+Useful diagnostics from the interactive prompt (TUI):
+
+```text
+/a2a list
+/a2a status [agent]
+/a2a card [agent]
+/a2a transport [agent]
+/a2a reset [agent]
+```
+
+`/a2a status` shows the current A2A `context_id`, pending `task_id`, last task
+state, and selected client transport.
+
+When the local A2A AgentCard or request has `use_history: false`, fast-agent
+starts a fresh A2A context after each completed turn. The exception is
+`TASK_STATE_INPUT_REQUIRED`: fast-agent keeps the returned `task_id` and
+`context_id` so the next user message can continue the interrupted task.
+
+## Streaming
+
+Remote A2A `TaskArtifactUpdateEvent` updates are emitted through the normal
+fast-agent stream listener path. The client assembles final text per artifact
+and honors the A2A `append` flag, so replacement updates replace the artifact
+content and append updates extend it.
+
+The A2A client defaults to a longer HTTP request timeout than httpx's default so
+real LLM-backed servers have time to emit the first stream event. Set
+`request_timeout_seconds` on an A2A AgentCard when a remote endpoint needs a
+different timeout.
+
+### Real LLM Server Recording
+
+This recording shows a fast-agent A2A client streaming from a fast-agent A2A
+server backed by a real LLM and the Hugging Face MCP server. It is a provider
+smoke recording, separate from the deterministic fake-server recordings used by
+the test suite.
+
+<div class="a2a-terminal-demo">
+  <div id="a2a-real-llm-hf-player"></div>
+</div>
+
+<script>
+  (function () {
+    function renderRealLlmHfCast() {
+      var target = document.getElementById("a2a-real-llm-hf-player");
+      if (!target || !window.AsciinemaPlayer || target.dataset.loaded === "true") {
+        return;
+      }
+      target.dataset.loaded = "true";
+      window.AsciinemaPlayer.create("../../assets/a2a/a2a-real-llm-hf-streaming.cast", target, {
+        cols: 120,
+        rows: 32,
+        preload: true,
+        speed: 1,
+        idleTimeLimit: 1,
+        fit: "width",
+        theme: "fast-agent-dark"
+      });
+    }
+    if (document.readyState === "loading") {
+      document.addEventListener("DOMContentLoaded", renderRealLlmHfCast);
+    } else {
+      renderRealLlmHfCast();
+    }
+    if (window.document$ && window.document$.subscribe) {
+      window.document$.subscribe(renderRealLlmHfCast);
+    }
+  })();
+</script>
+
+Regenerate this provider-backed cast with:
+
+```bash
+uv run scripts/a2a_docs_pipeline.py record-real-llm
+```
+
+## `INPUT_REQUIRED`
+
+When a remote A2A task reaches `TASK_STATE_INPUT_REQUIRED`, fast-agent:
+
+- returns a normal `PromptMessageExtended` assistant turn with
+  `stop_reason=LlmStopReason.PAUSE`;
+- keeps the pending A2A `task_id`;
+- preserves the returned A2A `context_id`;
+- sends the next user message back to the same task.
+
+Use `/a2a reset` to clear the pending task and start a fresh remote context.
+
+### Turn Continuation Recording
+
+This recording shows the task id being retained only while the remote task is in
+`TASK_STATE_INPUT_REQUIRED`; after the follow-up completes, the task id is
+cleared and the context id remains available for future turns.
+
+<div class="a2a-terminal-demo">
+  <div id="a2a-client-input-required-player"></div>
+</div>
+
+<script>
+  (function () {
+    function renderClientInputRequiredCast() {
+      var target = document.getElementById("a2a-client-input-required-player");
+      if (!target || !window.AsciinemaPlayer || target.dataset.loaded === "true") {
+        return;
+      }
+      target.dataset.loaded = "true";
+      window.AsciinemaPlayer.create("../../assets/a2a/a2a-client-input-required.cast", target, {
+        cols: 96,
+        rows: 18,
+        preload: true,
+        speed: 1,
+        idleTimeLimit: 1,
+        fit: "width",
+        theme: "fast-agent-dark"
+      });
+    }
+    if (document.readyState === "loading") {
+      document.addEventListener("DOMContentLoaded", renderClientInputRequiredCast);
+    } else {
+      renderClientInputRequiredCast();
+    }
+    if (window.document$ && window.document$.subscribe) {
+      window.document$.subscribe(renderClientInputRequiredCast);
+    }
+  })();
+</script>
+
+## Attachments
+
+The A2A client maps fast-agent prompt content to A2A parts:
+
+| fast-agent content | A2A part |
+|---|---|
+| `TextContent` | `Part(text=...)` |
+| `ResourceLink` | `Part(url=...)` |
+| `ImageContent` | `Part(raw=..., mediaType=image/...)` |
+| `AudioContent` | `Part(raw=..., mediaType=audio/...)` |
+| `EmbeddedResource` with JSON `TextResourceContents` | `Part(data=...)` |
+
+Remote URL, data, raw, and text response parts are rendered into fast-agent
+assistant output. See [Protocol Compliance](protocol-compliance.md) for current
+partial multimodal gaps.
diff --git a/docs/docs/a2a/getting-started.md b/docs/docs/a2a/getting-started.md
new file mode 100644
index 000000000..ce24cfa10
--- /dev/null
+++ b/docs/docs/a2a/getting-started.md
@@ -0,0 +1,419 @@
+---
+title: A2A Getting Started
+description: Connect fast-agent to remote Agent2Agent (A2A) servers and deploy fast-agent agents over A2A HTTP transports.
+---
+
+# A2A Getting Started
+
+fast-agent can connect to remote [Agent2Agent (A2A)](https://a2a-protocol.org/)
+agents as first-class agents, and can serve fast-agent agents over A2A HTTP
+transports. The quickest client path is the `--a2a` command-line shortcut, which
+creates a temporary `type: a2a` AgentCard for the current run.
+
+For focused reference material, see:
+
+- [Use as Client](client.md);
+- [Serve as A2A Server](server.md);
+- [API Usage](api.md);
+- [Protocol Compliance](protocol-compliance.md).
+
+This guide uses the deterministic fake A2A server included in the fast-agent test
+suite. That keeps the examples copy-pasteable and gives us a repeatable pipeline
+for both the docs and the tests.
+
+## 1. Start the fake A2A server
+
+From the fast-agent repository root, run:
+
+```bash
+--8<-- "docs/docs/a2a/snippets/start-fake-server.sh"
+```
+
+The fake server exposes:
+
+| Endpoint | URL |
+|---|---|
+| AgentCard | `http://127.0.0.1:41242/.well-known/agent-card.json` |
+| JSON-RPC | `http://127.0.0.1:41242/a2a/jsonrpc` |
+| HTTP+JSON | `http://127.0.0.1:41242/a2a/rest` |
+
+Keep this server running in one terminal, then use a second terminal for the
+client commands below.
+
+If you forget the fake server prompts, send:
+
+```text
+help
+```
+
+The fake server responds with its available demo commands. This is separate from
+fast-agent's local `/a2a help`, which lists TUI-side A2A commands.
+
+## 2. Connect from the CLI and stream a response
+
+```bash
+--8<-- "docs/docs/a2a/snippets/cli-stream-command.sh"
+```
+
+Expected output:
+
+```text
+--8<-- "docs/docs/a2a/snippets/cli-stream-output.txt"
+```
+
+The `--a2a` value is normally the remote A2A agent's base URL. fast-agent resolves
+its AgentCard from `/.well-known/agent-card.json`, selects the requested
+transport, sends the message, and prints the final aggregated response.
+
+For a longer manual streaming test, use the same server with:
+
+```bash
+uv run fast-agent -x \
+  --a2a http://127.0.0.1:41242 \
+  --a2a-transport JSONRPC \
+  --message "please long stream" \
+  --quiet
+```
+
+In the TUI, send:
+
+```text
+please long stream
+```
+
+The fake server emits a multi-step "remote analysis" artifact over several
+streaming updates, which is useful for checking the live renderer rather than
+only the final assistant turn. The stream uses `Step 1 — ...` text instead of a
+Markdown ordered list so the TUI preserves the step labels exactly.
+
+Transport names accepted by fast-agent are:
+
+| Canonical | Useful aliases |
+|---|---|
+| `JSONRPC` | `jsonrpc`, `json-rpc`, `rpc` |
+| `HTTP+JSON` | `http`, `http+json`, `rest` |
+
+## 3. Receive file, URL, and data parts
+
+The fake server can also return non-text A2A parts:
+
+```bash
+--8<-- "docs/docs/a2a/snippets/cli-files-command.sh"
+```
+
+Expected output:
+
+````text
+--8<-- "docs/docs/a2a/snippets/cli-files-output.txt"
+````
+
+Current rendering behavior:
+
+- text parts render as normal assistant text;
+- URL parts render as Markdown links;
+- data parts render as fenced JSON;
+- raw non-image bytes are preserved as blob resources when received by an A2A
+  server, and remote raw response bytes render as a safe
+  filename/media-type/byte-count placeholder in the fast-agent client.
+
+## 4. Continue an `INPUT_REQUIRED` task
+
+A2A agents can pause a task and ask the client for more input. fast-agent maps
+that state to a normal assistant turn, keeps the remote `task_id`, and sends the
+next user message back to the same task.
+
+With the fake server, type this in the TUI:
+
+```text
+need input
+blue
+```
+
+The first turn receives:
+
+```text
+A2A task TASK_STATE_INPUT_REQUIRED: Please provide the missing value.
+```
+
+The second turn is sent with the pending A2A task id and completes the task:
+
+```text
+input received: blue
+```
+
+Use `/a2a status` between those turns to inspect the preserved `Context`, `Task`,
+and `Last state` fields. `/a2a reset` starts a fresh remote context and clears any
+pending task.
+
+## 5. Use an AgentCard instead of `--a2a`
+
+For persistent configuration, create a card like this:
+
+```yaml
+--8<-- "docs/docs/a2a/snippets/agent-card.yaml"
+```
+
+Then run:
+
+```bash
+uv run fast-agent -x --agent-cards ./fake-a2a.yaml --agent fake_remote
+```
+
+Use AgentCards when you want the connection checked in, shared, or combined with
+other configured agents.
+
+## 6. Connect inside the TUI
+
+You can connect to A2A agents after the TUI has started:
+
+```text
+/a2a connect http://127.0.0.1:41242 --transport JSONRPC --name fake_remote
+```
+
+Useful diagnostics:
+
+```text
+--8<-- "docs/docs/a2a/snippets/tui-session.txt"
+```
+
+The `/a2a` command group currently includes:
+
+```text
+/a2a list
+/a2a status [agent]
+/a2a card [agent]
+/a2a transport [agent]
+/a2a reset [agent]
+/a2a connect <url> [--transport JSONRPC|HTTP+JSON] [--oauth|--no-oauth] [--name NAME]
+```
+
+## 7. Serve fast-agent over A2A
+
+Use `fast-agent serve a2a` when you want another A2A client to call a fast-agent
+agent. The A2A server exposes both HTTP transports:
+
+| Endpoint | URL |
+|---|---|
+| AgentCard | `http://127.0.0.1:41241/.well-known/agent-card.json` |
+| JSON-RPC | `http://127.0.0.1:41241/a2a/jsonrpc` |
+| HTTP+JSON | `http://127.0.0.1:41241/a2a/rest` |
+
+Example with an AgentCard bundle:
+
+```bash
+uv run fast-agent serve a2a \
+  --host 127.0.0.1 \
+  --port 41241 \
+  --agent-cards ./agents \
+  --model codexresponses.gpt-5.4-mini
+```
+
+The same runtime wiring used by normal fast-agent agents is available behind the
+served A2A agent: configured MCP servers, tools, skills, hooks, and
+AgentCard-loaded agents are initialized through the regular fast-agent path
+before the A2A server starts.
+
+A2A treats the endpoint as one remote agent or agentic system. The generated A2A
+AgentCard uses skills as capability metadata, and incoming A2A messages route to
+the fast-agent default agent. That default can orchestrate or delegate to other
+loaded agents internally.
+
+For fast-agent-to-fast-agent integrations, API clients can opt into a
+fast-agent-specific routing extension by adding message metadata:
+
+```json
+{
+  "metadata": {
+    "agent": "researcher"
+  }
+}
+```
+
+`fast_agent_agent` is accepted as an equivalent metadata key. Generic A2A
+clients should not depend on this extension.
+
+See [Protocol Compliance](protocol-compliance.md) for the supported A2A 1.0
+surface and known gaps.
+
+### Server sessions
+
+A2A `context_id` is optional in the protocol request. The A2A SDK server
+generates one when the client omits it. `fast-agent serve a2a` also honors the
+normal `--instance-scope` option:
+
+- `shared` reuses the primary fast-agent instance for all A2A contexts;
+- `connection` uses the A2A `context_id` as the server-side instance key;
+- `request` creates and disposes a fresh fast-agent instance per message.
+
+The served agent's own `use_history` setting still controls whether prior turns
+are sent to the model inside the selected instance scope.
+
+Clients should preserve and reuse the returned `context_id` for conversational
+continuity. The fast-agent A2A client does this automatically when history is
+enabled, and intentionally starts a fresh context after each completed turn
+when history is disabled.
+
+### API behavior
+
+The A2A server maps incoming A2A parts into the same `PromptMessageExtended`
+shape used by normal fast-agent agents:
+
+- text parts become `TextContent`;
+- URL parts become `ResourceLink` where the URL is valid;
+- raw image bytes become `ImageContent`;
+- other raw bytes become `EmbeddedResource` values with `BlobResourceContents`;
+- data parts become formatted JSON text.
+
+Responses are mapped back to A2A artifact parts and completed with
+`TASK_STATE_COMPLETED`. Provider credential failures are reported as
+`TASK_STATE_AUTH_REQUIRED`. Cancellations are reported as `TASK_STATE_CANCELED`.
+When a fast-agent response has `stop_reason=LlmStopReason.PAUSE`, the A2A server
+reports `TASK_STATE_INPUT_REQUIRED` with the response text as the status message.
+The task remains resumable; clients should send the next user message with the
+same A2A `task_id` and `context_id`. The fast-agent A2A client preserves both
+automatically, as shown in step 4.
+
+## Demo recording
+
+The repeatable docs pipeline can generate an asciinema recording for the TUI
+streaming/files/input-required flow. The committed `.cast` file is embedded below
+and can also be downloaded for local replay.
+
+<div class="a2a-terminal-demo">
+  <link rel="stylesheet" href="../../assets/vendor/asciinema-player/asciinema-player.css">
+  <link rel="stylesheet" href="../../assets/vendor/asciinema-player/catppuccin.css">
+  <div class="a2a-terminal-theme-switch" role="group" aria-label="Terminal theme">
+    <button type="button" data-a2a-terminal-theme="auto">Auto</button>
+    <button type="button" data-a2a-terminal-theme="light">Light</button>
+    <button type="button" data-a2a-terminal-theme="dark">Dark</button>
+  </div>
+  <div id="a2a-streaming-files-player"></div>
+</div>
+
+<script src="../../assets/vendor/asciinema-player/asciinema-player.min.js"></script>
+<script>
+  (function () {
+    var override = "auto";
+
+    function siteTheme() {
+      var scheme = document.documentElement.getAttribute("data-md-color-scheme");
+      if (scheme === "slate") {
+        return "dark";
+      }
+      if (scheme === "default") {
+        return "light";
+      }
+      return window.matchMedia && window.matchMedia("(prefers-color-scheme: dark)").matches
+        ? "dark"
+        : "light";
+    }
+
+    function selectedMode() {
+      return override === "auto" ? siteTheme() : override;
+    }
+
+    function currentTheme() {
+      return selectedMode() === "dark" ? "fast-agent-dark" : "fast-agent-light";
+    }
+
+    function updateButtons() {
+      document.querySelectorAll("[data-a2a-terminal-theme]").forEach(function (button) {
+        var active = button.getAttribute("data-a2a-terminal-theme") === override;
+        button.toggleAttribute("aria-pressed", active);
+      });
+    }
+
+    function bindButtons() {
+      document.querySelectorAll("[data-a2a-terminal-theme]").forEach(function (button) {
+        if (button.dataset.bound === "true") {
+          return;
+        }
+        button.dataset.bound = "true";
+        button.addEventListener("click", function () {
+          override = button.getAttribute("data-a2a-terminal-theme") || "auto";
+          updateButtons();
+          renderA2ACast(true);
+        });
+      });
+      updateButtons();
+    }
+
+    function renderA2ACast(force) {
+      var target = document.getElementById("a2a-streaming-files-player");
+      if (!target || !window.AsciinemaPlayer) {
+        return;
+      }
+      bindButtons();
+      var theme = currentTheme();
+      if (!force && target.dataset.loaded === "true" && target.dataset.theme === theme) {
+        return;
+      }
+      target.dataset.loaded = "true";
+      target.dataset.theme = theme;
+      target.innerHTML = "";
+      window.AsciinemaPlayer.create(
+        "../../assets/a2a/a2a-streaming-files.cast",
+        target,
+        {
+          cols: 104,
+          rows: 27,
+          preload: true,
+          poster: "npt:0:03",
+          speed: 1,
+          idleTimeLimit: 1.3,
+          fit: "width",
+          theme: theme
+        }
+      );
+    }
+
+    if (document.readyState === "loading") {
+      document.addEventListener("DOMContentLoaded", function () { renderA2ACast(false); });
+    } else {
+      renderA2ACast(false);
+    }
+    if (window.document$ && window.document$.subscribe) {
+      window.document$.subscribe(function () { renderA2ACast(false); });
+    }
+    new MutationObserver(function () { renderA2ACast(false); }).observe(document.documentElement, {
+      attributes: true,
+      attributeFilter: ["data-md-color-scheme"]
+    });
+  })();
+</script>
+
+If the player does not load, [download the A2A streaming/files cast](../assets/a2a/a2a-streaming-files.cast)
+and replay it locally with:
+
+```bash
+asciinema play docs/docs/assets/a2a/a2a-streaming-files.cast
+```
+
+## Regenerate these examples
+
+The page snippets and cast are generated from the same fake server used by the
+integration tests:
+
+```bash
+uv run scripts/a2a_docs_pipeline.py generate
+uv run scripts/a2a_docs_pipeline.py check
+```
+
+To refresh the terminal recording as well, install `asciinema` and `tmux`, then
+run:
+
+```bash
+uv run scripts/a2a_docs_pipeline.py record
+```
+
+Provider-backed A2A recordings are optional. When refreshing them, export the
+environment used by the docs run before recording:
+
+```bash
+export HF_TOKEN=...
+export OPENAI_API_KEY=...
+export A2A_REAL_LLM_MODEL=codexresponses.gpt-5.4-mini
+export A2A_HF_MCP_URL=https://hf.co/mcp
+export A2A_REAL_LLM_RECORD_SECONDS=70
+uv run scripts/a2a_docs_pipeline.py record-real-llm
+```
diff --git a/docs/docs/a2a/host-on-hf.md b/docs/docs/a2a/host-on-hf.md
new file mode 100644
index 000000000..92d5b8088
--- /dev/null
+++ b/docs/docs/a2a/host-on-hf.md
@@ -0,0 +1,244 @@
+---
+title: Host A2A on Hugging Face
+description: Deploy a fast-agent A2A server on Hugging Face Spaces with OAuth credential pass-through.
+---
+
+# Host A2A on Hugging Face
+
+This page describes the target deployment shape for hosting fast-agent as an A2A
+server on Hugging Face Spaces.
+
+The important behavior is credential pass-through: the caller authenticates to
+the hosted A2A server with a Hugging Face OAuth/bearer credential, and
+fast-agent makes that credential available to the running agent. That lets the
+agent use Hugging Face Inference Provider models, the Hugging Face MCP server,
+and Hugging Face tools as the caller rather than as a shared server account.
+
+## Current Status
+
+fast-agent A2A serving supports Hugging Face bearer authentication for HTTP
+`JSONRPC` and `HTTP+JSON` routes when `FAST_AGENT_SERVE_OAUTH=huggingface` is
+set. The public AgentCard stays discoverable, and action routes require a bearer
+token.
+
+The implemented first pass supports static bearer credentials and Hugging Face
+Space header normalization. Browser-based OAuth login for A2A clients is a later
+phase.
+
+## Space Layout
+
+A minimal Space should contain:
+
+```text
+.
+├── app.py
+├── fast-agent.yaml
+├── agents/
+│   └── researcher.yaml
+└── requirements.txt
+```
+
+`requirements.txt`:
+
+```text
+fast-agent-mcp
+```
+
+`app.py`:
+
+```python
+import os
+
+from fast_agent.cli.main import app
+
+
+if __name__ == "__main__":
+    os.environ.setdefault("FAST_AGENT_SERVE_OAUTH", "huggingface")
+    os.environ.setdefault("FAST_AGENT_OAUTH_SCOPES", "access")
+    app()
+```
+
+Start the Space with:
+
+```bash
+fast-agent serve a2a \
+  --host 0.0.0.0 \
+  --port 7860 \
+  --agent-cards ./agents \
+  --model hf.moonshotai/Kimi-K2-Thinking
+```
+
+Use the model/provider alias that matches your application. The key point is
+that Hugging Face provider credentials should come from the request token, not
+from a shared `HF_TOKEN`, when OAuth pass-through is enabled.
+
+## Space Environment
+
+Set these environment variables in the Space:
+
+```text
+FAST_AGENT_SERVE_OAUTH=huggingface
+FAST_AGENT_OAUTH_RESOURCE_URL=https://<space-subdomain>.hf.space
+FAST_AGENT_OAUTH_SCOPES=access
+```
+
+Do not set a shared `HF_TOKEN` unless the Space intentionally needs a server
+credential fallback. For user-scoped inference, the inbound bearer credential is
+the credential source.
+
+## Request Flow
+
+OAuth-enabled A2A flow:
+
+1. The client fetches `/.well-known/agent-card.json`.
+2. The AgentCard advertises bearer/OAuth security requirements.
+3. The client sends A2A requests to `/a2a/jsonrpc` or `/a2a/rest` with
+   `Authorization: Bearer <hf-token>`, or uses OAuth when the card advertises an
+   OAuth/OIDC challenge.
+4. The A2A server validates that a bearer credential is present.
+5. fast-agent stores the token in request context while the agent runs.
+6. Hugging Face Inference Provider model calls and Hugging Face MCP/tool calls
+   can use the request token.
+
+Do not confuse this with fast-agent's ambient Hugging Face client policy. The
+normal CLI can add a discovered local `HF_TOKEN` to Hugging Face URLs without an
+explicit `--auth`: it sends `Authorization` to `hf.co` and `huggingface.co`, and
+`X-HF-Authorization` to ordinary `*.hf.space` app URLs. That protects local
+tokens from being sent as app-level `Authorization` to arbitrary Space apps.
+When the Space endpoint itself is the authenticated A2A or MCP server, use
+explicit endpoint auth instead: `--auth`, checked-in `headers: Authorization:
+...`, or OAuth.
+
+## AgentCard Security
+
+An OAuth-enabled card should advertise security metadata so A2A clients know
+that credentials are required.
+
+The current implementation advertises bearer security:
+
+```json
+{
+  "securitySchemes": {
+    "hf_bearer": {
+      "httpAuthSecurityScheme": {
+        "scheme": "bearer",
+        "bearerFormat": "HF_TOKEN",
+        "description": "Hugging Face bearer token"
+      }
+    }
+  },
+  "securityRequirements": [
+    {
+      "schemes": {
+        "hf_bearer": {}
+      }
+    }
+  ]
+}
+```
+
+Skills include the same `securityRequirements` entry. fast-agent A2A clients can
+also use the existing browser OAuth flow when a remote AgentCard advertises
+OAuth2 or OpenID Connect security schemes.
+
+## Client Configuration
+
+For a checked-in fast-agent A2A client card, explicit bearer headers remain the
+most direct option:
+
+```yaml
+type: a2a
+name: hf_space_agent
+url: https://<space-subdomain>.hf.space
+transport: JSONRPC
+headers:
+  Authorization: "Bearer ${HF_TOKEN}"
+```
+
+This is endpoint authentication: the Space-hosted A2A server is the protected
+resource, so the standard `Authorization` header is the right header. The
+ambient `X-HF-Authorization` Space policy is for ordinary Space apps, not for
+authenticating to an A2A action route that advertises bearer/OAuth security.
+When a fast-agent A2A client connects to a `*.hf.space` URL and the public
+AgentCard advertises HTTP bearer security, a discovered local Hugging Face token
+is automatically promoted to endpoint `Authorization` unless explicit headers
+were configured.
+
+For AgentCards that advertise OAuth2 or OpenID Connect instead of a static
+bearer scheme, enable browser OAuth explicitly or allow the card to activate it:
+
+```yaml
+type: a2a
+name: hosted_agent
+url: https://<space-subdomain>.hf.space
+transport: JSONRPC
+auth:
+  oauth: true
+  persist: keyring
+```
+
+The same `--oauth` switch is available from the TUI:
+
+```text
+/a2a connect https://<space-subdomain>.hf.space --oauth --name hosted_agent
+```
+
+## Inference Provider Use
+
+With request token pass-through, hosted A2A agents can use Hugging Face models
+without putting a shared user token in the Space:
+
+```yaml
+name: researcher
+type: agent
+model: hf.moonshotai/Kimi-K2-Thinking
+instruction: |
+  Answer with concise Markdown.
+  Use Hugging Face tools when current Hub context is needed.
+mcp_connect:
+  - name: huggingface
+    target: "https://huggingface.co/mcp?bouquet=hub_repo_details_readme"
+    auth:
+      forward: huggingface
+```
+
+When the A2A request arrives with a user bearer token, Hugging Face provider
+calls use that request token before falling back to Space configuration.
+
+For client-managed Hugging Face MCP URLs, set `auth.forward: huggingface` to
+forward the same inbound request token to `hf.co`, `huggingface.co`, or
+`*.hf.space` upstreams. For Space upstreams, forwarded requests use
+`X-HF-Authorization`; for `hf.co` and `huggingface.co`, they use
+`Authorization`. This mode is intended for hosted Spaces where the agent should
+act as the caller rather than as a shared Space identity. For the Hugging Face
+MCP server itself, use `https://huggingface.co/mcp?...`; the forwarded request
+token is sent there as `Authorization: Bearer ...`. Forward mode preserves
+explicit `Authorization`/`X-HF-Authorization` headers when they are configured
+and disables OAuth escalation for that MCP connection.
+
+Do not rely on a shared `HF_TOKEN` for an MCP server that is configured with
+`auth.forward: huggingface`: forward mode deliberately avoids capturing the
+Space process token during configuration and uses the per-request bearer token
+at connection time.
+
+## Operational Notes
+
+- Keep the AgentCard public so clients can discover endpoint and auth metadata.
+- Require bearer credentials only on A2A action routes.
+- Prefer `--instance-scope connection` for multi-turn authenticated sessions
+  where A2A `contextId` should correlate with a fast-agent instance.
+- Prefer `--instance-scope request` for stateless public endpoints.
+- Use `--host 0.0.0.0` inside the Space; the served AgentCard should advertise
+  the external Space hostname when fetched by clients.
+
+## Verification Targets
+
+The OAuth-enabled A2A implementation should include tests that prove:
+
+- unauthenticated A2A requests are rejected;
+- authenticated A2A requests reach the agent;
+- the verified bearer token is saved at the A2A HTTP boundary and propagated
+  into fast-agent request context;
+- the AgentCard advertises security schemes and requirements;
+- client-side Hugging Face token auto-headers are added for HF URLs;
+- explicit user-supplied auth headers are preserved.
diff --git a/docs/docs/a2a/protocol-compliance.md b/docs/docs/a2a/protocol-compliance.md
new file mode 100644
index 000000000..fa1d0eef4
--- /dev/null
+++ b/docs/docs/a2a/protocol-compliance.md
@@ -0,0 +1,70 @@
+# A2A Protocol Compliance
+
+fast-agent's A2A support targets the
+[A2A Protocol Specification 1.0](https://a2a-protocol.org/v1.0.0/specification/)
+for HTTP transports. The implementation is built on the `a2a-sdk` 1.0 server and
+client stack and intentionally excludes gRPC.
+
+## Supported
+
+| Area | Status | Notes |
+|---|---|---|
+| Agent discovery | Supported | `fast-agent serve a2a` serves an AgentCard at `/.well-known/agent-card.json`. The card declares `JSONRPC` and `HTTP+JSON` interfaces with protocol version `1.0`, and advertises MIME-style input/output modes such as `text/plain`, `application/json`, `application/octet-stream`, and `image/*`. `fast-agent serve --transport a2a` remains supported. |
+| JSON-RPC transport | Supported | Client and server use the SDK JSON-RPC binding. |
+| HTTP+JSON transport | Supported | Client and server use the SDK REST binding. The server exposes the REST binding under `/a2a/rest`. |
+| Streaming task updates | Supported | fast-agent stream listeners are bridged to A2A `TaskArtifactUpdateEvent` events. The client preserves artifact order and honors the A2A `append` flag. |
+| Multi-turn contexts | Supported | Inbound `contextId` is optional. The SDK generates one when omitted. In A2A server `connection` scope, fast-agent uses the resolved `context_id` as the server-side instance key; `shared` and `request` scopes intentionally reuse or recreate instances independently of the A2A context. |
+| `INPUT_REQUIRED` continuation | Supported | Server responses with `PromptMessageExtended.stop_reason == LlmStopReason.PAUSE` become `TASK_STATE_INPUT_REQUIRED`. The fast-agent A2A client preserves the pending `task_id` and returned `context_id`, and surfaces the local response with `LlmStopReason.PAUSE`. |
+| Task retrieval, listing, cancellation, and subscribe | SDK-backed | These operations are provided by the SDK request handler and in-memory task store. Cancellation also cancels the running fast-agent task when still active. |
+| Text parts | Supported | A2A text parts map to `TextContent`; fast-agent text output maps back to A2A text parts. |
+| URL parts | Supported | A2A URL parts map to `ResourceLink`; fast-agent resource links map back to A2A URL parts. |
+| Image raw parts | Supported | Raw image bytes map to `ImageContent`; image output maps back to A2A raw parts. |
+| Binary non-image raw parts | Supported | Inbound raw non-image bytes map to `EmbeddedResource` with `BlobResourceContents`; blob resources map back to A2A raw file parts. |
+| Structured data parts | Supported for prompt content bridges | Inbound A2A data parts are rendered into formatted JSON text for the fast-agent prompt. Outbound fast-agent `TextResourceContents` with `mimeType="application/json"` are emitted as A2A data parts. The A2A 1.0 structured data example also permits JSON returned as text artifacts. |
+| Error states | Supported through SDK plus fast-agent mappings | Provider credential failures map to `TASK_STATE_AUTH_REQUIRED`; uncaught execution failures map to `TASK_STATE_FAILED`; cancellation maps to `TASK_STATE_CANCELED`. Transport and validation errors are handled by the SDK bindings. |
+
+## Known Gaps
+
+| Gap | Impact | Current behavior |
+|---|---|---|
+| gRPC transport | Not supported by design for this work. | The AgentCard does not advertise gRPC, and the CLI/API should use `JSONRPC` or `HTTP+JSON`. |
+| Push notifications | Not implemented. | The AgentCard advertises `pushNotifications=false`; SDK push configuration methods return the protocol's not-supported error. Streaming and polling remain available. |
+| Extended AgentCard | Not implemented. | The server publishes the public AgentCard only and does not configure `extendedAgentCard`. |
+| Authentication/security schemes on served AgentCards | Supported for Hugging Face bearer auth and OAuth/OIDC clients | Remote clients can pass headers when connecting to other A2A agents. When `FAST_AGENT_SERVE_OAUTH=huggingface` is set, serving fast-agent over A2A advertises an `hf_bearer` HTTP bearer security scheme, requires bearer auth on A2A action routes, accepts `Authorization` and also accepts `X-HF-Authorization` when deployment ingress forwards it, and passes the bearer token into fast-agent request context. A2A clients reuse the existing browser OAuth flow when a remote AgentCard advertises OAuth2 or OpenID Connect security schemes, with `auth.oauth` and `/a2a connect --oauth/--no-oauth` controls. In-task provider auth failures are reported as `AUTH_REQUIRED`. |
+| Typed audio content on the server | Partial. | The client can send `AudioContent` as raw A2A parts. The server preserves inbound audio bytes as blob resources rather than mapping them to a dedicated fast-agent `AudioContent` object. |
+| Structured JSON output from model text | Partial. | fast-agent JSON text responses remain text artifacts unless represented as `TextResourceContents` with `mimeType="application/json"`. This avoids guessing whether ordinary text is intended to be protocol data. |
+| Persistent task/session storage | In-memory only. | The server uses the SDK `InMemoryTaskStore` and fast-agent in-memory context instances. Restarting the server loses A2A task state and context-bound fast-agent sessions. |
+| Idempotent `messageId` handling | Not implemented in fast-agent layer. | The SDK validates request shape, but fast-agent does not deduplicate repeated `messageId` values. |
+| AgentCard signing | Not implemented. | The public AgentCard is unsigned. |
+| Extension negotiation | Not implemented. | The server does not advertise or process custom A2A extensions. |
+
+## Verification
+
+The deterministic A2A integration suite exercises:
+
+- JSON-RPC and HTTP+JSON client/server calls;
+- generated `context_id` continuity across turns;
+- fresh A2A context/task state for `use_history=False` turns, except while
+  continuing an `INPUT_REQUIRED` task;
+- A2A server `shared`, `connection`, and `request` instance scopes;
+- AgentCard skill advertisement and fast-agent-specific routing via message
+  metadata;
+- streaming artifact updates delivered to the fast-agent client stream listener;
+- artifact replacement and append semantics on the client;
+- `TASK_STATE_INPUT_REQUIRED` preservation and follow-up completion;
+- raw non-image file preservation into fast-agent blob resources and back to
+  A2A raw parts;
+- raw image and audio input mapping;
+- JSON `TextResourceContents` mapping to A2A data parts;
+- cancellation, task retrieval/listing after cancellation, and protocol error
+  paths via SDK-backed handlers;
+- Hugging Face bearer auth route protection, AgentCard security metadata,
+  saved request-token propagation, and client-side HF auth header application;
+- A2A client OAuth activation for OAuth/OIDC AgentCards, `auth.oauth: false`
+  suppression, checked-in AgentCard auth parsing, and TUI `--oauth/--no-oauth`
+  parsing.
+
+For provider smoke testing, run a fast-agent A2A server with
+`codexresponses.gpt-5.4-mini` and connect to it with the fast-agent A2A client.
+The expected behavior is incremental streaming chunks delivered to client stream
+listeners before the final task completion event.
diff --git a/docs/docs/a2a/server.md b/docs/docs/a2a/server.md
new file mode 100644
index 000000000..7db8687c1
--- /dev/null
+++ b/docs/docs/a2a/server.md
@@ -0,0 +1,254 @@
+---
+title: A2A Server
+description: Deploy fast-agent agents as an Agent2Agent (A2A) HTTP server.
+---
+
+# A2A Server
+
+Use `fast-agent serve a2a` to expose a fast-agent app through A2A HTTP
+transports. `fast-agent serve --transport a2a` remains supported for parity with
+the generic MCP/ACP serve command. The configured fast-agent app is initialized
+first, then the A2A server routes ordinary protocol messages into the
+fast-agent default agent.
+
+## Start a Server
+
+```bash
+uv run fast-agent serve a2a \
+  --host 127.0.0.1 \
+  --port 41241 \
+  --instance-scope shared \
+  --agent-cards ./agents \
+  --model codexresponses.gpt-5.4-mini
+```
+
+The server exposes:
+
+| Endpoint | URL |
+|---|---|
+| AgentCard | `http://127.0.0.1:41241/.well-known/agent-card.json` |
+| JSON-RPC | `http://127.0.0.1:41241/a2a/jsonrpc` |
+| HTTP+JSON | `http://127.0.0.1:41241/a2a/rest` |
+
+The AgentCard advertises `JSONRPC` and `HTTP+JSON` with protocol version `1.0`.
+gRPC is intentionally not advertised.
+
+When the server binds to a wildcard host such as `0.0.0.0` or `::`, the served
+AgentCard builds interface URLs from the incoming AgentCard request host. This
+keeps the URLs in cards fetched from another machine routable instead of
+advertising the bind wildcard or the server's loopback address.
+
+## Card Recording
+
+This recording shows the expected shape when a wildcard-bound server is fetched
+through a routable hostname. The JSON-RPC and HTTP+JSON interfaces use the
+request hostname in the served card.
+
+<div class="a2a-terminal-demo">
+  <link rel="stylesheet" href="../../assets/vendor/asciinema-player/asciinema-player.css">
+  <link rel="stylesheet" href="../../assets/vendor/asciinema-player/catppuccin.css">
+  <div id="a2a-server-card-player"></div>
+</div>
+
+<script src="../../assets/vendor/asciinema-player/asciinema-player.min.js"></script>
+<script>
+  (function () {
+    function renderServerCardCast() {
+      var target = document.getElementById("a2a-server-card-player");
+      if (!target || !window.AsciinemaPlayer || target.dataset.loaded === "true") {
+        return;
+      }
+      target.dataset.loaded = "true";
+      window.AsciinemaPlayer.create("../../assets/a2a/a2a-server-card.cast", target, {
+        cols: 104,
+        rows: 20,
+        preload: true,
+        speed: 1,
+        idleTimeLimit: 1,
+        fit: "width",
+        theme: "fast-agent-dark"
+      });
+    }
+    if (document.readyState === "loading") {
+      document.addEventListener("DOMContentLoaded", renderServerCardCast);
+    } else {
+      renderServerCardCast();
+    }
+    if (window.document$ && window.document$.subscribe) {
+      window.document$.subscribe(renderServerCardCast);
+    }
+  })();
+</script>
+
+For static deployment checks, bind with the concrete hostname or address that
+remote clients should use.
+
+## Runtime Wiring
+
+The served A2A agent uses the normal fast-agent runtime. AgentCards, MCP servers,
+tools, skills, hooks, model settings, and workflow agents are loaded through the
+same path used by the CLI and TUI before the A2A server starts.
+
+That means an A2A request can drive a full fast-agent bundle behind one A2A
+agent boundary: an orchestrator, router, tool-using agent, MCP-backed agent, or
+AgentCard-loaded group.
+
+## Agent Skills in the A2A Card
+
+A2A models the served endpoint as one remote agent or agentic system. A2A
+`AgentSkill` entries are advertised capabilities for that remote agent; they are
+not a standard routing table and do not make multiple directly addressable
+agents at the same endpoint. A2A `AgentSkill` is also separate from fast-agent
+"skills" on disk.
+
+fast-agent advertises loaded user-facing fast-agent agents as A2A skills so
+clients can understand the capabilities available behind the endpoint:
+
+```json
+{
+  "id": "researcher",
+  "name": "researcher",
+  "description": "Research and summarize source material.",
+  "tags": ["fast-agent", "basic"],
+  "examples": ["Hello"],
+  "inputModes": ["text/plain", "application/json", "application/octet-stream", "image/*"],
+  "outputModes": ["text/plain", "application/json", "application/octet-stream", "image/*"]
+}
+```
+
+The generated skill list comes from the user-facing fast-agent agents loaded at
+server startup. The skill `id` and `name` are derived from the fast-agent agent
+name. The description uses the agent's configured `description` when present;
+otherwise fast-agent generates a generic description. Tags include `fast-agent`
+and the fast-agent agent type.
+
+Messages route to the fast-agent default agent by default. That default agent
+should orchestrate, delegate, or call helper agents internally just as it would
+in normal fast-agent use.
+
+For fast-agent-to-fast-agent integrations, the server also accepts a
+fast-agent-specific routing extension in message metadata:
+
+```json
+{
+  "metadata": {
+    "agent": "researcher"
+  }
+}
+```
+
+`fast_agent_agent` is accepted as an equivalent metadata key. This metadata is
+not portable A2A behavior; generic A2A clients should treat skills as capability
+metadata and send normal messages to the endpoint.
+
+Current limitation: examples are still generic, and mode lists describe the
+server-wide MIME-style content support rather than deriving per-agent modality
+declarations from fast-agent AgentCard metadata or installed fast-agent skills.
+
+## Instance Scope, Sessions, and Resumption
+
+A2A `contextId` is optional on inbound messages. If a client omits it, the A2A
+SDK generates one. The server still returns and tracks the resolved A2A
+`context_id`; how that maps to fast-agent runtime instances depends on
+`--instance-scope`:
+
+| Scope | Behavior |
+|---|---|
+| `shared` | Use the primary fast-agent instance for all A2A contexts. This is the default for `fast-agent serve a2a`, matching the generic serve default. |
+| `connection` | Use the A2A `context_id` as the server-side instance/session key. The same `context_id` reuses the same fast-agent instance; a new `context_id` creates a fresh instance. |
+| `request` | Create and dispose a fresh fast-agent instance for every A2A message. |
+
+The served agent's history setting controls how much prior conversation is sent
+to the model inside whichever instance scope is selected. It does not change the
+A2A protocol `context_id`.
+
+For `INPUT_REQUIRED`, clients should continue with the returned `task_id` and
+`context_id`. `shared` and `connection` scopes preserve in-memory fast-agent
+state for follow-up turns. `request` scope intentionally creates a fresh
+fast-agent instance for each message, so it is best for stateless agents.
+
+The current server uses in-memory A2A task storage and in-memory fast-agent
+context instances. Restarting the process loses A2A task state and session
+continuity.
+
+## Streaming
+
+The server registers a fast-agent stream listener for agents that support it.
+Non-reasoning text chunks are sent as A2A `TaskArtifactUpdateEvent` updates with
+a stable artifact id. The first chunk replaces/creates the artifact; later chunks
+use A2A append semantics.
+
+If the final fast-agent response differs from the streamed text, the server sends
+a final replacement artifact for the same artifact id before completing the task.
+
+## `INPUT_REQUIRED`
+
+When a fast-agent response has:
+
+```python
+stop_reason=LlmStopReason.PAUSE
+```
+
+the A2A server reports `TASK_STATE_INPUT_REQUIRED` with the response text as the
+status message. The task remains resumable. Clients should send the follow-up
+message with the same A2A `task_id` and `context_id`.
+
+## Errors
+
+The server maps common fast-agent outcomes into A2A states:
+
+| fast-agent outcome | A2A state |
+|---|---|
+| normal response | `TASK_STATE_COMPLETED` |
+| `LlmStopReason.PAUSE` | `TASK_STATE_INPUT_REQUIRED` |
+| provider credential error | `TASK_STATE_AUTH_REQUIRED` |
+| cancellation | `TASK_STATE_CANCELED` |
+| unexpected exception | `TASK_STATE_FAILED` |
+
+Transport validation errors, task lookup errors, non-cancelable tasks, and
+unsupported push notification operations are handled by the A2A SDK request
+handler.
+
+## Hugging Face Bearer Auth
+
+Set `FAST_AGENT_SERVE_OAUTH=huggingface` before starting `fast-agent serve a2a`
+to require bearer authentication on `/a2a/jsonrpc` and `/a2a/rest` while keeping
+the public AgentCard discoverable.
+
+The A2A server middleware accepts both header names when they reach the app:
+
+```text
+Authorization: Bearer <token>
+X-HF-Authorization: Bearer <token>
+```
+
+For Space-hosted A2A endpoints, clients should use `Authorization` through
+`--auth`, explicit AgentCard headers, or OAuth. `X-HF-Authorization` is the
+ambient fast-agent CLI policy for ordinary Space app calls; it is not a
+substitute for endpoint bearer auth unless the deployment ingress passes that
+header through to the app. The server advertises an `hf_bearer` HTTP bearer
+security scheme in the AgentCard and stores the inbound token in fast-agent
+request context while the agent runs, allowing Hugging Face Inference Provider
+models and Hugging Face MCP/tools to use the caller credential.
+
+See [Host A2A on Hugging Face](host-on-hf.md) for a Space-oriented setup.
+
+## File Parts
+
+Incoming raw image parts become `ImageContent`. Other raw file parts become
+`EmbeddedResource` values with `BlobResourceContents`, preserving the base64 file
+payload, MIME type, and filename-like attachment URI for the fast-agent agent.
+When a fast-agent response includes a blob resource, the server emits it back to
+A2A clients as a raw file part.
+
+## Structured JSON
+
+A2A supports structured JSON exchange through JSON-compatible data content and
+also allows JSON to be returned as text artifacts. fast-agent does not parse
+ordinary model text and guess that it should become protocol data. Instead, it
+maps `TextResourceContents` with `mimeType="application/json"` to A2A data
+parts. This gives API users and structured-output wrappers an explicit path to
+return protocol-level JSON while preserving normal markdown/text responses.
+
+See [Protocol Compliance](protocol-compliance.md) for the full supported surface
+and known gaps.
diff --git a/docs/docs/a2a/snippets/agent-card.yaml b/docs/docs/a2a/snippets/agent-card.yaml
new file mode 100644
index 000000000..73d310eea
--- /dev/null
+++ b/docs/docs/a2a/snippets/agent-card.yaml
@@ -0,0 +1,4 @@
+type: a2a
+name: fake_remote
+url: http://127.0.0.1:41242
+transport: JSONRPC
diff --git a/docs/docs/a2a/snippets/cli-files-command.sh b/docs/docs/a2a/snippets/cli-files-command.sh
new file mode 100644
index 000000000..52426618a
--- /dev/null
+++ b/docs/docs/a2a/snippets/cli-files-command.sh
@@ -0,0 +1,5 @@
+uv run fast-agent -x \
+  --a2a http://127.0.0.1:41242 \
+  --a2a-transport HTTP+JSON \
+  --message "respond with files" \
+  --quiet
diff --git a/docs/docs/a2a/snippets/cli-files-output.txt b/docs/docs/a2a/snippets/cli-files-output.txt
new file mode 100644
index 000000000..e5aae84dc
--- /dev/null
+++ b/docs/docs/a2a/snippets/cli-files-output.txt
@@ -0,0 +1,9 @@
+file response
+[report.pdf](https://example.com/report.pdf) (application/pdf)
+```json
+{
+  "ok": true,
+  "source": "fake-a2a-server"
+}
+```
+[note.txt: 3 bytes text/plain]
diff --git a/docs/docs/a2a/snippets/cli-stream-command.sh b/docs/docs/a2a/snippets/cli-stream-command.sh
new file mode 100644
index 000000000..4b39697b5
--- /dev/null
+++ b/docs/docs/a2a/snippets/cli-stream-command.sh
@@ -0,0 +1,5 @@
+uv run fast-agent -x \
+  --a2a http://127.0.0.1:41242 \
+  --a2a-transport JSONRPC \
+  --message "please stream" \
+  --quiet
diff --git a/docs/docs/a2a/snippets/cli-stream-output.txt b/docs/docs/a2a/snippets/cli-stream-output.txt
new file mode 100644
index 000000000..13c2f849a
--- /dev/null
+++ b/docs/docs/a2a/snippets/cli-stream-output.txt
@@ -0,0 +1,2 @@
+stream chunk one
+stream chunk two
diff --git a/docs/docs/a2a/snippets/start-fake-server.sh b/docs/docs/a2a/snippets/start-fake-server.sh
new file mode 100644
index 000000000..9efefc2b4
--- /dev/null
+++ b/docs/docs/a2a/snippets/start-fake-server.sh
@@ -0,0 +1 @@
+uv run python tests/integration/a2a/fake_server.py --port 41242
diff --git a/docs/docs/a2a/snippets/tui-session.txt b/docs/docs/a2a/snippets/tui-session.txt
new file mode 100644
index 000000000..5e598b57f
--- /dev/null
+++ b/docs/docs/a2a/snippets/tui-session.txt
@@ -0,0 +1,8 @@
+/a2a help
+help
+/a2a status
+/a2a transport
+please stream
+respond with files
+need input
+blue
diff --git a/docs/docs/assets/a2a/a2a-client-cli.cast b/docs/docs/assets/a2a/a2a-client-cli.cast
new file mode 100644
index 000000000..c40651598
--- /dev/null
+++ b/docs/docs/assets/a2a/a2a-client-cli.cast
@@ -0,0 +1,5 @@
+{"version": 2, "width": 96, "height": 18, "timestamp": 1779385200, "idle_time_limit": 1.0, "env": {"SHELL": "/bin/bash", "TERM": "xterm-256color"}, "title": "fast-agent A2A client CLI streaming"}
+[0.1, "o", "\u001b[1;36m$ uv run fast-agent -x --a2a http://127.0.0.1:41242 --a2a-transport JSONRPC --message \"please stream\" --quiet\u001b[0m\r\n"]
+[0.8, "o", "stream chunk one\r\n"]
+[1.4, "o", "stream chunk two\r\n"]
+[1.9, "o", "\u001b[1;32mA2A task completed over JSONRPC\u001b[0m\r\n"]
diff --git a/docs/docs/assets/a2a/a2a-client-input-required.cast b/docs/docs/assets/a2a/a2a-client-input-required.cast
new file mode 100644
index 000000000..453463989
--- /dev/null
+++ b/docs/docs/assets/a2a/a2a-client-input-required.cast
@@ -0,0 +1,8 @@
+{"version": 2, "width": 96, "height": 18, "timestamp": 1779385201, "idle_time_limit": 1.0, "env": {"SHELL": "/bin/bash", "TERM": "xterm-256color"}, "title": "fast-agent A2A input-required continuation"}
+[0.1, "o", "\u001b[1;35mfast-agent ▸\u001b[0m need input\r\n"]
+[0.8, "o", "A2A task TASK_STATE_INPUT_REQUIRED: Please provide the missing value.\r\n"]
+[1.2, "o", "\u001b[2m/a2a status remote\u001b[0m\r\n"]
+[1.6, "o", "Context: 7b7c8d9e\r\nTask: task-input-001\r\nLast state: TASK_STATE_INPUT_REQUIRED\r\nTransport: JSONRPC\r\n"]
+[2.2, "o", "\u001b[1;35mfast-agent ▸\u001b[0m blue\r\n"]
+[2.8, "o", "input received: blue\r\n"]
+[3.2, "o", "\u001b[2mTask cleared after completion; context preserved for the next turn.\u001b[0m\r\n"]
diff --git a/docs/docs/assets/a2a/a2a-real-llm-hf-streaming.cast b/docs/docs/assets/a2a/a2a-real-llm-hf-streaming.cast
new file mode 100644
index 000000000..b6beb34cf
--- /dev/null
+++ b/docs/docs/assets/a2a/a2a-real-llm-hf-streaming.cast
@@ -0,0 +1,169 @@
+{"version": 2, "width": 120, "height": 32, "timestamp": 1779830028, "idle_time_limit": 1.3, "env": {"SHELL": "/bin/bash", "TERM": "xterm-256color"}, "title": "fast-agent A2A real LLM Hugging Face MCP streaming demo"}
+[0.073334, "o", "\u001b[?1049h\u001b[22;0;0t\u001b[?1h\u001b=\u001b[H\u001b[2J\u001b[?12l\u001b[?25h\u001b[?1000l\u001b[?1002l\u001b[?1003l\u001b[?1006l\u001b[?1005l\u001b[?2004h\u001b(B\u001b[m\u001b[?12l\u001b[?25h\u001b[?1006l\u001b[?1000l\u001b[?1002l\u001b[?1003l\u001b[1;1H\u001b[1;32r\u001b[c\u001b[>c\u001b[>q\u001b]10;?\u001b\\\u001b]11;?\u001b\\\u001b[32;1H"]
+[0.073817, "o", "\u001b[?25l\u001b[12d─────────────────────────────────────────────────────────────\u001b[32m───────────────────────────────────────────────────────────\u001b(B\u001b[m\u001b[1;1Hfast-agent A2A server ready\u001b[K\r\nmodel: codexresponses.gpt-5.4-mini\u001b[K\r\nMCP: https://hf.co/mcp\u001b[K\r\nlog: /tmp/a2a-real-llm-server.log\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[2B            \"url\": \"http://127.0.0.1:41243/a2a/jsonrpc\",\u001b[K\r\n            \"protocolBinding\": \"JSONRPC\",\u001b[K\r\n            \"protocolVersion\": \"1.0\"\u001b[K\r\n        },\u001b[K\r\n        {\u001b[K\r\n            \"url\": \"http://127.0.0.1:41243/a2a/rest\",\u001b[K\r\n            \"protocolBinding\": \"HTTP+JSON\",\u001b[K\r\n            \"protocolVersion\": \"1.0\"\u001b[K\r\n        }\u001b[K\r\n    ],\u001b[K\r\n    \"provider\": {\u001b[K\r\n        \"url\": \"https://fast-agent.ai\",\u001b[K\r\n        \"organization\": \"fast-agent\"\u001b[K\r\n    },\u001b[K\r\n    \"version\": \"0.7.12\",\u001b[K\r\n    \"capabilities\": {\u001b[K\r\n        \"streaming\": true,\u001b[K\r\n\u001b[K\r\ninteractive A2A JSON-RPC client\u001b[K\r\n\u001b[K\u001b[?12l\u001b[?25h\u001b(B\u001b[m\u001b[?12l\u001b[?25h\u001b[?1006l\u001b[?1000l\u001b[?1002l\u001b[?1003l\u001b[1;1H\u001b[1;32r\u001b[32;1H\u001b[?25l\u001b[12d─────────────────────────────────────────────────────────────\u001b[32m───────────────────────────────────────────────────────────\u001b(B\u001b[m\u001b[1;1Hfast-agent A2A server ready\u001b[K\r\nmodel: codexresponses.gpt-5.4-mini\u001b[K\r\nMCP: https://hf.co/mcp\u001b[K\r\nlog: /tmp/a2a-real-llm-server.log\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[2B            \"url\": \"http://127.0.0.1:41243/a2a/jsonrpc\",\u001b[K\r\n            \"protocolBinding\": \"JSONRPC\",\u001b[K\r\n            \"protocolVersion\": \"1.0\"\u001b[K\r\n        
},\u001b[K\r\n        {\u001b[K\r\n            \"url\": \"http://127.0.0.1:41243/a2a/rest\",\u001b[K\r\n            \"protocolBinding\": \"HTTP+JSON\",\u001b[K\r\n            \"protocolVersion\": \"1.0\"\u001b[K\r\n        }\u001b[K\r\n    ],\u001b[K\r\n    \"provider\": {\u001b[K\r\n        \"url\": \"https://fast-agent.ai\",\u001b[K\r\n        \"organization\": \"fast-agent\"\u001b[K\r\n    },\u001b[K\r\n    \"version\": \"0.7.12\",\u001b[K\r\n    \"capabilities\": {\u001b[K\r\n        \"streaming\": true,\u001b[K\r\n\u001b[K\r\ninteractive A2A JSON-RPC client\u001b[K\r\n\u001b[K\u001b[?12l\u001b[?25h"]
+[0.576436, "o", "\u001b[?25l\u001b[12d─────────────────────────────────────────────────────────────\u001b[32m───────────────────────────────────────────────────────────\u001b(B\u001b[m\u001b[?12l\u001b[?25h\u001b[32;1H"]
+[1.523996, "o", "\u001b[?25l"]
+[1.643163, "o", "\u001b[?12l\u001b[?25h"]
+[1.651248, "o", "\u001b[13;32r\u001b[1;1H\u001b[4S\u001b[28d\u001b[2mUse \u001b[32m'/'\u001b[39m for commands, \u001b[32m'!'\u001b[39m for shell. \u001b[32m'#'\u001b[39m to query, \u001b[32m'@'\u001b[39m to switch agents\r\nCTRL+T multiline, CTRL+Y copy last message, CTRL+E external editor.\u001b(B\u001b[m\u001b[K\r\n\u001b[2mCTRL+Space or Tab for path completion. Use \u001b[35m/\u001b[95mattach\u001b[39m, `^file:`, or `^url:` for attachments. F10 to clear.\u001b(B\u001b[m\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[1.654661, "o", "\u001b[13;32r\u001b[32;1H\u001b[K❯\r\u001b[5S\u001b[4A\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[1;32r\u001b[27;3H"]
+[1.658664, "o", "\u001b[13;32r\u001b[32;1H\n\u001b[K\u001b[30m\u001b[47m\u001b[7m \u001b[35m\u001b[40m a2a_remote[A2A] \u001b[30m\u001b[47m  \u001b[100m ▲ \u001b[47m \u001b[42mhf-model-research\u001b[47m | 000 | \u001b[32m\u001b[40m NRML \u001b[30m\u001b[47m | fast-agent 0.7.12                                            \u001b[32;1H\u001b[1;32r\u001b[26;3H\u001b(B\u001b[m"]
+[3.127208, "o", "Use the Hugging Face MCP server if available. Answer in markdown: what models are trending on Hugging Face right now?\r\nInclude concise bullets and mention any uncertainty."]
+[3.130885, "o", "\u001b[26;1H❯ Use the Hugging Face MCP server if available. Answer in markdown: what models are trending on Hugging Face right now?\r\nInclude concise bullets and mention any uncertainty.\u001b[32;1H \u001b[K\u001b[27;53H"]
+[3.131134, "o", "\u001b[13;32r\u001b[27;1H\u001b[J\u001b[A\u001b[K\u001b[1;32r\u001b[26;1H"]
+[3.13133, "o", "\u001b[A"]
+[3.131662, "o", "\u001b[?25l"]
+[3.132647, "o", "\r\n"]
+[3.132951, "o", "\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m \u001b[34ma2a_remote\u001b[39m \u001b[2m──────────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[27;1H\u001b(B\u001b[m"]
+[3.133658, "o", "Use the Hugging Face MCP server if available. Answer in markdown: what models are trending on Hugging Face right now? \r\nInclude concise bullets and mention any uncertainty.\r\n"]
+[3.135369, "o", "\u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[3.135814, "o", "\r\u001b[K\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m\u001b[34m Sending       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[3.233021, "o", "\r\u001b[K\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m\u001b[34m Sending       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[3.333659, "o", "\r\u001b[K\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m\u001b[34m Sending       \u001b[32m⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[3.363961, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠄\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[3.364466, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠄\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[3.435726, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠄\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[3.536464, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⢀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[3.637946, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠠\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[3.739535, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠐\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[3.842132, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠈\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[3.944595, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[4.04751, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[4.14976, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠄\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[4.251273, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⡀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[4.352838, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⢀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[4.454627, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠠\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[4.555196, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠐\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[4.657177, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠈\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[4.759326, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[4.861101, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[4.962693, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠄\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[5.064262, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⡀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[5.077852, "o", "\u001b[?7727h\u001b(B\u001b[m\u001b[?12l\u001b[?25h\u001b[?1006l\u001b[?1000l\u001b[?1002l\u001b[?1003l\u001b[1;1H\u001b[1;32r\u001b[29;55H\u001b[?25l"]
+[5.078327, "o", "\u001b[12;1H─────────────────────────────────────────────────────────────\u001b[32m───────────────────────────────────────────────────────────\u001b(B\u001b[m\u001b[1;1Hfast-agent A2A server ready\u001b[K\r\nmodel: codexresponses.gpt-5.4-mini\u001b[K\r\nMCP: https://hf.co/mcp\u001b[K\r\nlog: /tmp/a2a-real-llm-server.log\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[2B    \"provider\": {\u001b[K\r\n        \"url\": \"https://fast-agent.ai\",\u001b[K\r\n        \"organization\": \"fast-agent\"\u001b[K\r\n    },\u001b[K\r\n    \"version\": \"0.7.12\",\u001b[K\r\n    \"capabilities\": {\u001b[K\r\n        \"streaming\": true,\u001b[K\r\n\u001b[K\r\ninteractive A2A JSON-RPC client\u001b[K\u001b[2m\r\nUse \u001b[32m'/'\u001b[39m for commands, \u001b[32m'!'\u001b[39m for shell. \u001b[32m'#'\u001b[39m to query, \u001b[32m'@'\u001b[39m to switch agents\u001b(B\u001b[m\u001b[K\u001b[2m\r\nCTRL+T multiline, CTRL+Y copy last message, CTRL+E external editor.\u001b(B\u001b[m\u001b[K\u001b[2m\r\nCTRL+Space or Tab for path completion. Use \u001b[35m/\u001b[95mattach\u001b[39m, `^file:`, or `^url:` for attachments. F10 to clear.\u001b(B\u001b[m\u001b[K\r\n\u001b[K\u001b[34m\r\n▎\u001b[2m▶\u001b(B\u001b[m \u001b[34ma2a_remote\u001b[39m \u001b[2m──────────────────────────────────────────────────────────────────────────────────────────────────────────\u001b(B\u001b[m\u001b[27;1HUse the Hugging Face MCP server if available. Answer in markdown: what models are trending on Hugging Face right now? \u001b[K\r\nInclude concise bullets and mention any uncertainty.\u001b[K\u001b[37m\r\n▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⡀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[29;55H"]
+[5.166327, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⢀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[5.268759, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠠\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[5.371287, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠐\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[5.473277, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠈\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[5.574318, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[5.67729, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[5.778088, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠄\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[5.879917, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⡀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[5.981723, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⢀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[6.083636, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠠\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[6.185365, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠐\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[6.287557, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠈\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[6.389484, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[6.490851, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[6.591307, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠄\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[6.693249, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⡀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[6.79348, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⢀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[6.895219, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠠\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[6.99585, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠐\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[7.097913, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠈\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[7.199735, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[7.302329, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[7.403473, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠄\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[7.504576, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⡀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[7.605069, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⢀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[7.705838, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠠\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[7.806409, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠐\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[7.909307, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠈\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.01064, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.112427, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.214289, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠄\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.315833, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⡀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.416458, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⢀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.518218, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠠\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.6189, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠐\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.72154, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠈\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.824324, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.926231, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[9.026645, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠄\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[9.128934, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⡀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[9.230088, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⢀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[9.331928, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠠\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[9.43245, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠐\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[9.533182, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠈\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[9.634504, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[9.737445, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠄\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[9.83751, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⡀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[9.93957, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⢀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.040342, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠠\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.141221, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠐\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.242182, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠈\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.343214, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.434606, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.43498, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.435459, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.435901, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.436433, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.437114, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.437557, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.437993, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.438436, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.438821, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.439349, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.439708, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.440196, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.440599, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.441089, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.441468, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.443559, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m response      ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[10.446937, "o", "\r\u001b[K\u001b[?12l\u001b[?25h"]
+[10.447065, "o", "\u001b[?25l"]
+[10.449357, "o", "\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[13;32r\u001b[32;1H\n\u001b[28;3H \u001b[32ma2a_remote\u001b[39m \u001b[2mA2A\u001b[30;1H\u001b(B\u001b[m\u001b[33m\u001b[4mTrending models on Hugging Face right now\u001b(B\u001b[m                                                                              \u001b[32;1HBased on a live Hub search\u001b[K●\r\n\u001b[31;28H                                                                                            \r\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[10.473681, "o", "\u001b[ABased on a live Hub search sorted by \u001b[1mtrending score\u001b(B\u001b[m at the time of my query, these are currently near●                 \r\n"]
+[10.493251, "o", "\u001b[13;32r\u001b[32;1H\n\u001b[2ABased on a live Hub search sorted by \u001b[1mtrending score\u001b(B\u001b[m at the time of my query, these are currently near the top:         \u001b[32;1H\u001b[1m \u001b(B\u001b[m\u001b[K\u001b[1m• \u001b(B\u001b[m**[bytedance-research/L●\r\n\u001b[31;28H                                                                                            \r\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[10.511822, "o", "\u001b[A\u001b[1m • \u001b(B\u001b[m**[bytedance-research/Lance](https://hf.co/bytedance-research/L●                                                    \r\n"]
+[10.529039, "o", "\u001b[A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multim●                                                                                  \r\n"]
+[10.565659, "o", "\u001b[A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b(B\u001b[m**[NemoStation/Marlin●\u001b[13;32r\u001b[32;1H\n\u001b[31;26H                                                                                              \r\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[10.581037, "o", "\u001b[2A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b(B\u001b[m**[NemoStation/Marlin-2●                                                                                            \r\n"]
+[10.600694, "o", "\u001b[2A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m●                                                                                              \r\n"]
+[10.621347, "o", "\u001b[2A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/c●                                  \r\n"]
+[10.643135, "o", "\u001b[2A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b(B\u001b[m**[meitu●\u001b[13;32r\u001b[32;1H\n\u001b[31;13H                                                                                                           \r\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[10.679775, "o", "\u001b[3A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b(B\u001b[m**[meituan-longcat/LongCat-Video-Avatar-1.5](https://hf.co/meituan-longcat●                                         \r\n"]
+[10.699712, "o", "\u001b[3A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b(B\u001b[m**[meituan-longcat/LongCat-Video-Avatar-1.5](https://hf.co/meituan-longcat/LongCat-Video-Avatar-1.5●                \r\n"]
+[10.718574, "o", "\u001b[3A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar●                                               \r\n"]
+[10.737952, "o", "\u001b[3A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b(B\u001b[m**[●\u001b[13;32r\u001b[32;1H\n\u001b[31;8H                                                                                                                \r\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[10.757963, "o", "\u001b[4A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b(B\u001b[m**[sapientinc/HRM-Text-1B](https●                                                                                   \r\n"]
+[10.798746, "o", "\u001b[4A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m —●                                                                                           \r\n"]
+[10.821085, "o", "\u001b[4A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b(B\u001b[m**[openbmb/MiniCP●\u001b[13;32r\u001b[32;1H\n\u001b[31;22H                                                                                                  \r\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[10.835477, "o", "\u001b[5A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b(B\u001b[m**[openbmb/MiniCPM5●                                                                                                \r\n"]
+[10.869941, "o", "\u001b[5A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context●                                                                           \r\n"]
+[10.919711, "o", "\u001b[5A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b(B\u001b[m**[Supertone/supertonic-3](https://hf.co/Supertone●\u001b[13;32r\u001b[32;1H\n\u001b[31;55H                                                                 \r\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[10.939607, "o", "\u001b[6A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model. -●                                                      \r\n"]
+[10.960763, "o", "\u001b[6A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b(B\u001b[m**[HauhauCS/Qwen3.6-35B-A3●\u001b[13;32r\u001b[32;1H\n\u001b[31;31H                                                                                         \r\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[10.985277, "o", "\u001b[7A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b(B\u001b[m**[HauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive](●                                                       \r\n"]
+[11.056264, "o", "\u001b[7A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b(B\u001b[m**[CohereLabs●\u001b[13;32r\u001b[32;1H\n\u001b[31;18H                                                                                                      \r\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[11.123478, "o", "\u001b[8A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b(B\u001b[m**[CohereLabs/command-a-plus●                                                                                       \r\n"]
+[11.187463, "o", "\u001b[8A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b(B\u001b[m**[CohereLabs/command-a-plus-05-2026-w4a●                                                                           \r\n"]
+[11.248955, "o", "\u001b[8A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b(B\u001b[m**[CohereLabs/command-a-plus-05-2026-w4a4](https://hf.co/CohereLabs/command-a-plus-05-2026-w●                       \r\n"]
+[11.312543, "o", "\u001b[8A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m●                                                                             \r\n"]
+[11.378351, "o", "\u001b[8A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b(B\u001b[m**[SulphurAI/Sulphur-2-base](●\u001b[13;32r\u001b[32;1H\n\u001b[31;34H                                                                                      \r\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[11.436153, "o", "\u001b[9A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b(B\u001b[m**[SulphurAI/Sulphur-2-base](https://hf.co/SulphurAI/Sulphur-●                                                      \r\n"]
+[11.500605, "o", "\u001b[9A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b(B\u001b[m**●\u001b[13;32r\u001b[32;1H\n\u001b[31;7H                                                                                                                 \r\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[11.564845, "o", "\u001b[10A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b(B\u001b[m**[deepseek-ai/DeepSeek-V4-Pro](https://hf.co/de●                                                                   \r\n"]
+[11.626279, "o", "\u001b[10A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general●                                                                              \r\n"]
+[11.687321, "o", "\u001b[10A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m —\u001b[13;32r\u001b[1;1H\u001b[3S\u001b[28;33H general text-generation model.                                                        
\u001b[30;1H\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[K\r\n\u001b[K\r\n\u001b[1m \u001b(B\u001b[m\u001b[K\u001b[1m• \u001b(B\u001b[mThis is a **●                                                                                                       \u001b[1;32r\u001b[32;1H"]
+[11.749654, "o", "\u001b[13A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        
\u001b[30;1H\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[32;1H\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending”●                                                                             \r"]
+[11.813091, "o", "\u001b[15dBased on a live Hub search sorted by \u001b[1mtrending score\u001b(B\u001b[m at the time of my query, these are currently near the top:         \u001b[17;1H\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        
\r\n\u001b[K\r\n\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[30;1H\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                                                          \r\n\u001b[1m • \u001b(B\u001b[mHugging Face●                                                                                                       \r\n\u001b[K\u001b[A"]
+[11.87563, "o", "\u001b[14A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        \u001b[28;1H\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[30;1H\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                 
                                         \r\n\u001b[1m • \u001b(B\u001b[mHugging Face trending is based on the Hub’s own●                                                                    \r"]
+[11.939569, "o", "\u001b[14A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        \u001b[28;1H\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[30;1H\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                
                                          \r\n\u001b[1m • \u001b(B\u001b[mHugging Face trending is based on the Hub’s own ranking signal, not a single universal popularity metric. -●        \r"]
+[12.002518, "o", "\u001b[15d\u001b[K\r\n\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        
\r\n\u001b[K\r\n\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \r\n\u001b[K\r\n\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                                                          \r\n\u001b[1m • \u001b(B\u001b[mHugging Face trending is based on the Hub’s own ranking signal, not a single universal popularity metric.           \r\n\u001b[1m • \u001b(B\u001b[mSome models may●                                                                                                    \r"]
+[12.064367, "o", "\u001b[15A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        \u001b[27;1H\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[29;1H\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                
                                          \r\n\u001b[1m • \u001b(B\u001b[mHugging Face trending is based on the Hub’s own ranking signal, not a single universal popularity metric.           \r\n\u001b[1m • \u001b(B\u001b[mSome models may trend due to \u001b[1mrecent releases\u001b(B\u001b[m, **●                                                                   \r"]
+[12.125296, "o", "\u001b[15A\u001b[1m • \u001b[94m\u001b[4mbytedance-research/Lance\u001b(B\u001b[m — multimodal any-to-any model; currently the highest trending score in this snapshot.      \r\n\u001b[1m • \u001b[94m\u001b[4mNemoStation/Marlin-2B\u001b(B\u001b[m — video-text-to-text model focused on video understanding/captioning.                         \r\n\u001b[1m • \u001b[94m\u001b[4mmeituan-longcat/LongCat-Video-Avatar-1.5\u001b(B\u001b[m — audio-driven video/avatar generation.                                    \r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        \u001b[27;1H\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[29;1H\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                
                                          \r\n\u001b[1m • \u001b(B\u001b[mHugging Face trending is based on the Hub’s own ranking signal, not a single universal popularity metric.           \r\n\u001b[1m • \u001b(B\u001b[mSome models may trend due to \u001b[1mrecent releases\u001b(B\u001b[m, \u001b[1mdownload spikes\u001b(B\u001b[m, or \u001b[1mcommunity attention\u001b(B\u001b[m, so the●                      \r"]
+[12.190698, "o", "\u001b[15dvideo/avatar generation.                                                                                               \r\n\u001b[K\r\n\u001b[1m • \u001b[94m\u001b[4msapientinc/HRM-Text-1B\u001b(B\u001b[m — text-generation model centered on hierarchical reasoning.                                  \r\n\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        \r\n\u001b[K\r\n\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[27;1H\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                                                          \r\n\u001b[1m • \u001b(B\u001b[mHugging Face trending is based on the Hub’s own ranking signal, not a single universal popularity metric.           
\r\n\u001b[1m • \u001b(B\u001b[mSome models may trend due to \u001b[1mrecent releases\u001b(B\u001b[m, \u001b[1mdownload spikes\u001b(B\u001b[m, or \u001b[1mcommunity attention\u001b(B\u001b[m, so the list may look         \r\n\u001b[1m   \u001b(B\u001b[mdifferent in a few minutes●                                                                                         \r\n\u001b[K"]
+[12.253889, "o", "\u001b[15deasoning.                                                                                                              \u001b[17;1H\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        \r\n\u001b[K\r\n\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \r\n\u001b[K\r\n\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                                                          \r\n\u001b[1m • \u001b(B\u001b[mHugging Face trending is based on the Hub’s own ranking signal, not a single universal popularity metric.           \r\n\u001b[1m • \u001b(B\u001b[mSome models may trend due to \u001b[1mrecent releases\u001b(B\u001b[m, \u001b[1mdownload spikes\u001b(B\u001b[m, or \u001b[1mcommunity attention\u001b(B\u001b[m, so the list may look         \r\n\u001b[1m   \u001b(B\u001b[mdifferent in a few minutes or hours.                                                
                                \r\n\u001b[K\r\nIf●                                                                                                                    \r"]
+[12.313291, "o", "\u001b[14A\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        \u001b[24;1H\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[26;1H\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                                                          \r\n\u001b[1m • \u001b(B\u001b[mHugging Face trending is based on the Hub’s own ranking signal, not a single universal popularity metric.           \r\n\u001b[1m • \u001b(B\u001b[mSome models may trend due to \u001b[1mrecent releases\u001b(B\u001b[m, \u001b[1mdownload spikes\u001b(B\u001b[m, or \u001b[1mcommunity attention\u001b(B\u001b[m, so the list may look         \r\n\u001b[1m   \u001b(B\u001b[mdifferent in a few minutes or hours.                                                                                
\u001b[31;1HIf you want, I can also narrow this to \u001b[1monly text models\u001b(B\u001b[m,●                                                              \r"]
+[12.375387, "o", "\u001b[14A\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        \u001b[24;1H\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[26;1H\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                                                          \r\n\u001b[1m • \u001b(B\u001b[mHugging Face trending is based on the Hub’s own ranking signal, not a single universal popularity metric.           \r\n\u001b[1m • \u001b(B\u001b[mSome models may trend due to \u001b[1mrecent releases\u001b(B\u001b[m, \u001b[1mdownload spikes\u001b(B\u001b[m, or \u001b[1mcommunity attention\u001b(B\u001b[m, so the list may look         \r\n\u001b[1m   \u001b(B\u001b[mdifferent in a few minutes or hours.                                                                                
\u001b[31;1HIf you want, I can also narrow this to \u001b[1monly text models\u001b(B\u001b[m, \u001b[1mopen-weight LLMs\u001b(B\u001b[m, or●                                         \r"]
+[12.436454, "o", "\u001b[14A\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        \u001b[24;1H\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[26;1H\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                                                          \r\n\u001b[1m • \u001b(B\u001b[mHugging Face trending is based on the Hub’s own ranking signal, not a single universal popularity metric.           \r\n\u001b[1m • \u001b(B\u001b[mSome models may trend due to \u001b[1mrecent releases\u001b(B\u001b[m, \u001b[1mdownload spikes\u001b(B\u001b[m, or \u001b[1mcommunity attention\u001b(B\u001b[m, so the list may look         \r\n\u001b[1m   \u001b(B\u001b[mdifferent in a few minutes or hours.                                                                                
\u001b[31;1HIf you want, I can also narrow this to \u001b[1monly text models\u001b(B\u001b[m, \u001b[1mopen-weight LLMs\u001b(B\u001b[m, or **the top 10 by●                         \r"]
+[12.501122, "o", "\u001b[14A\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        \u001b[24;1H\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[26;1H\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                                                          \r\n\u001b[1m • \u001b(B\u001b[mHugging Face trending is based on the Hub’s own ranking signal, not a single universal popularity metric.           \r\n\u001b[1m • \u001b(B\u001b[mSome models may trend due to \u001b[1mrecent releases\u001b(B\u001b[m, \u001b[1mdownload spikes\u001b(B\u001b[m, or \u001b[1mcommunity attention\u001b(B\u001b[m, so the list may look         \r\n\u001b[1m   \u001b(B\u001b[mdifferent in a few minutes or hours.                                                                                
\u001b[31;1HIf you want, I can also narrow this to \u001b[1monly text models\u001b(B\u001b[m, \u001b[1mopen-weight LLMs\u001b(B\u001b[m, or \u001b[1mthe top 10 by downloads/likes\u001b(B\u001b[m instead.●  \r"]
+[12.503646, "o", "\u001b[14A\u001b[1m • \u001b[94m\u001b[4mopenbmb/MiniCPM5-1B\u001b(B\u001b[m — small long-context / tool-calling text model.                                                 \r\n\u001b[1m • \u001b[94m\u001b[4mSupertone/supertonic-3\u001b(B\u001b[m — multilingual text-to-speech model.                                                         \r\n\u001b[1m • \u001b[94m\u001b[4mHauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive\u001b(B\u001b[m — multimodal Qwen-based model.                              \r\n\u001b[1m • \u001b[94m\u001b[4mCohereLabs/command-a-plus-05-2026-w4a4\u001b(B\u001b[m — multimodal image-text-to-text model.                                       \r\n\u001b[1m • \u001b[94m\u001b[4mSulphurAI/Sulphur-2-base\u001b(B\u001b[m — text-to-video model.                                                                     \r\n\u001b[1m • \u001b[94m\u001b[4mdeepseek-ai/DeepSeek-V4-Pro\u001b(B\u001b[m — general text-generation model.                                                        \u001b[24;1H\u001b[33m\u001b[1mUncertainty\u001b(B\u001b[m                                                                                                            \u001b[26;1H\u001b[1m • \u001b(B\u001b[mThis is a \u001b[1mlive snapshot\u001b(B\u001b[m, so “trending” can change quickly.                                                          \r\n\u001b[1m • \u001b(B\u001b[mHugging Face trending is based on the Hub’s own ranking signal, not a single universal popularity metric.           \r\n\u001b[1m • \u001b(B\u001b[mSome models may trend due to \u001b[1mrecent releases\u001b(B\u001b[m, \u001b[1mdownload spikes\u001b(B\u001b[m, or \u001b[1mcommunity attention\u001b(B\u001b[m, so the list may look         \r\n\u001b[1m   \u001b(B\u001b[mdifferent in a few minutes or hours.                                                                                
\u001b[31;1HIf you want, I can also narrow this to \u001b[1monly text models\u001b(B\u001b[m, \u001b[1mopen-weight LLMs\u001b(B\u001b[m, or \u001b[1mthe top 10 by downloads/likes\u001b(B\u001b[m instead.   \r\n\u001b[?12l\u001b[?25h\u001b[13;32r\u001b[32;1H\n\u001b[K\u001b[1;32r\u001b[32;1H"]
+[12.509875, "o", "\u001b[13;32r\u001b[32;1H\u001b[K❯\r\u001b[5S\u001b[4A\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[1;32r\u001b[27;3H"]
+[12.513589, "o", "\u001b[13;32r\u001b[32;1H\n\u001b[K\u001b[30m\u001b[47m\u001b[7m \u001b[35m\u001b[40m a2a_remote[A2A] \u001b[30m\u001b[47m  \u001b[100m ▲ \u001b[47m \u001b[42mhf-model-research\u001b[47m | 001 | \u001b[32m\u001b[40m NRML \u001b[30m\u001b[47m | fast-agent 0.7.12                                            \u001b[32;1H\u001b[1;32r\u001b[26;3H\u001b(B\u001b[m"]
+[73.109057, "o", "\r❯ /exit\u001b[32;1H \u001b[K\u001b[26;8H"]
+[73.109361, "o", "\u001b[13;32r\u001b[27;1H\u001b[J\u001b[A\u001b[K\u001b[1;32r\u001b[26;1H"]
+[73.110378, "o", "\u001b[2m❯ \u001b[35m/\u001b[95mexit\r\n\u001b(B\u001b[m"]
+[73.111632, "o", "\r\n\u001b[31m\u001b[1mUser requested exit:\r\n\u001b(B\u001b[mUser requested to exit fast-agent session\r\n"]
+[73.427316, "o", "\u001b[?25l\u001b[Hfast-agent A2A server ready\u001b[K\r\nmodel: codexresponses.gpt-5.4-mini\u001b[K\r\nMCP: https://hf.co/mcp\u001b[K\r\nlog: /tmp/a2a-real-llm-server.log\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[?12l\u001b[?25h\u001b[6d"]
+[73.681045, "o", "\u001b[?25l\u001b[?12l\u001b[?25h"]
diff --git a/docs/docs/assets/a2a/a2a-server-card.cast b/docs/docs/assets/a2a/a2a-server-card.cast
new file mode 100644
index 000000000..53b060bed
--- /dev/null
+++ b/docs/docs/assets/a2a/a2a-server-card.cast
@@ -0,0 +1,6 @@
+{"version": 2, "width": 104, "height": 20, "timestamp": 1779385202, "idle_time_limit": 1.0, "env": {"SHELL": "/bin/bash", "TERM": "xterm-256color"}, "title": "fast-agent A2A server card and transports"}
+[0.1, "o", "\u001b[1;36m$ uv run fast-agent serve --transport a2a --host 0.0.0.0 --port 41241 --agent-cards ./agents\u001b[0m\r\n"]
+[0.8, "o", "fast-agent A2A server listening on http://0.0.0.0:41241\r\n"]
+[1.4, "o", "\u001b[1;36m$ curl -s http://a2a.example.test:41241/.well-known/agent-card.json | jq '.supportedInterfaces'\u001b[0m\r\n"]
+[2.0, "o", "[\r\n  {\r\n    \"protocolBinding\": \"JSONRPC\",\r\n    \"url\": \"http://a2a.example.test:41241/a2a/jsonrpc\"\r\n  },\r\n  {\r\n    \"protocolBinding\": \"HTTP+JSON\",\r\n    \"url\": \"http://a2a.example.test:41241/a2a/rest\"\r\n  }\r\n]\r\n"]
+[3.0, "o", "\u001b[1;32mThe served card uses the hostname from the incoming card request.\u001b[0m\r\n"]
diff --git a/docs/docs/assets/a2a/a2a-streaming-files.cast b/docs/docs/assets/a2a/a2a-streaming-files.cast
new file mode 100644
index 000000000..f20eb7250
--- /dev/null
+++ b/docs/docs/assets/a2a/a2a-streaming-files.cast
@@ -0,0 +1,122 @@
+{"version": 2, "width": 104, "height": 27, "timestamp": 1779315468, "idle_time_limit": 1.3, "env": {"SHELL": "/bin/bash", "TERM": "xterm-256color"}, "title": "fast-agent A2A streaming, files, and input-required demo"}
+[0.017471, "o", "\u001b[?1049h\u001b[22;0;0t\u001b[?1h\u001b=\u001b[H\u001b[2J\u001b[?12l\u001b[?25h\u001b[?1000l\u001b[?1002l\u001b[?1003l\u001b[?1006l\u001b[?1005l\u001b(B\u001b[m\u001b[?12l\u001b[?25h\u001b[?1006l\u001b[?1000l\u001b[?1002l\u001b[?1003l\u001b[?2004l\u001b[1;1H\u001b[1;27r\u001b[>c\u001b[>q\u001b[1;1H\u001b[?25l\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[?12l\u001b[?25h\u001b[H"]
+[0.01813, "o", "\u001b(B\u001b[m\u001b[?12l\u001b[?25h\u001b[?1006l\u001b[?1000l\u001b[?1002l\u001b[?1003l\u001b[?2004l\u001b[1;1H\u001b[1;27r\u001b[1;1H\u001b[?25l\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[?12l\u001b[?25h\u001b[H"]
+[1.019492, "o", "\u001b[?7727h"]
+[2.063998, "o", "\u001b[?25l"]
+[2.17086, "o", "fast-agent is accessing the OS keyring for stored tokens. Some platforms may pause and show a prompt. \r\n(loading Codex OAuth tokens)\r\n"]
+[2.732611, "o", "\u001b[?12l\u001b[?25h"]
+[2.740202, "o", "\u001b[2mUse \u001b[32m'/'\u001b[39m for commands, \u001b[32m'!'\u001b[39m for shell. \u001b[32m'#'\u001b[39m to query, \u001b[32m'@'\u001b[39m to switch agents\r\nCTRL+T multiline, CTRL+Y copy last message, CTRL+E external editor.\r\nCTRL+Space or Tab for path completion. Use \u001b[35m/\u001b[95mattach\u001b[39m, `^file:`, or `^url:` for attachments. F10 to clear.\r\n\u001b(B\u001b[m"]
+[2.740663, "o", "\u001b[2mfast-agent environment\u001b(B\u001b[m \u001b[34m~/source/fast-agent-pr/.cdx\u001b[39m\u001b[2m \u001b[1m(\u001b[36m2\u001b(B\u001b[m\u001b[2m agents, \u001b[36m\u001b[1m2\u001b(B\u001b[m\u001b[2m hooks, \u001b[36m\u001b[1m8\u001b(B\u001b[m\u001b[2m extensions, \u001b[36m\u001b[1m2\u001b(B\u001b[m\u001b[2m modelrefs\u001b[1m)\u001b(B\u001b[m\u001b[2m via \r\ncli\r\n\u001b(B\u001b[m"]
+[2.740791, "o", "\r\n"]
+[2.742784, "o", "\r\n\u001b[J\u001b[A\u001b[K❯\u001b[C\u001b[?2004h"]
+[2.74691, "o", "\u001b[27;1H\u001b[30m\u001b[47m\u001b[7m \u001b[35m\u001b[40m a2a_remote[A2A] \u001b[30m\u001b[47m  \u001b[100m ▲ \u001b[47m \u001b[42mfast-agent fake A2A serv…\u001b[47m | 000 | \u001b[32m\u001b[40m NRML \u001b[30m\u001b[47m | fast-agent 0.7.8                     \u001b[9;3H\u001b(B\u001b[m"]
+[4.022714, "o", "\r❯ /a2a help\u001b[27;1H \u001b[K\u001b[9;12H"]
+[4.023539, "o", "\r\n\u001b[J\u001b[A\u001b[K"]
+[4.023645, "o", "\u001b[?2004l"]
+[4.024947, "o", "\u001b[2m❯ \u001b[35m/\u001b[95ma2a\u001b[39m help\r\n\u001b(B\u001b[m"]
+[4.153861, "o", "\u001b[1mA2A commands\r\n\u001b(B\u001b[m"]
+[4.154026, "o", "  \u001b[35m/\u001b[95ma2a\u001b[39m list\r\n"]
+[4.154143, "o", "  \u001b[35m/\u001b[95ma2a\u001b[39m status \r\n"]
+[4.154212, "o", "  \u001b[35m/\u001b[95ma2a\u001b[39m card \r\n"]
+[4.154502, "o", "  \u001b[35m/\u001b[95ma2a\u001b[39m transport \r\n  \u001b[35m/\u001b[95ma2a\u001b[39m reset \r\n"]
+[4.154731, "o", "  \u001b[35m/\u001b[95ma2a\u001b[39m connect \u001b[1m<\u001b[95murl\u001b[39m>\u001b(B\u001b[m \u001b[1m[\u001b(B\u001b[m--transport JSONRPC|HTTP+JSON|GRPC\u001b[1m]\u001b(B\u001b[m \u001b[1m[\u001b(B\u001b[m--name NAME\u001b[1m]\u001b(B\u001b[m \u001b[1m[\u001b(B\u001b[m--card-path PATH\u001b[1m]\r\n\u001b(B\u001b[m"]
+[4.154842, "o", "  \u001b[35m/\u001b[95ma2a\u001b[39m help\r\n"]
+[4.160033, "o", "\r\n\u001b[J\u001b[A\u001b[K❯\u001b[C\u001b[?2004h"]
+[4.163184, "o", "\u001b[27;1H\u001b[30m\u001b[47m\u001b[7m \u001b[35m\u001b[40m a2a_remote[A2A] \u001b[30m\u001b[47m  \u001b[100m ▲ \u001b[47m \u001b[42mfast-agent fake A2A serv…\u001b[47m | 000 | \u001b[32m\u001b[40m NRML \u001b[30m\u001b[47m | fast-agent 0.7.8                     \u001b[18;3H\u001b(B\u001b[m"]
+[8.02567, "o", "\r❯ help\u001b[27;1H \u001b[K\u001b[18;7H"]
+[8.025923, "o", "\r\n\u001b[J\u001b[A\u001b[K"]
+[8.026097, "o", "\u001b[?2004l"]
+[8.026514, "o", "\u001b[A\u001b[K"]
+[8.026899, "o", "\u001b[?25l"]
+[8.027763, "o", "\r\n"]
+[8.027971, "o", "\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m \u001b[34ma2a_remote\u001b[39m \u001b[2m──────────────────────────────────────────────────────────────────────────────────────────\u001b[19;1H\u001b(B\u001b[m"]
+[8.028503, "o", "help\r\n"]
+[8.02983, "o", "\u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.030401, "o", "\r\u001b[K\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m\u001b[34m Sending       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.038376, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[8.038703, "o", "\r\u001b[K\u001b[?12l\u001b[?25h"]
+[8.038974, "o", "\r\n"]
+[8.039116, "o", "\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m \u001b[32ma2a_remote\u001b[39m \u001b[2mA2A\r\n\u001b(B\u001b[m"]
+[8.040811, "o", "Fake A2A server commands:                                                                               \u001b[24;1H\u001b[1m • \u001b(B\u001b[mhello: echo a normal response                                                                        \u001b[25;1H\u001b[1m • \u001b(B\u001b[mplease stream: emit two short streaming artifact updates                                             \u001b[26;1H\u001b[1m • \u001b(B\u001b[mplease long stream: emit a longer multi-step streaming artifact                                      \u001b[27;1H\u001b[1m •\u001b(B\u001b[m\r\n\u001b[26;3H\u001b[1m \u001b(B\u001b[mrespond with files: return text, URL, data, and raw byte parts                                       \u001b[27;1H\u001b[1m \u001b(B\u001b[m\u001b[K\u001b[1m•\u001b(B\u001b[m\r\n\u001b[26;3H\u001b[1m \u001b(B\u001b[mneed input: enter an INPUT_REQUIRED task; reply with a value such as blue                            \u001b[27;1H\u001b[1m \u001b(B\u001b[m\u001b[K\u001b[1m•\u001b(B\u001b[m\r\n\u001b[26;3H\u001b[1m \u001b(B\u001b[mhelp: show this menu                                                                                 \u001b[27;1H\u001b[K"]
+[8.040952, "o", "\n\u001b[K"]
+[8.046611, "o", "\u001b[K❯\r\u001b[7S\u001b[6A\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[20;3H\u001b[?2004h"]
+[8.049913, "o", "\u001b[27;1H\n\u001b[K\u001b[30m\u001b[47m\u001b[7m \u001b[35m\u001b[40m a2a_remote[A2A] \u001b[30m\u001b[47m  \u001b[100m ▲ \u001b[47m \u001b[42mfast-agent fake A2A serv…\u001b[47m | 001 | \u001b[32m\u001b[40m NRML \u001b[30m\u001b[47m | fast-agent 0.7.8                     \u001b[19;3H\u001b(B\u001b[m"]
+[12.030257, "o", "\r❯ please stream\u001b[27;1H \u001b[K\u001b[19;16H"]
+[12.030461, "o", "\r\n\u001b[J\u001b[A\u001b[K"]
+[12.030628, "o", "\u001b[?2004l"]
+[12.030917, "o", "\u001b[A"]
+[12.031103, "o", "\u001b[?25l"]
+[12.031887, "o", "\r\n"]
+[12.032129, "o", "\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m \u001b[34ma2a_remote\u001b[39m \u001b[2m──────────────────────────────────────────────────────────────────────────────────────────\u001b[20;1H\u001b(B\u001b[m"]
+[12.032353, "o", "please stream\r\n"]
+[12.033303, "o", "\u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[12.033717, "o", "\r\u001b[K\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m\u001b[34m Sending       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[12.03681, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[12.03725, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[12.037801, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m stream        ⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[12.132465, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m stream        ⠂\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[12.233116, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m stream        ⠄\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[12.333877, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m stream        ⡀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[12.434559, "o", "\r\u001b[K\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m\u001b[32m stream        ⢀\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[12.437172, "o", "\r\u001b[K\u001b[?12l\u001b[?25h"]
+[12.437409, "o", "\r\n"]
+[12.437547, "o", "\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m \u001b[32ma2a_remote\u001b[39m \u001b[2mA2A\r\n\u001b(B\u001b[m"]
+[12.437845, "o", "stream chunk one\r\nstream chunk two\r\n"]
+[12.438039, "o", "\r\n"]
+[12.443878, "o", "\r\n\u001b[J\u001b[A\u001b[K❯\r\u001b[6S\u001b[4A\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[20;3H\u001b[?2004h"]
+[12.447256, "o", "\u001b[27;1H\n\u001b[K\u001b[30m\u001b[47m\u001b[7m \u001b[35m\u001b[40m a2a_remote[A2A] \u001b[30m\u001b[47m  \u001b[100m ▲ \u001b[47m \u001b[42mfast-agent fake A2A serv…\u001b[47m | 002 | \u001b[32m\u001b[40m NRML \u001b[30m\u001b[47m | fast-agent 0.7.8                     \u001b[19;3H\u001b(B\u001b[m"]
+[16.037687, "o", "\r❯ respond with files\u001b[27;1H \u001b[K\u001b[19;21H"]
+[16.037893, "o", "\r\n\u001b[J\u001b[A\u001b[K\u001b[?2004l"]
+[16.038686, "o", "\u001b[A"]
+[16.038885, "o", "\u001b[?25l"]
+[16.039608, "o", "\r\n"]
+[16.040014, "o", "\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m \u001b[34ma2a_remote\u001b[39m \u001b[2m──────────────────────────────────────────────────────────────────────────────────────────\u001b[20;1H\u001b(B\u001b[mrespond with files\r\n"]
+[16.040968, "o", "\u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[16.041374, "o", "\r\u001b[K\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m\u001b[34m Sending       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[16.044746, "o", "\r\u001b[K\u001b[37m▎\u001b[2m•\u001b(B\u001b[m\u001b[37m Updated       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[16.045081, "o", "\r\u001b[K\u001b[?12l\u001b[?25h"]
+[16.045546, "o", "\r\n"]
+[16.045832, "o", "\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m \u001b[32ma2a_remote\u001b[39m \u001b[2mA2A\r\n\u001b(B\u001b[m"]
+[16.048052, "o", "\u001b[4S\u001b[4Afile response \u001b[94m\u001b[4mreport.pdf\u001b(B\u001b[m (application/pdf)                                                              \u001b[20;1H\u001b[48;5;234m                                                                                                        \u001b[21;1H\u001b[38;5;252m{\u001b[39m                                                                                                       \u001b[22;1H\u001b[38;5;241m  \u001b[38;5;70m\u001b[1m\"ok\"\u001b(B\u001b[m\u001b[38;5;252m\u001b[48;5;234m:\u001b[38;5;241m \u001b[38;5;70m\u001b[1mtrue\u001b(B\u001b[m\u001b[38;5;252m\u001b[48;5;234m,\u001b[39m                                                                                           \u001b[23;1H\u001b[38;5;241m  \u001b[38;5;70m\u001b[1m\"source\"\u001b(B\u001b[m\u001b[38;5;252m\u001b[48;5;234m:\u001b[38;5;241m \u001b[38;5;214m\"fake-a2a-server\"\u001b[39m                                                                           \u001b[24;1H\u001b[38;5;252m}\u001b[39m                                                                                                       \u001b[25;1H                                                                                                        \u001b[26;1H\u001b[49m[note.txt: 3 bytes text/plain]                                                                          \u001b[27;1H\u001b[K\n\u001b[K"]
+[16.05407, "o", "\u001b[K❯\r\u001b[7S\u001b[6A\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[20;3H\u001b[?2004h"]
+[16.05789, "o", "\u001b[27;1H\n\u001b[K\u001b[30m\u001b[47m\u001b[7m \u001b[35m\u001b[40m a2a_remote[A2A] \u001b[30m\u001b[47m  \u001b[100m ▲ \u001b[47m \u001b[42mfast-agent fake A2A serv…\u001b[47m | 003 | \u001b[32m\u001b[40m NRML \u001b[30m\u001b[47m | fast-agent 0.7.8                     \u001b[19;3H\u001b(B\u001b[m"]
+[20.037653, "o", "\r❯ need input\u001b[27;1H \u001b[K\u001b[19;13H"]
+[20.03787, "o", "\r\n\u001b[J\u001b[A\u001b[K"]
+[20.038037, "o", "\u001b[?2004l"]
+[20.038339, "o", "\u001b[A"]
+[20.038523, "o", "\u001b[?25l"]
+[20.039175, "o", "\r\n"]
+[20.039413, "o", "\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m \u001b[34ma2a_remote\u001b[39m \u001b[2m──────────────────────────────────────────────────────────────────────────────────────────\u001b[20;1H\u001b(B\u001b[m"]
+[20.039629, "o", "need input\r\n"]
+[20.040586, "o", "\u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[20.040944, "o", "\r\u001b[K\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m\u001b[34m Sending       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[20.043851, "o", "\r\u001b[K\u001b[?12l\u001b[?25h"]
+[20.044103, "o", "\r\n"]
+[20.044257, "o", "\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m \u001b[32ma2a_remote\u001b[39m \u001b[2mA2A\r\n\u001b(B\u001b[m"]
+[20.044518, "o", "A2A task TASK_STATE_INPUT_REQUIRED: Please provide the missing value.\r\n\r\n"]
+[20.049456, "o", "\r\n\u001b[J\u001b[A\u001b[K❯\r\u001b[5S\u001b[2A\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[20;3H\u001b[?2004h"]
+[20.052764, "o", "\u001b[27;1H\n\u001b[K\u001b[30m\u001b[47m\u001b[7m \u001b[35m\u001b[40m a2a_remote[A2A] \u001b[30m\u001b[47m  \u001b[100m ▲ \u001b[47m \u001b[42mfast-agent fake A2A serv…\u001b[47m | 004 | \u001b[32m\u001b[40m NRML \u001b[30m\u001b[47m | fast-agent 0.7.8                     \u001b[19;3H\u001b(B\u001b[m"]
+[24.039855, "o", "\r❯ blue\u001b[27;1H \u001b[K\u001b[19;7H"]
+[24.040157, "o", "\r\n\u001b[J\u001b[A\u001b[K\u001b[?2004l"]
+[24.04054, "o", "\u001b[A"]
+[24.040757, "o", "\u001b[?25l"]
+[24.041613, "o", "\r\n"]
+[24.041851, "o", "\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m \u001b[34ma2a_remote\u001b[39m \u001b[2m──────────────────────────────────────────────────────────────────────────────────────────\u001b[20;1H\u001b(B\u001b[m"]
+[24.042042, "o", "blue\r\n"]
+[24.043107, "o", "\u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[24.043504, "o", "\r\u001b[K\u001b[34m▎\u001b[2m▶\u001b(B\u001b[m\u001b[34m Sending       \u001b[32m⠁\u001b[39m \u001b[34ma2a_remote\u001b[39m \u001b[37m\u001b[2m                        \u001b(B\u001b[m"]
+[24.047733, "o", "\r\u001b[K\u001b[?12l\u001b[?25h"]
+[24.047941, "o", "\r\n"]
+[24.04816, "o", "\u001b[32m▎\u001b[2m◀\u001b(B\u001b[m \u001b[32ma2a_remote\u001b[39m \u001b[2mA2A\r\n\u001b(B\u001b[m"]
+[24.048285, "o", "input received: blue\r\n"]
+[24.048469, "o", "\r\n"]
+[24.054183, "o", "\r\n\u001b[J\u001b[A\u001b[K❯\r\u001b[5S\u001b[2A\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\r\n\u001b[K\u001b[20;3H\u001b[?2004h"]
+[24.057601, "o", "\u001b[27;1H\n\u001b[K\u001b[30m\u001b[47m\u001b[7m \u001b[35m\u001b[40m a2a_remote[A2A] \u001b[30m\u001b[47m  \u001b[100m ▲ \u001b[47m \u001b[42mfast-agent fake A2A serv…\u001b[47m | 005 | \u001b[32m\u001b[40m NRML \u001b[30m\u001b[47m | fast-agent 0.7.8                     \u001b[19;3H\u001b(B\u001b[m"]
+[28.043905, "o", "\r❯ /exit\u001b[27;1H \u001b[K\u001b[19;8H"]
+[28.04412, "o", "\r\n\u001b[J\u001b[A\u001b[K"]
+[28.044301, "o", "\u001b[?2004l"]
+[28.044799, "o", "\u001b[2m❯ \u001b[35m/\u001b[95mexit\r\n\u001b(B\u001b[m"]
+[28.045225, "o", "\r\n\u001b[31m\u001b[1mUser requested exit:\r\n\u001b(B\u001b[m"]
+[28.045377, "o", "User requested to exit fast-agent session\r\n"]
+[28.538456, "o", "\u001b[1;27r\u001b(B\u001b[m\u001b[?1l\u001b>\u001b[H\u001b[2J\u001b[?12l\u001b[?25h\u001b[?1000l\u001b[?1002l\u001b[?1003l\u001b[?1006l\u001b[?1005l\u001b[?7727l\u001b[?1004l\u001b[?1049l\u001b[23;0;0t"]
+[28.538726, "o", "[exited]\r\n"]
diff --git a/docs/docs/assets/social/a2a/api.png b/docs/docs/assets/social/a2a/api.png
new file mode 100644
index 000000000..1736a9766
Binary files /dev/null and b/docs/docs/assets/social/a2a/api.png differ
diff --git a/docs/docs/assets/social/a2a/client.png b/docs/docs/assets/social/a2a/client.png
new file mode 100644
index 000000000..b81eb5d81
Binary files /dev/null and b/docs/docs/assets/social/a2a/client.png differ
diff --git a/docs/docs/assets/social/a2a/getting-started.png b/docs/docs/assets/social/a2a/getting-started.png
new file mode 100644
index 000000000..ebf17f8f2
Binary files /dev/null and b/docs/docs/assets/social/a2a/getting-started.png differ
diff --git a/docs/docs/assets/social/a2a/host-on-hf.png b/docs/docs/assets/social/a2a/host-on-hf.png
new file mode 100644
index 000000000..f5a597103
Binary files /dev/null and b/docs/docs/assets/social/a2a/host-on-hf.png differ
diff --git a/docs/docs/assets/social/a2a/protocol-compliance.png b/docs/docs/assets/social/a2a/protocol-compliance.png
new file mode 100644
index 000000000..34c10c6ce
Binary files /dev/null and b/docs/docs/assets/social/a2a/protocol-compliance.png differ
diff --git a/docs/docs/assets/social/a2a/server.png b/docs/docs/assets/social/a2a/server.png
new file mode 100644
index 000000000..fa6c70ff2
Binary files /dev/null and b/docs/docs/assets/social/a2a/server.png differ
diff --git a/docs/docs/guides/tui.md b/docs/docs/guides/tui.md
index e688e37f7..391506f03 100644
--- a/docs/docs/guides/tui.md
+++ b/docs/docs/guides/tui.md
@@ -86,6 +86,8 @@ Cast asset:
 - Replay locally: asciinema play docs/docs/assets/tui/hf-image-generation.cast
 -->
 
+The image-generation recording was captured with terminal image rendering enabled and the `halfcell` backend selected, so asciinema can replay ANSI/Unicode cells instead of terminal-specific image protocols. The Markdown image link and source URL remain visible in the recording as a fallback.
+
 ## Paste and Attach Images / Documents
 
 You can attach images and documents using `/attach` or by using the `^<uri|file>` syntax. The indicator in the status bar shows a count of attachments, and is green if they are found, red if there is an error. Press ++f10++ to clear all attachments.
diff --git a/docs/docs/ref/agent_cards.md b/docs/docs/ref/agent_cards.md
index 81dd6a385..3ea3396de 100644
--- a/docs/docs/ref/agent_cards.md
+++ b/docs/docs/ref/agent_cards.md
@@ -129,6 +129,44 @@ When both target-derived values and explicit fields are present, explicit fields
 If an inferred/provided name collides with another server using different settings,
 startup fails with a collision error. Prefer explicit `name` values for stability.
 
+## Remote A2A AgentCards
+
+A remote A2A agent can be loaded as a first-class fast-agent agent with
+`type: a2a`:
+
+```yaml
+type: a2a
+name: hello_remote
+url: http://127.0.0.1:41241
+transport: JSONRPC
+```
+
+`url` is the A2A agent base URL used to resolve the remote card at
+`/.well-known/agent-card.json`. If the card is served elsewhere, set
+`relative_card_path`:
+
+```yaml
+type: a2a
+name: remote_custom_card
+url: https://agent.example.com
+relative_card_path: /custom/agent-card.json
+transport: HTTP+JSON
+```
+
+Supported transport names are `JSONRPC`, `HTTP+JSON`, and `GRPC`. If `transport`
+is omitted, the A2A SDK chooses from the remote AgentCard's advertised
+interfaces. The current client maps text, URL resources, structured data, and
+raw binary parts into fast-agent messages; inbound raw bytes are represented
+as a safe summary (filename, media type, and byte count).
+
+For one-off CLI connections, use `--a2a` instead of writing a card:
+
+```bash
+fast-agent go --a2a http://127.0.0.1:41241 --a2a-transport JSONRPC --message hello
+```
+
+In the TUI, use `/a2a connect http://127.0.0.1:41241 --transport HTTP+JSON --name hello_rest`.
+
 ## Examples
 
 ```bash
diff --git a/docs/docs/ref/docs_automation.md b/docs/docs/ref/docs_automation.md
index eb95c89f3..e1bdc4711 100644
--- a/docs/docs/ref/docs_automation.md
+++ b/docs/docs/ref/docs_automation.md
@@ -31,6 +31,11 @@ uv run scripts/docs.py assess
 - `assess` runs deterministic screenshot checks for capture dimensions, blank or unstyled pages,
   the designed home-page header, and visible terminal areas.
 
+CI runs `generate`, fails if `_generation_warnings.md` is produced, verifies the committed
+`docs/_generated/` snippets are up to date, then runs the strict docs build. Because the docs job is
+part of the shared checks workflow, it gates pull requests, `main`, and tag-triggered PyPI
+publishing.
+
 ## Terminal Captures
 
 Use `scripts/docs_terminal_capture.py` to run a command and write a terminal-style SVG that can be
@@ -106,6 +111,59 @@ fast-agent -x --model codexplan --url 'https://huggingface.co/mcp?bouquet=dynami
 generate a wide cinematic landscape: a quiet alpine lake at sunrise, dark pine silhouettes, snow-capped mountains, warm orange sky reflected in the water, bold simple shapes, high contrast, no text
 ```
 
+A2A recordings reuse the shared asciinema invocation, cleanup, and terminal-teardown trimming
+helpers; the deterministic fake-server/snippet generation stays in the A2A pipeline:
+
+```bash
+uv run scripts/a2a_docs_pipeline.py generate
+uv run scripts/a2a_docs_pipeline.py check
+uv run scripts/a2a_docs_pipeline.py record
+```
+
+For provider-backed A2A recordings, export the recording configuration first so the command is
+copy/paste-ready and the rendered docs can show exactly which environment was used:
+
+```bash
+export HF_TOKEN=...
+export OPENAI_API_KEY=...
+export A2A_REAL_LLM_MODEL=codexresponses.gpt-5.4-mini
+export A2A_HF_MCP_URL=https://hf.co/mcp
+export A2A_REAL_LLM_RECORD_SECONDS=70
+uv run scripts/a2a_docs_pipeline.py record-real-llm
+```
+
+### Image output capture experiment
+
+For review, an interactive Hugging Face MCP image-generation run was recorded with the keyring
+notice suppressed and terminal image rendering enabled:
+
+```bash
+export FAST_AGENT_KEYRING_NOTICE=0
+export LOGGER__TERMINAL_IMAGES__ENABLED=true
+export LOGGER__TERMINAL_IMAGES__BACKEND=halfcell
+uv run fast-agent -x --model codexplan --url https://huggingface.co/mcp
+```
+
+Prompt:
+
+```text
+generate an image of a sunflower
+```
+
+The Hugging Face MCP server invoked `evalstate/flux1_schnell` and returned an `image/webp` block,
+an image URL, and a seed. In the asciinema cast, terminal image output is captured as terminal frames rather than as a separate image asset. Prefer the `halfcell` backend for committed recordings because it replays as ANSI colour and Unicode block characters. A typical captured output includes:
+
+```text
+[IMAGE: image/webp, 81600 bytes]
+Image URL: https://evalstate-flux1-schnell.hf.space/.../image.webp
+Seed used for generation: 2131209976
+[IMAGE 1: image/webp, ... bytes]
+ANSI/Unicode half-block image preview...
+```
+
+The URL and Markdown image link remain visible in the cast, so this is safe as a docs fallback even
+when the player does not reconstruct the inline terminal image exactly.
+
 ## Social Cards
 
 Every Markdown page gets a committed 1200×630 PNG under `docs/assets/social/`. `overrides/main.html`
@@ -174,8 +232,8 @@ aligned with the implementation.
 
 ## Proposed Next Automations
 
-- Add a CI docs job that runs `uv run scripts/docs.py generate`, fails if generated files changed,
-  then runs `uv run scripts/docs.py build` and `uv run scripts/docs.py assess`.
+- Consider adding `uv run scripts/docs.py assess` to CI after the deterministic screenshot flow is
+  stable in hosted runners.
 - Add a snippet verifier that scans docs for `--8<--` includes and confirms every referenced file
   exists under an allowed root.
 - Add example smoke tests for docs-included examples so pages cannot point at broken sample code.
diff --git a/docs/docs/ref/go_command.md b/docs/docs/ref/go_command.md
index ae127beca..e79a69360 100644
--- a/docs/docs/ref/go_command.md
+++ b/docs/docs/ref/go_command.md
@@ -28,12 +28,14 @@ fast-agent go [OPTIONS]
 - `--config-path`, `-c <path or uri>`: Path, HTTP(S) URL, `file://` URI, or `hf://` URI to config file
 - `--servers <server1>,<server2>`: Comma-separated list of server names to enable from config
 - `--url TEXT`: Comma-separated list of HTTP/SSE URLs to connect to directly
-- `--auth TEXT`: Bearer token for authorization with URL-based servers
+- `--auth TEXT`: Bearer token for authorization with remote MCP URL servers and A2A endpoints
 - `--model`, `--models <model_string>`: Override the default model (e.g., haiku, sonnet, gpt-4)
 - `--model`, `--models <model1>,<model2>,...`: Run one agent per model in parallel and print a side-by-side comparison of responses
 - `--pack`, `--card-pack <name>`: Ensure a named card pack is installed in the selected environment before starting
 - `--pack-registry <path or uri>`: Marketplace file path, HTTP(S) URL, `file://` URI, or `hf://` URI used to resolve `--pack` when it is not already installed
 - `--agent-cards`, `--card <path or uri>`: Load AgentCards as runnable agents from a path, HTTP(S) URL, `file://` URI, or `hf://` URI (repeatable)
+- `--a2a <url>`: Connect to a remote A2A agent by base URL or direct AgentCard URL (repeatable); creates temporary `a2a_remote` AgentCards for the run
+- `--a2a-transport <transport>`: Preferred transport for `--a2a`; accepts `JSONRPC`, `HTTP+JSON`, or `GRPC` plus aliases such as `rest` and `json-rpc`
 - `--card-tool <path or uri>`: Load AgentCards from a path, HTTP(S) URL, `file://` URI, or `hf://` URI and attach them as tools to the selected/default agent (repeatable)
 - `--agent <name>`: Target a specific loaded agent by name for `--message`, `--prompt-file`, and initial interactive mode
 - `--message`, `-m TEXT`: Message to send to the agent (skips interactive mode)
@@ -87,6 +89,9 @@ fast-agent go --servers=fetch,filesystem --model=haiku
 # Directly connecting to HTTP/SSE servers via URLs
 fast-agent go --url=http://localhost:8001/mcp,http://api.example.com/sse
 
+# Connect to a remote A2A agent without writing an AgentCard
+fast-agent go --a2a http://127.0.0.1:41241 --a2a-transport JSONRPC --message hello
+
 # Connecting to an authenticated API endpoint
 fast-agent go --url=https://api.example.com/mcp --auth=YOUR_API_TOKEN
 
@@ -184,6 +189,31 @@ fast-agent go --models sonnet,gpt-5-mini.low --agent sonnet --message "Summarize
   for explicit targeting.
 - Explicit targeting can include tool-only agents when needed for testing.
 
+### A2A quick connect
+
+Use `--a2a` when you want a temporary client-only connection to a remote A2A
+agent without creating an AgentCard file. The value should usually be the remote
+agent base URL:
+
+```bash
+fast-agent go --a2a http://127.0.0.1:41241 --a2a-transport HTTP+JSON
+```
+
+Direct card URLs are also accepted and normalized:
+
+```bash
+fast-agent go --a2a http://127.0.0.1:41241/.well-known/agent-card.json
+```
+
+The generated temporary agent names are `a2a_remote`, `a2a_remote_2`, and so on.
+If a single `--a2a` URL is provided and `--agent` is omitted, fast-agent targets
+that temporary A2A agent automatically. For persistent configuration, write an
+AgentCard with `type: a2a` instead.
+
+Inside the TUI, `/a2a connect <url> [--transport ...] [--name ...]` performs the
+same kind of runtime connection. `/a2a status`, `/a2a card`, `/a2a list`, and
+`/a2a reset` provide diagnostics for connected A2A agents.
+
 ### AgentCards vs ToolCards
 
 `tool-cards` are not a separate file format. They are still AgentCards.
@@ -250,8 +280,11 @@ The `--url` parameter allows you to connect directly to HTTP or SSE servers usin
 
 ### Authentication
 
-The `--auth` parameter provides authentication for URL-based servers:
+The `--auth` parameter provides authentication for remote connections created by
+the CLI:
 
-- When provided, it creates an `Authorization: Bearer TOKEN` header for all URL-based servers
-- This is commonly used with API endpoints that require authentication
+- For `--url`, it creates an `Authorization: Bearer TOKEN` header for all URL-based MCP servers
+- For `--a2a`, it creates an `Authorization: Bearer TOKEN` header for all generated ad hoc A2A AgentCards
+- This is a convenience flag for simple runs where the same credential applies to every remote endpoint
+- Use checked-in config or AgentCards when different remote endpoints need different credentials
 - Example: `fast-agent go --url=https://api.example.com/mcp --auth=12345abcde`
diff --git a/docs/generate_reference_docs.py b/docs/generate_reference_docs.py
index f714f7336..cb03923ed 100644
--- a/docs/generate_reference_docs.py
+++ b/docs/generate_reference_docs.py
@@ -6,8 +6,9 @@
 import inspect
 import os
 import sys
+import types
 from pathlib import Path
-from typing import Any
+from typing import Any, get_args, get_origin
 
 DOCS_ROOT = Path(__file__).resolve().parent
 GENERATED_DIR = DOCS_ROOT / "docs" / "_generated"
@@ -64,8 +65,54 @@ def _md_code(lang: str, code: str) -> str:
     return f"```{lang}\n{code.rstrip()}\n```\n"
 
 
+def _format_type(annotation: object) -> str:
+    """Render a type annotation as a stable, readable string for the generated docs."""
+    if annotation is None or annotation is types.NoneType:
+        return "None"
+
+    if annotation is Any:
+        return "Any"
+
+    origin = get_origin(annotation)
+    args = get_args(annotation)
+
+    if origin in {types.UnionType, getattr(types, "UnionType", object)} or str(origin) == "typing.Union":
+        parts = [_format_type(arg) for arg in args]
+        parts = [part for part in parts if part != "None"] + [
+            part for part in parts if part == "None"
+        ]
+        return " | ".join(parts)  # "None" members were moved to the end above
+
+    if str(origin) == "typing.Literal":  # str values via repr(), others recursively
+        values = [repr(arg) if isinstance(arg, str) else _format_type(arg) for arg in args]
+        return f"Literal[{', '.join(values)}]"
+
+    if origin is not None:  # any other annotation that has an origin
+        origin_name = _format_type(origin)
+        if args:
+            return f"{origin_name}[{', '.join(_format_type(arg) for arg in args)}]"
+        return origin_name
+
+    if isinstance(annotation, str):  # string annotations are returned as-is
+        return annotation
+
+    name = getattr(annotation, "__qualname__", None) or getattr(annotation, "__name__", None)
+    module = getattr(annotation, "__module__", None)
+    if name:  # named objects: module-qualified unless from builtins/typing/types
+        if module and module not in {"builtins", "typing", "types"}:
+            return f"{module}.{name}".replace("pathlib._local.Path", "pathlib.Path")
+        return name.replace("NoneType", "None")
+
+    return str(annotation).replace("typing.", "").replace("NoneType", "None")
+
+
+def _normalize_signature_text(signature: str) -> str:
+    """Normalize Python-version-specific signature text: "pathlib._local.Path" becomes "pathlib.Path"."""
+    return signature.replace("pathlib._local.Path", "pathlib.Path")
+
+
 def _format_signature(name: str, func: Any) -> str:
-    sig = str(inspect.signature(func))
+    sig = _normalize_signature_text(str(inspect.signature(func)))  # normalize version-specific paths
     return f"{name}{sig}"
 
 
@@ -114,8 +161,7 @@ def generate_request_params_reference() -> str:
     lines.append("| --- | --- | --- | --- |\n")
 
     for field_name, field_info in RequestParams.model_fields.items():
-        annotation = field_info.annotation
-        type_str = getattr(annotation, "__name__", None) or str(annotation)
+        type_str = _format_type(field_info.annotation)
         default = field_info.default
         if default is None and field_info.default_factory is not None:
             default_str = "`<factory>`"
diff --git a/docs/social_cards/contact-sheet.html b/docs/social_cards/contact-sheet.html
index e60a78a18..a09b53fde 100644
--- a/docs/social_cards/contact-sheet.html
+++ b/docs/social_cards/contact-sheet.html
@@ -99,9 +99,104 @@
       <h1>fast-agent social cards</h1>
       <p>Generated review sheet for committed Open Graph/Twitter images.</p>
     </div>
-    <div class="summary">50 cards · 1200×630px target · 1000 KB max</div>
+    <div class="summary">56 cards · 1200×630px target · 1000 KB max</div>
   </header>
   
+            <section>
+              <h2>A2A</h2>
+              <div class="grid">
+                <article class="card ok">
+                  <a class="thumb" href="../docs/assets/social/a2a/api.png"><img src="../docs/assets/social/a2a/api.png" alt="A2A API"></a>
+                  <div class="meta">
+                    <h3>A2A API</h3>
+                    <dl>
+                      <div><dt>Source</dt><dd>a2a/api.md</dd></div>
+                      <div><dt>Output</dt><dd>docs/assets/social/a2a/api.png</dd></div>
+                      <div><dt>Badge</dt><dd>DOCS</dd></div>
+                      <div><dt>Theme</dt><dd>doc / protocol-grid</dd></div>
+                      <div><dt>Status</dt><dd><span class="pill">ok</span></dd></div>
+                      <div><dt>Size</dt><dd>1200×630 · 24 KB</dd></div>
+                    </dl>
+                  </div>
+                </article>
+                
+                <article class="card ok">
+                  <a class="thumb" href="../docs/assets/social/a2a/client.png"><img src="../docs/assets/social/a2a/client.png" alt="A2A Client"></a>
+                  <div class="meta">
+                    <h3>A2A Client</h3>
+                    <dl>
+                      <div><dt>Source</dt><dd>a2a/client.md</dd></div>
+                      <div><dt>Output</dt><dd>docs/assets/social/a2a/client.png</dd></div>
+                      <div><dt>Badge</dt><dd>DOCS</dd></div>
+                      <div><dt>Theme</dt><dd>doc / protocol-grid</dd></div>
+                      <div><dt>Status</dt><dd><span class="pill">ok</span></dd></div>
+                      <div><dt>Size</dt><dd>1200×630 · 24 KB</dd></div>
+                    </dl>
+                  </div>
+                </article>
+                
+                <article class="card ok">
+                  <a class="thumb" href="../docs/assets/social/a2a/getting-started.png"><img src="../docs/assets/social/a2a/getting-started.png" alt="A2A Getting Started"></a>
+                  <div class="meta">
+                    <h3>A2A Getting Started</h3>
+                    <dl>
+                      <div><dt>Source</dt><dd>a2a/getting-started.md</dd></div>
+                      <div><dt>Output</dt><dd>docs/assets/social/a2a/getting-started.png</dd></div>
+                      <div><dt>Badge</dt><dd>DOCS</dd></div>
+                      <div><dt>Theme</dt><dd>doc / protocol-grid</dd></div>
+                      <div><dt>Status</dt><dd><span class="pill">ok</span></dd></div>
+                      <div><dt>Size</dt><dd>1200×630 · 27 KB</dd></div>
+                    </dl>
+                  </div>
+                </article>
+                
+                <article class="card ok">
+                  <a class="thumb" href="../docs/assets/social/a2a/host-on-hf.png"><img src="../docs/assets/social/a2a/host-on-hf.png" alt="Host A2A on Hugging Face"></a>
+                  <div class="meta">
+                    <h3>Host A2A on Hugging Face</h3>
+                    <dl>
+                      <div><dt>Source</dt><dd>a2a/host-on-hf.md</dd></div>
+                      <div><dt>Output</dt><dd>docs/assets/social/a2a/host-on-hf.png</dd></div>
+                      <div><dt>Badge</dt><dd>DOCS</dd></div>
+                      <div><dt>Theme</dt><dd>doc / protocol-grid</dd></div>
+                      <div><dt>Status</dt><dd><span class="pill">ok</span></dd></div>
+                      <div><dt>Size</dt><dd>1200×630 · 26 KB</dd></div>
+                    </dl>
+                  </div>
+                </article>
+                
+                <article class="card ok">
+                  <a class="thumb" href="../docs/assets/social/a2a/protocol-compliance.png"><img src="../docs/assets/social/a2a/protocol-compliance.png" alt="A2A Protocol Compliance"></a>
+                  <div class="meta">
+                    <h3>A2A Protocol Compliance</h3>
+                    <dl>
+                      <div><dt>Source</dt><dd>a2a/protocol-compliance.md</dd></div>
+                      <div><dt>Output</dt><dd>docs/assets/social/a2a/protocol-compliance.png</dd></div>
+                      <div><dt>Badge</dt><dd>DOCS</dd></div>
+                      <div><dt>Theme</dt><dd>doc / protocol-grid</dd></div>
+                      <div><dt>Status</dt><dd><span class="pill">ok</span></dd></div>
+                      <div><dt>Size</dt><dd>1200×630 · 26 KB</dd></div>
+                    </dl>
+                  </div>
+                </article>
+                
+                <article class="card ok">
+                  <a class="thumb" href="../docs/assets/social/a2a/server.png"><img src="../docs/assets/social/a2a/server.png" alt="A2A Server"></a>
+                  <div class="meta">
+                    <h3>A2A Server</h3>
+                    <dl>
+                      <div><dt>Source</dt><dd>a2a/server.md</dd></div>
+                      <div><dt>Output</dt><dd>docs/assets/social/a2a/server.png</dd></div>
+                      <div><dt>Badge</dt><dd>DOCS</dd></div>
+                      <div><dt>Theme</dt><dd>doc / protocol-grid</dd></div>
+                      <div><dt>Status</dt><dd><span class="pill">ok</span></dd></div>
+                      <div><dt>Size</dt><dd>1200×630 · 25 KB</dd></div>
+                    </dl>
+                  </div>
+                </article>
+                </div>
+            </section>
+            
             <section>
               <h2>Acp</h2>
               <div class="grid">
diff --git a/docs/zensical.toml b/docs/zensical.toml
index 6927ac0d7..ca504f2af 100644
--- a/docs/zensical.toml
+++ b/docs/zensical.toml
@@ -46,6 +46,14 @@ nav = [
     { "Using as an Agent" = "acp/index.md" },
     { "Customizing Agents" = "acp/customizing_agents.md" },
   ] },
+  { "A2A" = [
+    { "Getting Started" = "a2a/getting-started.md" },
+    { "Use as Client" = "a2a/client.md" },
+    { "Serve as A2A Server" = "a2a/server.md" },
+    { "Host on HF" = "a2a/host-on-hf.md" },
+    { "API Usage" = "a2a/api.md" },
+    { "Protocol Compliance" = "a2a/protocol-compliance.md" },
+  ] },
   { "MCP" = [
     { "Configuring Servers" = "mcp/index.md" },
     { "Deploying as an MCP Server" = "mcp/mcp-server.md" },
diff --git a/examples/a2a/README.md b/examples/a2a/README.md
new file mode 100644
index 000000000..99cfe28f8
--- /dev/null
+++ b/examples/a2a/README.md
@@ -0,0 +1,63 @@
+# A2A examples
+
+## Streaming facts server
+
+`facts_server.py` is the fast-agent equivalent of the ADK facts sample. It serves
+a default `facts_agent` over A2A and streams model output to A2A clients.
+
+Run it:
+
+```bash
+GOOGLE_API_KEY=... uv run python examples/a2a/facts_server.py
+```
+
+Defaults:
+
+- `HOST=0.0.0.0`
+- `PORT=8001`
+- `FAST_AGENT_MODEL=gemini25` (falls back to `MODEL` when `FAST_AGENT_MODEL` is unset)
+
+The server exposes:
+
+- AgentCard: `http://localhost:8001/.well-known/agent-card.json`
+- JSON-RPC: `http://localhost:8001/a2a/jsonrpc`
+- HTTP+JSON: `http://localhost:8001/a2a/rest`
+
+Test it with fast-agent as an A2A client:
+
+```bash
+uv run fast-agent go \
+  --a2a http://localhost:8001 \
+  --a2a-transport JSONRPC \
+  --message "Tell me three surprising facts about octopuses."
+```
+
+Streaming is handled by `fast.start_server(transport="a2a")`: fast-agent stream
+listeners are converted into A2A `TaskArtifactUpdateEvent` updates.
+
+## Card-based facts server
+
+The same agent can be served without a Python wrapper using the AgentCard in
+`agent-cards/facts.md`:
+
+```bash
+GOOGLE_API_KEY=... uv run fast-agent serve a2a \
+  --host 0.0.0.0 \
+  --port 8001 \
+  --name facts-a2a \
+  --agent-cards examples/a2a/agent-cards/facts.md
+```
+
+Override the model from the CLI if desired:
+
+```bash
+uv run fast-agent serve a2a \
+  --port 8001 \
+  --name facts-a2a \
+  --agent-cards examples/a2a/agent-cards/facts.md \
+  --model gemini25
+```
+
+Here `facts-a2a` is the served A2A system name, while `facts` is the fast-agent
+AgentCard/skill name. Keeping them distinct avoids a name clash (and confusion)
+with the ADK sample's `facts_agent`.
diff --git a/examples/a2a/agent-cards/facts.md b/examples/a2a/agent-cards/facts.md
new file mode 100644
index 000000000..c8579ccaa
--- /dev/null
+++ b/examples/a2a/agent-cards/facts.md
@@ -0,0 +1,8 @@
+---
+type: agent
+name: facts
+description: Agent to give interesting facts.
+default: true
+skills: []
+---
+You are a helpful agent who can provide interesting facts.
diff --git a/examples/a2a/facts_server.py b/examples/a2a/facts_server.py
new file mode 100644
index 000000000..2fa53365b
--- /dev/null
+++ b/examples/a2a/facts_server.py
@@ -0,0 +1,40 @@
+import asyncio
+import os
+
+from fast_agent import FastAgent
+
+HOST = os.getenv("HOST", "0.0.0.0")  # bind address, overridable via the HOST env var
+PORT = int(os.getenv("PORT", "8001"))  # int() raises ValueError at import time if PORT is not numeric
+MODEL = os.getenv("FAST_AGENT_MODEL", os.getenv("MODEL", "gemini25"))  # FAST_AGENT_MODEL wins over MODEL; default "gemini25"
+
+fast = FastAgent(
+    "fast-agent facts A2A server",
+    parse_cli_args=False,  # NOTE(review): presumably because configuration comes from env vars above — confirm
+    quiet=True,
+)
+
+
+@fast.agent(
+    name="facts_agent",
+    model=MODEL,  # resolved once at import time from the environment
+    instruction="You are a helpful agent who can provide interesting facts.",
+    default=True,
+)
+async def facts_agent() -> None:
+    """Default A2A facts agent."""
+    pass  # body intentionally empty; presumably the decorator arguments fully define the agent — confirm
+
+
+async def main() -> None:
+    await fast.start_server(  # serve the registered agent over A2A on HOST:PORT
+        transport="a2a",
+        host=HOST,
+        port=PORT,
+        server_name="facts_agent",
+        server_description="Agent to give interesting facts.",
+        instance_scope="connection",  # NOTE(review): presumably one agent instance per client connection — confirm
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())  # run the server only when executed as a script, not on import
diff --git a/plan/05-20-a2a-client.md b/plan/05-20-a2a-client.md
new file mode 100644
index 000000000..2790f0f06
--- /dev/null
+++ b/plan/05-20-a2a-client.md
@@ -0,0 +1,473 @@
+# A2A client integration status
+
+Historical status: superseded.
+
+This file records the initial 2026-05-20 client-only planning pass. It is kept
+as source material for the later A2A implementation, but it is no longer the
+authoritative plan:
+
+- fast-agent now supports both A2A client and A2A server paths for HTTP
+  transports;
+- gRPC is intentionally out of scope for the current fast-agent A2A support
+  target;
+- current orientation, evidence, and remaining gaps live in:
+  - `plan/05-21-a2a-orientation.md`;
+  - `plan/05-21-a2a-goal-addendum.md`;
+  - `plan/05-21-a2a-completion-audit.md`;
+  - `docs/docs/a2a/protocol-compliance.md`.
+
+The sections below should be read as implementation history, not as current
+acceptance criteria.
+
+Date: 2026-05-20
+Last updated: 2026-05-20
+
+## Goal
+
+Allow fast-agent to treat remote A2A agents as first-class configured agents via the
+existing AgentCard mechanism.
+
+Initial scope remains client-only. Server-side fast-agent-as-A2A is deferred.
+
+## Current status
+
+Implemented and smoke-tested:
+
+- `type: a2a` AgentCards load successfully.
+- `AgentType.A2A` is registered and participates in direct factory creation.
+- A2A cards parse the currently supported fields:
+  - `url`;
+  - `transport` (`JSONRPC`, `HTTP+JSON`, `GRPC` accepted by config validation);
+  - `streaming`;
+  - `polling`;
+  - `accepted_output_modes`;
+  - `headers`;
+  - `relative_card_path`.
+- `A2ARemoteAgent` resolves the remote A2A AgentCard and creates an SDK client.
+- JSON-RPC text request/response works against the SDK sample server.
+- The A2A agent is created without attaching a local LLM.
+- Fast-agent local history records A2A user/assistant turns; `/history` works.
+- TUI now maps A2A turns into the existing display infrastructure:
+  - user messages render via `ConsoleDisplay.show_user_message(...)`;
+  - assistant messages render via `ConsoleDisplay.show_assistant_message(...)`;
+  - the active-agent toolbar shows `name[A2A]` in magenta;
+  - the toolbar model segment shows the remote card name instead of `$system.default`.
+- CLI auto tool-card attachment now skips A2A agents, because A2A agents are not
+  valid agents-as-tools parents.
+- Unit coverage exists for A2A card parsing, transport validation, and toolbar
+  A2A identity styling.
+
+Validated commands:
+
+```bash
+uv run pytest tests/unit/fast_agent/core/test_agent_card_loader.py -q
+uv run pytest tests/unit/fast_agent/ui/test_enhanced_prompt_toolbar.py tests/unit/fast_agent/ui/test_input_toolbar.py -q
+uv run scripts/lint.py
+uv run scripts/typecheck.py
+```
+
+## Reference A2A SDK sample server
+
+The local A2A SDK source is at:
+
+```text
+../a2a-python/
+```
+
+Run the sample server:
+
+```bash
+cd ../a2a-python
+uv run python samples/hello_world_agent.py
+```
+
+The sample exposes:
+
+- agent card: `http://127.0.0.1:41241/.well-known/agent-card.json`
+- JSON-RPC: `http://127.0.0.1:41241/a2a/jsonrpc`
+- HTTP+JSON: `http://127.0.0.1:41241/a2a/rest`
+- gRPC v1.0: `127.0.0.1:50051`
+- gRPC v0.3 compatibility: `127.0.0.1:50052`
+
+Minimal fast-agent card:
+
+```yaml
+type: a2a
+name: hello_remote
+url: http://127.0.0.1:41241
+transport: JSONRPC
+```
+
+Smoke test without the TUI:
+
+```bash
+uv run fast-agent -x --agent-cards /tmp/a2a-card.yaml --agent hello_remote --message hello --quiet
+```
+
+Expected output:
+
+```text
+Hello World! Nice to meet you!
+```
+
+## Current runtime mapping
+
+For each `agent.send(...)` / `agent.generate(...)` call:
+
+1. fast-agent input is normalized by `LlmDecorator.generate(...)`.
+2. `A2ARemoteAgent.generate_impl(...)` displays trailing user messages through
+   the same console display path used by local LLM agents.
+3. The latest user text is mapped to an A2A `Message`:
+
+   ```python
+   Message(
+       role=Role.ROLE_USER,
+       message_id=str(uuid.uuid4()),
+       context_id=current_context_id,
+       task_id=current_task_id,
+       parts=[Part(text=user_text)],
+   )
+   ```
+
+4. The SDK client sends `SendMessageRequest(message=message)`.
+5. The returned async event iterator is consumed.
+6. The agent records `context_id`, `current_task_id`, and `last_task_state` where
+   available.
+7. Text is aggregated from direct A2A messages and artifact updates.
+8. A normal `PromptMessageExtended(role="assistant", ...)` is returned and
+   displayed through `ConsoleDisplay.show_assistant_message(...)`.
+
+Terminal state behavior currently implemented:
+
+- completed: returns aggregated text, or a no-output message;
+- failed/canceled/cancelled/rejected/input-required/auth-required: returns a
+  clear A2A task state message;
+- terminal non-input-required tasks clear `current_task_id`;
+- `input_required` preserves task state for future follow-up work.
+
+## TUI verification with tmux
+
+`tmux` has been useful for deterministic TUI reproduction and regression checks.
+It lets us start fast-agent, send keystrokes, and capture the visible terminal
+state without manually driving the UI.
+
+Example:
+
+```bash
+# Ensure sample A2A server is running first.
+cat >/tmp/a2a-card.yaml <<'YAML'
+type: a2a
+name: hello_remote
+url: http://127.0.0.1:41241
+transport: JSONRPC
+YAML
+
+cd /home/ssmith/source/fast-agent-pr
+
+tmux kill-session -t a2atest 2>/dev/null || true
+tmux new-session -d -s a2atest \
+  'cd /home/ssmith/source/fast-agent-pr && FAST_AGENT_MODEL=passthrough uv run fast-agent -x --agent-cards /tmp/a2a-card.yaml --agent hello_remote'
+
+sleep 4
+tmux send-keys -t a2atest 'whhhhhhaaaattt' Enter
+sleep 4
+tmux send-keys -t a2atest 'hello' Enter
+sleep 4
+tmux capture-pane -t a2atest -p -S -3000 | tail -80
+```
+
+Expected visible shape:
+
+```text
+▎▶ hello_remote ────────────────────────────────────────────────────────────────
+whhhhhhaaaattt
+
+▎◀ hello_remote A2A
+Hello World! You said: 'whhhhhhaaaattt'. Thanks for your message!
+
+▎▶ hello_remote ────────────────────────────────────────────────────────────────
+hello
+
+▎◀ hello_remote A2A
+Hello World! Nice to meet you!
+
+❯
+
+  hello_remote[A2A]    ▲  Sample Agent | 002 |  NRML  | fast-agent 0.7.8
+```
+
+Useful tmux commands for future automated checks/docs captures:
+
+```bash
+# Capture current pane text.
+tmux capture-pane -t a2atest -p -S -3000 > /tmp/a2a-tui.txt
+
+# Append all pane output to a log while the session runs.
+tmux pipe-pane -t a2atest -o 'cat >> /tmp/a2a-tui.log'
+
+# Send slash commands or normal input.
+tmux send-keys -t a2atest '/history' Enter
+tmux send-keys -t a2atest 'hello' Enter
+
+# Stop the session.
+tmux kill-session -t a2atest
+```
+
+## Asciinema capture plan
+
+We should try asciinema for documentation-quality terminal recordings while the
+feature evolves. tmux is good for testable text snapshots; asciinema is better for
+replayable demos that can be embedded or converted for the docs site.
+
+Initial local experiment:
+
+```bash
+# Install if needed. Options depend on the environment.
+uv tool install asciinema
+# or: pipx install asciinema
+# or: sudo apt install asciinema
+
+# Start the A2A sample server in another terminal/tmux pane first.
+cd /home/ssmith/source/fast-agent-pr
+asciinema rec /tmp/fast-agent-a2a.cast \
+  -c 'FAST_AGENT_MODEL=passthrough uv run fast-agent -x --agent-cards /tmp/a2a-card.yaml --agent hello_remote'
+```
+
+During recording, type a short scripted flow:
+
+```text
+hello
+whhhhhhaaaattt
+/history
+/exit
+```
+
+Replay locally:
+
+```bash
+asciinema play /tmp/fast-agent-a2a.cast
+```
+
+Potential docs pipeline options to evaluate:
+
+- keep `.cast` files as source artifacts;
+- embed asciinema player in docs pages;
+- convert selected recordings to GIF/SVG/video if static assets are preferred;
+- pair each asciinema capture with a tmux `capture-pane` text fixture for
+  regression-oriented assertions.
+
+## Session and conversation state
+
+A2A does have remote conversational continuity, but it is represented by
+`context_id`, not by replaying fast-agent history.
+
+Current behavior:
+
+- fast-agent keeps normal local `message_history` for display, `/history`, and
+  saved transcript behavior;
+- the remote A2A agent receives only the latest user text for each request;
+- remote continuity is carried through `context_id`;
+- `task_id` tracks current/outstanding A2A work and is cleared on terminal states
+  except input-required style flows.
+
+Recommendation:
+
+- treat local fast-agent history as the transcript/UI history;
+- treat A2A `context_id` as remote conversation/thread state;
+- do not resend the full fast-agent transcript by default;
+- persist A2A state alongside sessions in a future step:
+  - `context_id`;
+  - `current_task_id`;
+  - `last_task_state`;
+  - selected transport;
+  - remote card identity/version.
+
+Open question for resume:
+
+- On fast-agent session resume, should we always reuse the saved A2A `context_id`,
+  or should there be a freshness/remote-card-version check that starts a new
+  context when the old one may no longer be meaningful?
+
+## Remaining work
+
+### Short-term
+
+1. Add SDK-backed tests for factory/runtime connectivity.
+   - Prefer the A2A SDK server primitives over monkeypatching.
+   - Cover JSON-RPC send and text aggregation.
+2. Add a tmux-driven smoke test script or documented manual check.
+   - Keep it optional initially if CI terminal behavior is unreliable.
+3. Try asciinema capture and decide where `.cast` files should live.
+4. Add `/a2a` diagnostics commands.
+
+### `/a2a` command surface
+
+MVP commands still worth adding:
+
+```text
+/a2a list
+/a2a card [agent]
+/a2a status [agent]
+/a2a reset [agent]
+/a2a transport [agent]
+```
+
+Later task lifecycle commands:
+
+```text
+/a2a tasks [agent]
+/a2a get [agent] <task-id>
+/a2a cancel [agent] <task-id>
+/a2a subscribe [agent] <task-id>
+/a2a resume [agent] <task-id>
+```
+
+### Transport coverage
+
+Current practical validation is JSON-RPC. Configuration validation accepts
+`HTTP+JSON` and `GRPC`, but these need explicit integration coverage.
+
+Next transport tests:
+
+- HTTP+JSON against the SDK sample server;
+- gRPC only when optional dependencies are available and the environment is
+  suitable.
+
+### Content mapping beyond text
+
+Current MVP is text-only:
+
+- fast-agent user text -> A2A text part;
+- A2A text messages/artifacts -> fast-agent assistant text.
+
+Later mapping:
+
+- `data` parts -> JSON/fenced JSON or structured side channel;
+- `url` parts -> markdown links/resource references;
+- `raw` parts -> media/document attachments where fast-agent can display or
+  persist them;
+- preserve `media_type` where possible.
+
+### Streaming behavior
+
+The current UI path prioritizes stable user/assistant turn rendering. It consumes
+A2A events synchronously and displays the final aggregated assistant message.
+
+Future streaming refinement:
+
+- route artifact/direct-message text updates into the existing streaming handle;
+- avoid duplicate blank headers;
+- preserve post-stream re-render behavior exactly like local LLM agents;
+- dedupe servers that send full artifact snapshots rather than deltas.
+
+### Server-side fast-agent-as-A2A
+
+Deferred until the client mapping settles.
+
+The SDK server layer should make this straightforward later:
+
+- implement an `AgentExecutor` that wraps an `AgentProtocol` or `AgentApp`;
+- map A2A user messages to fast-agent `send()`/`generate()`;
+- stream fast-agent `StreamChunk`s as A2A artifact updates;
+- expose fast-agent `agent_card()` as the A2A AgentCard.
+
+## 2026-05-20 asciinema/progress spike update
+
+Asciinema capture is now validated as part of the local development/testing
+workflow for TUI-facing A2A work.
+
+Artifacts produced during the spike:
+
+```text
+/tmp/fast-agent-a2a-clean.cast
+/tmp/fast-agent-a2a-natural.cast
+/tmp/fast-agent-a2a-progress.cast
+```
+
+The most useful current demo is:
+
+```text
+/tmp/fast-agent-a2a-progress.cast
+```
+
+Replay it with:
+
+```bash
+asciinema play /tmp/fast-agent-a2a-progress.cast
+```
+
+Fast inspection:
+
+```bash
+asciinema play /tmp/fast-agent-a2a-progress.cast --speed 100
+```
+
+Recording script:
+
+```text
+/tmp/a2a-asciinema-progress.sh
+```
+
+The progress demo uses the same tmux-driven approach:
+
+- create a fixed-size tmux session;
+- disable the tmux status bar;
+- start fast-agent with the A2A card;
+- type input character-by-character for a more natural feel;
+- leave enough delay after Enter for the SDK sample server's wait state to show;
+- record the whole interaction with asciinema.
+
+Recording command:
+
+```bash
+asciinema rec \
+  --overwrite \
+  --cols 104 \
+  --rows 34 \
+  --idle-time-limit 1.3 \
+  -t 'fast-agent A2A progress display demo' \
+  -c /tmp/a2a-asciinema-progress.sh \
+  /tmp/fast-agent-a2a-progress.cast
+```
+
+The `.cast` file is newline-delimited JSON:
+
+- first line: metadata (`version`, `width`, `height`, `timestamp`, `env`, `title`,
+  optional `idle_time_limit`);
+- remaining lines: events shaped like `[time_offset_seconds, "o", "terminal output"]`.
+
+This makes simple edits scriptable:
+
+- retitle recordings by rewriting the first JSON line;
+- trim beginning/end events;
+- redact paths or usernames;
+- compress pauses by rewriting timestamps.
+
+For docs embedding, use `asciinema-player` and serve `.cast` files as static
+assets. Keep the tmux scripts as reproducible sources and the `.cast` files as
+recorded documentation artifacts.
+
+A2A progress display was also wired into the normal progress board. While waiting
+for the remote A2A response the TUI now shows the standard sending row, e.g.:
+
+```text
+▎▶ hello_remote ────────────────────────────────────────────────────────────────
+hello
+▎▶ Sending       ⠄ hello_remote
+```
+
+Then the progress display is paused before rendering the final assistant message,
+so the progress row does not overwrite the completed A2A response:
+
+```text
+▎◀ hello_remote A2A
+Hello World! Nice to meet you!
+```
+
+Recommendation for ongoing A2A UI work:
+
+- run a tmux text capture as a quick regression check;
+- record or update an asciinema cast when the visible behavior changes;
+- keep casts short and focused (one feature per cast);
+- use `--idle-time-limit` so docs recordings remain compact;
+- prefer deterministic tmux scripts over manual recordings for repeatability.
diff --git a/plan/05-20-a2a-connect-streaming-files.md b/plan/05-20-a2a-connect-streaming-files.md
new file mode 100644
index 000000000..9c8e2cb05
--- /dev/null
+++ b/plan/05-20-a2a-connect-streaming-files.md
@@ -0,0 +1,722 @@
+# A2A connect, transport, streaming, and file handling plan
+
+Date: 2026-05-20
+Owner: fast-agent A2A client work
+Recording folder: `/home/ssmith/plan/records/`
+
+## Goal
+
+Make remote A2A agents easy to connect to, prove each supported transport works,
+and support the happy path for streaming and file-capable A2A messages from both
+CLI and TUI flows.
+
+The feature should be demonstrable against both the A2A SDK sample server and a
+small fast-agent-owned fake A2A server fixture that deterministically exercises
+streaming, artifacts, and file/data parts.
+
+## Working rules
+
+- Keep diffs small and commit at natural checkpoints.
+- Before each commit, run at minimum:
+
+  ```bash
+  uv run scripts/lint.py
+  uv run scripts/typecheck.py
+  ```
+
+- For focused changes, also run the relevant pytest target before committing.
+- Do not rewrite unrelated files or existing user changes.
+- Store terminal captures and asciinema recordings under:
+
+  ```text
+  /home/ssmith/plan/records/
+  ```
+
+- Prefer SDK/server fixtures over monkeypatching A2A clients.
+
+## Outcomes
+
+1. A user can test a remote A2A endpoint interactively with `/a2a connect`.
+2. JSON-RPC and HTTP+JSON are covered by SDK-backed integration tests.
+3. gRPC is covered when the optional dependencies and a suitable environment are
+   available, and is skipped with a clear reason otherwise.
+4. A2A streaming updates render without duplicate/blank headers and settle into a
+   normal assistant turn in the TUI.
+5. A2A file/data happy paths are implemented and tested:
+   - outbound fast-agent attachment or URL -> A2A `Part(raw=...)`/`Part(url=...)`;
+   - inbound A2A `Part(url=...)`/`Part(data=...)`/text artifact -> readable
+     fast-agent assistant content;
+   - raw inbound bytes are at least detected and represented safely.
+6. The same scenarios are testable through non-interactive CLI and through the TUI.
+
+## URL and transport semantics
+
+### Accepted connect URLs
+
+`/a2a connect` should accept:
+
+```text
+/a2a connect http://127.0.0.1:41241
+/a2a connect https://agent.example.com
+/a2a connect https://agent.example.com/base --card-path /.well-known/agent-card.json
+/a2a connect http://127.0.0.1:41241/.well-known/agent-card.json
+```
+
+Preferred input is the A2A agent base URL. Direct agent-card URLs may be accepted
+as a convenience by normalizing to base URL plus `relative_card_path`.
+
+Endpoint URLs such as `/a2a/jsonrpc` are not the preferred user input. If they are
+provided, emit a clear diagnostic explaining that fast-agent expects the base URL
+or card URL.
+
+### Accepted transports
+
+Canonical transport names passed to the SDK:
+
+```text
+JSONRPC
+HTTP+JSON
+GRPC
+```
+
+Friendly command aliases should normalize as follows:
+
+```text
+jsonrpc, json-rpc, rpc       -> JSONRPC
+http, http+json, rest        -> HTTP+JSON
+grpc                         -> GRPC
+```
+
+If no transport is provided, let the SDK choose from the remote AgentCard and show
+what was selected.
+
+## Step 1 — Baseline transport integration tests
+
+### Implementation
+
+- Add an integration test fixture that starts an A2A SDK-compatible test server.
+- Cover creation through normal fast-agent card/factory/runtime paths.
+- Test JSON-RPC text request/response.
+- Test HTTP+JSON text request/response.
+- Add optional gRPC coverage guarded by dependency/port availability.
+
+### Test commands
+
+```bash
+uv run pytest tests/integration/a2a -q
+uv run scripts/lint.py
+uv run scripts/typecheck.py
+```
+
+### CLI check
+
+```bash
+uv run fast-agent -x --agent-cards /tmp/a2a-card.yaml --agent hello_remote --message hello --quiet
+```
+
+Expected: text response from the remote server.
+
+### TUI check
+
+Use tmux to start fast-agent, send `hello`, capture pane text, and verify the
+assistant response appears under an `A2A` assistant header.
+
+### Commit
+
+Commit after tests/lint/typecheck pass.
+
+## Step 2 — Fake A2A server fixture for deterministic behavior
+
+### Implementation
+
+Build a small local test server under tests/support or tests/integration/a2a that
+uses SDK server primitives and exposes:
+
+- text echo response;
+- delayed streaming status updates;
+- artifact text updates;
+- outbound file/data response modes;
+- JSON-RPC and HTTP+JSON routes;
+- gRPC only if available without making CI brittle.
+
+Avoid coupling tests to the external SDK sample process where possible. Keep the
+external sample useful for manual smoke tests and recordings.
+
+### Test commands
+
+```bash
+uv run pytest tests/integration/a2a -q
+uv run scripts/lint.py
+uv run scripts/typecheck.py
+```
+
+### Commit
+
+Commit after the fixture and baseline tests pass.
+
+## Step 3 — `/a2a status`, `/a2a card`, `/a2a reset`
+
+### Implementation
+
+Add the diagnostic commands before connect so connect has reusable reporting
+helpers.
+
+Expected surfaces:
+
+```text
+/a2a status [agent]
+/a2a card [agent]
+/a2a reset [agent]
+```
+
+`status` should show URL, remote card name, selected/requested transport,
+streaming/polling flags, context id, current task id, last task state, and output
+modes where available.
+
+`card` should show the resolved remote AgentCard summary and supported
+interfaces.
+
+`reset` should clear `context_id`, `current_task_id`, and `last_task_state`.
+
+### Test commands
+
+```bash
+uv run pytest tests/unit/fast_agent/ui tests/integration/a2a -q
+uv run scripts/lint.py
+uv run scripts/typecheck.py
+```
+
+### CLI/TUI checks
+
+- TUI: run `/a2a status`, `/a2a card`, `/a2a reset`, then send `hello`.
+- CLI: if slash commands can be executed non-interactively, run the equivalent
+  command-dispatch tests or the documented command invocation.
+
+### Commit
+
+Commit after command behavior is tested.
+
+## Step 4 — `/a2a connect`
+
+### Implementation
+
+Add:
+
+```text
+/a2a connect <base-url-or-card-url> [--transport JSONRPC|HTTP+JSON|GRPC] [--name NAME] [--card-path PATH]
+```
+
+Behavior:
+
+1. Normalize URL/card path.
+2. Resolve remote AgentCard.
+3. Display remote card summary and supported interfaces.
+4. Validate requested transport if supplied.
+5. Create a runtime A2A agent or update/switch to a temporary connected agent.
+6. Show selected transport and next action.
+
+Persistent save/write-back is deferred unless trivial. If deferred, print the
+YAML snippet the user can save.
+
+### Test commands
+
+```bash
+uv run pytest tests/unit/fast_agent/ui tests/integration/a2a -q
+uv run scripts/lint.py
+uv run scripts/typecheck.py
+```
+
+### CLI check
+
+```bash
+uv run fast-agent -x --agent-cards /tmp/minimal-local-card.yaml --agent passthrough_or_default
+```
+
+Then in TUI:
+
+```text
+/a2a connect http://127.0.0.1:41241 --transport JSONRPC --name hello_remote
+hello
+```
+
+Expected: active/connected A2A agent responds.
+
+### Recording
+
+Save an asciinema capture:
+
+```text
+/home/ssmith/plan/records/a2a-connect-jsonrpc.cast
+```
+
+### Commit
+
+Commit after `/a2a connect` works and recordings are captured.
+
+## Step 5 — Streaming display
+
+### Implementation
+
+Route A2A streaming events into the existing streaming/progress display rather
+than only aggregating final text.
+
+Requirements:
+
+- task status updates should update progress without corrupting the transcript;
+- artifact/direct-message text should appear as streaming assistant content;
+- final render should match local LLM assistant turn behavior;
+- full-snapshot artifact servers should not duplicate text excessively;
+- progress board should pause before the final assistant message.
+
+### Test commands
+
+```bash
+uv run pytest tests/integration/a2a -q
+uv run scripts/lint.py
+uv run scripts/typecheck.py
+```
+
+### CLI check
+
+```bash
+uv run fast-agent -x --agent-cards /tmp/a2a-stream-card.yaml --agent stream_remote --message "stream please" --quiet
+```
+
+Expected: final aggregated text is correct.
+
+### TUI check
+
+Use tmux to verify visible streaming/progress shape. Capture text to:
+
+```text
+/home/ssmith/plan/records/a2a-streaming-tui.txt
+```
+
+Record asciinema:
+
+```text
+/home/ssmith/plan/records/a2a-streaming.cast
+```
+
+### Commit
+
+Commit after tests, lint, typecheck, and TUI capture.
+
+## Step 6 — File/data happy path
+
+### Implementation
+
+Outbound:
+
+- map user text to `Part(text=...)` as today;
+- map local file attachments to `Part(raw=..., media_type=..., filename=...)`;
+- map URL/resource attachments to `Part(url=..., media_type=..., filename=...)`;
+- preserve plain text fallback when unsupported.
+
+Inbound:
+
+- render `Part(text=...)` as assistant text;
+- render `Part(url=...)` as markdown links with media type/filename when present;
+- render `Part(data=...)` as fenced JSON;
+- represent `Part(raw=...)` safely with filename/media type/byte count, and save
+  bytes only if there is an established artifact storage path.
+
+### Test commands
+
+```bash
+uv run pytest tests/integration/a2a tests/e2e/multimodal -q
+uv run scripts/lint.py
+uv run scripts/typecheck.py
+```
+
+### CLI checks
+
+Use a local sample file and the fake server:
+
+```bash
+uv run fast-agent -x --agent-cards /tmp/a2a-file-card.yaml --agent file_remote --message "summarize attached file" --quiet
+```
+
+If the current CLI has an attachment flag, include it; otherwise record the gap
+and test through TUI/resource input.
+
+### TUI checks
+
+- Attach or reference a local file/resource.
+- Send to A2A fake server.
+- Verify server receives file metadata/content.
+- Verify inbound URL/data response renders readably.
+
+Save captures:
+
+```text
+/home/ssmith/plan/records/a2a-file-cli.txt
+/home/ssmith/plan/records/a2a-file-tui.txt
+/home/ssmith/plan/records/a2a-file.cast
+```
+
+### Commit
+
+Commit after file/data happy path is tested and recorded.
+
+## Step 7 — Documentation and final verification
+
+### Implementation
+
+- Update user docs/examples for `type: a2a` cards.
+- Document `/a2a connect` URL and transport semantics.
+- Document known limitations for binary inbound data and persistence.
+- Add short demo recording links if the docs pipeline supports asciinema assets.
+
+### Final validation
+
+```bash
+uv run pytest tests/integration/a2a -q
+uv run pytest tests/unit/fast_agent/core/test_agent_card_loader.py -q
+uv run pytest tests/unit/fast_agent/ui/test_enhanced_prompt_toolbar.py tests/unit/fast_agent/ui/test_input_toolbar.py -q
+uv run scripts/lint.py
+uv run scripts/typecheck.py
+```
+
+Manual smoke matrix:
+
+| Scenario | CLI | TUI | Recording |
+|---|---:|---:|---:|
+| JSON-RPC text | yes | yes | optional |
+| HTTP+JSON text | yes | yes | optional |
+| gRPC text | optional | optional | optional |
+| `/a2a status/card/reset` | command tests | yes | optional |
+| `/a2a connect` | n/a or command test | yes | yes |
+| Streaming artifact/direct text | yes final text | yes live display | yes |
+| File/data happy path | yes if attachment path exists | yes | yes |
+
+### Final commit
+
+Commit docs and final verification notes.
+
+---
+
+## 2026-05-20 implementation update
+
+The core short-term plan is now mostly implemented and committed in focused
+checkpoints.
+
+### Commit log for this feature slice
+
+```text
+a33ad0d1 test a2a jsonrpc and http transports
+e8867e12 support a2a streaming and file parts
+13fb9804 add a2a diagnostic commands
+94e61939 add interactive a2a connect
+59c603dc add a2a cli shortcut and fake server
+fffa4a90 document and expose a2a transport diagnostics
+98273942 handle a2a connection failures gracefully
+2767f493 add a2a getting started docs pipeline
+dc0e62ad embed asciinema player in a2a docs
+0b91bf5c record colored a2a docs casts
+6ccc47fb add a2a cast theme switch
+a4fb64ba align a2a cast theme with docs styles
+```
+
+### Runtime and transport status
+
+Implemented:
+
+- SDK-backed integration fixture under `tests/integration/a2a/`.
+- JSON-RPC runtime coverage.
+- HTTP+JSON runtime coverage.
+- A deterministic fake server:
+
+  ```bash
+  uv run python tests/integration/a2a/fake_server.py --port 41242
+  ```
+
+- CLI one-shot shortcut:
+
+  ```bash
+  uv run fast-agent -x \
+    --a2a http://127.0.0.1:41242 \
+    --a2a-transport JSONRPC \
+    --message "please stream" \
+    --quiet
+  ```
+
+- Direct card URL normalization for both CLI and TUI connect flows.
+- Graceful connection failure handling for A2A initialization errors. Missing or
+  unreachable servers now produce an `AgentConfigError` style message instead of
+  a rich traceback.
+
+Still open:
+
+- gRPC transport coverage remains optional/deferred.
+- `/a2a transport` reports requested transport and selected SDK client class, but
+  deeper SDK transport introspection may still be worth improving if the SDK
+  exposes a stable public surface.
+
+### Current fake server behavior
+
+The fake server is the source of truth for repeatable docs/tests/demos. It
+exposes:
+
+```text
+AgentCard: http://127.0.0.1:41242/.well-known/agent-card.json
+JSON-RPC:  http://127.0.0.1:41242/a2a/jsonrpc
+REST:      http://127.0.0.1:41242/a2a/rest
+```
+
+Useful prompts:
+
+```text
+hello
+please stream
+respond with files
+```
+
+It deterministically exercises:
+
+- text echo;
+- delayed streaming artifact chunks;
+- inbound URL/data/raw rendering;
+- outbound text/url/raw part mapping.
+
+### Streaming and file/data mapping status
+
+Implemented outbound mapping:
+
+| fast-agent/MCP content | A2A part |
+|---|---|
+| `TextContent` | `Part(text=...)` |
+| `ImageContent` / `AudioContent` | `Part(raw=..., media_type=...)` |
+| `ResourceLink` | `Part(url=..., media_type=..., filename=...)` |
+| `EmbeddedResource` blob | `Part(raw=..., media_type=..., filename=...)` |
+| `EmbeddedResource` text | `Part(text=..., media_type=..., filename=...)` |
+
+Implemented inbound rendering:
+
+| A2A part | fast-agent rendering |
+|---|---|
+| `text` | assistant text |
+| `url` | Markdown link with media type when present |
+| `data` | fenced JSON |
+| `raw` | safe `[filename: N bytes media/type]` placeholder |
+
+Streaming events are consumed incrementally and emitted to stream listeners while
+still producing a final normal assistant turn. The CLI currently prints final
+aggregated text for one-shot mode; the TUI path displays the completed assistant
+turn in the usual A2A-styled transcript.
+
+### `/a2a` command surface now implemented
+
+```text
+/a2a
+/a2a list
+/a2a status [agent]
+/a2a card [agent]
+/a2a transport [agent]
+/a2a reset [agent]
+/a2a connect <url> [--transport JSONRPC|HTTP+JSON|GRPC] [--name NAME] [--card-path PATH]
+```
+
+Notes:
+
+- `/a2a` defaults to status for the current agent.
+- `/a2a connect` creates a runtime A2A agent and switches to it.
+- `/a2a reset` clears local A2A `context_id`, `current_task_id`, and
+  `last_task_state`.
+- Persistent write-back/save is still deferred.
+
+### CLI A2A shortcut now implemented
+
+New flags:
+
+```text
+--a2a <url>                 repeatable; base URL or direct AgentCard URL
+--a2a-transport <transport> JSONRPC, HTTP+JSON, GRPC, or aliases
+```
+
+Transport aliases:
+
+```text
+jsonrpc, json-rpc, rpc -> JSONRPC
+http, http+json, rest  -> HTTP+JSON
+grpc                   -> GRPC
+```
+
+Generated temporary agents are named:
+
+```text
+a2a_remote
+a2a_remote_2
+...
+```
+
+If exactly one `--a2a` is supplied and `--agent` is omitted, fast-agent targets
+that temporary A2A agent automatically.
+
+### Documentation and recording pipeline
+
+A new top-level docs section exists:
+
+```text
+A2A
+└── Getting Started
+```
+
+Primary page:
+
+```text
+docs/docs/a2a/getting-started.md
+```
+
+Docs nav update:
+
+```text
+docs/zensical.toml
+```
+
+Generated snippets live under:
+
+```text
+docs/docs/a2a/snippets/
+```
+
+Current snippets:
+
+```text
+agent-card.yaml
+cli-files-command.sh
+cli-files-output.txt
+cli-stream-command.sh
+cli-stream-output.txt
+start-fake-server.sh
+tui-session.txt
+```
+
+The repeatable pipeline is:
+
+```bash
+uv run scripts/a2a_docs_pipeline.py generate
+uv run scripts/a2a_docs_pipeline.py check
+uv run scripts/a2a_docs_pipeline.py record
+```
+
+Convenience wrapper:
+
+```bash
+uv run scripts/docs.py a2a
+```
+
+The `record` command now captures a live tmux session with asciinema rather than
+capturing a plain-text tmux pane. This preserves ANSI colors in the `.cast`.
+
+Current committed recording:
+
+```text
+docs/docs/assets/a2a/a2a-streaming-files.cast
+```
+
+Current recording metadata:
+
+```text
+width: 104
+height: 27
+idle_time_limit: 1.3
+contains ANSI escape sequences: yes
+```
+
+The old local working recordings under `/home/ssmith/plan/records/` are still
+useful as scratch/reference artifacts, but the docs source of truth is now the
+committed asset under `docs/docs/assets/a2a/`.
+
+### Embedded asciinema player
+
+The A2A Getting Started page embeds the cast using vendored asciinema-player
+assets:
+
+```text
+docs/docs/assets/vendor/asciinema-player/asciinema-player.css
+docs/docs/assets/vendor/asciinema-player/asciinema-player.min.js
+docs/docs/assets/vendor/asciinema-player/catppuccin.css
+```
+
+Despite the legacy filename `catppuccin.css`, the CSS now defines fast-agent
+native terminal themes aligned to `docs/docs/stylesheets/fast-agent.css` tokens:
+
+```text
+fast-agent-light
+fast-agent-dark
+```
+
+The page includes a player-local switch:
+
+```text
+Auto | Light | Dark
+```
+
+Behavior:
+
+- Auto follows the Zensical docs light/dark mode.
+- Light forces `fast-agent-light`.
+- Dark forces `fast-agent-dark`.
+- The player recreates itself when the local player theme or docs site theme
+  changes.
+
+### Tests added/updated
+
+A2A runtime and command tests now include:
+
+```text
+tests/integration/a2a/conftest.py
+tests/integration/a2a/test_remote_agent_runtime.py
+tests/integration/a2a/fake_server.py
+tests/unit/fast_agent/a2a_connect_test.py
+tests/unit/fast_agent/cli/test_a2a_go_options.py
+tests/unit/fast_agent/core/test_a2a_error_formatting.py
+tests/unit/fast_agent/ui/test_parse_a2a_commands.py
+tests/unit/test_a2a_docs_pipeline.py
+```
+
+The docs pipeline tests check that:
+
+- generated snippets are current;
+- the Getting Started page includes all generated snippets;
+- the cast asset is present;
+- the cast metadata uses the compact 27-row size;
+- the cast contains ANSI escape sequences;
+- vendored asciinema assets are present;
+- fast-agent light/dark terminal themes are present;
+- the page includes Auto/Light/Dark controls.
+
+### Validation commands run after latest changes
+
+```bash
+uv run pytest tests/unit/test_a2a_docs_pipeline.py -q
+uv run scripts/a2a_docs_pipeline.py check
+uv run scripts/lint.py
+uv run scripts/typecheck.py
+uv run scripts/docs.py build
+```
+
+Earlier broader validation also passed:
+
+```bash
+uv run pytest tests/integration/a2a \
+  tests/unit/fast_agent/a2a_connect_test.py \
+  tests/unit/fast_agent/cli/test_a2a_go_options.py \
+  tests/unit/fast_agent/core/test_a2a_error_formatting.py \
+  tests/unit/fast_agent/ui/test_parse_a2a_commands.py \
+  tests/unit/test_a2a_docs_pipeline.py -q
+```
+
+### Remaining recommended follow-ups
+
+1. Add optional/skipped gRPC integration coverage.
+2. Rename `catppuccin.css` to a fast-agent-specific filename if we want the asset
+   name to match its current purpose. This is cosmetic but would reduce future
+   confusion.
+3. Add docs for persistent A2A session state once `context_id` resume semantics
+   are decided.
+4. Consider a `/a2a save [agent] <path>` command to write a connected runtime A2A
+   agent back to an AgentCard.
+5. Consider richer inbound raw-byte handling if/when fast-agent has a standard
+   artifact storage/display path.
diff --git a/plan/05-21-a2a-completion-audit.md b/plan/05-21-a2a-completion-audit.md
new file mode 100644
index 000000000..0925ecd3f
--- /dev/null
+++ b/plan/05-21-a2a-completion-audit.md
@@ -0,0 +1,78 @@
+# A2A Completion Audit
+
+Current audit status: complete as of the final verification pass. The
+implementation satisfies the requested HTTP A2A client/server scope; the known
+gaps below are intentionally documented protocol/operational limitations rather
+than hidden unfinished work for this goal.
+
+## Requirement Evidence
+
+| Requirement | Current evidence | Status |
+|---|---|---|
+| HTTP A2A client support, no gRPC | `src/fast_agent/a2a/remote_agent.py` restricts default bindings to `JSONRPC` and `HTTP+JSON`; `src/fast_agent/a2a/connect.py` rejects gRPC aliases; `tests/integration/a2a/test_remote_agent_runtime.py` covers both HTTP bindings. | Implemented and tested. |
+| HTTP/JSON-RPC A2A server support | `src/fast_agent/a2a/server.py` exposes SDK JSON-RPC and REST routes; `tests/integration/a2a/test_fast_agent_a2a_server.py` covers JSON-RPC and HTTP+JSON clients against fast-agent-as-server. | Implemented and tested. |
+| Deployable like ACP/MCP | `fast-agent serve a2a` and `fast-agent serve --transport a2a` are covered by `tests/unit/fast_agent/cli/test_a2a_serve_options.py`; server docs show `fast-agent serve a2a`. | Implemented and tested at CLI request-construction level. |
+| PromptMessageExtended API behaves like normal agents | Client and server bridges convert A2A parts to/from `PromptMessageExtended`; integration tests cover text, stream listeners, raw/blob/image/audio/data content, and history behavior. | Implemented and tested. |
+| `INPUT_REQUIRED` turn management | Client preserves task/context only for `TASK_STATE_INPUT_REQUIRED`; server maps `LlmStopReason.PAUSE` to `TASK_STATE_INPUT_REQUIRED`; integration tests cover follow-up completion in both fake-server and fast-agent-server paths. | Implemented and tested. |
+| Session/context correlation | A2A `contextId` is optional inbound; SDK resolves it. Server `connection` scope maps resolved context id to an instance; `shared` and `request` scopes are explicit alternatives. Tests cover context reuse, no-history fresh contexts, and all instance scopes. | Implemented and tested in-memory. |
+| AgentCard and A2A AgentSkill docs | `docs/docs/a2a/server.md` explains AgentCard interfaces and one A2A `AgentSkill` per loaded fast-agent agent; integration tests assert skills, modes, descriptions, tags, and metadata routing. | Implemented, documented, and tested. |
+| API documentation | `docs/docs/a2a/api.md` covers direct `A2ARemoteAgent`, embedded `AgentA2AServer`, raw JSON-RPC, raw HTTP+JSON, content mapping, and structured JSON data parts. | Documented. |
+| Client/server docs pages and recordings | `docs/docs/a2a/client.md` and `server.md` embed deterministic recordings; client docs embed the real-LLM Hugging Face MCP streaming recording; `scripts/a2a_docs_pipeline.py check` verifies required assets/pages. | Documented and pipeline-checked. |
+| Deterministic API/CLI/TUI tests | API/server integration tests cover runtime protocol behavior; CLI tests cover `--a2a`, `--a2a-transport`, and `serve a2a`; TUI unit tests cover `/a2a` parsing and command dispatch. | Covered, but full prompt-toolkit E2E remains a possible future hardening target. |
+| Real LLM streaming demo with HF MCP | `docs/docs/assets/a2a/a2a-real-llm-hf-streaming.cast` is checked in; `scripts/a2a_docs_pipeline.py record-real-llm` regenerates it with `codexresponses.gpt-5.4-mini` and `https://hf.co/mcp`; client docs embed it. | Implemented as provider smoke artifact. |
+| Structured JSON protocol answer | A2A structured JSON is represented as `Part.data`; fast-agent maps explicit JSON resources to data parts and leaves ordinary model text as text. Documented in API/server/compliance pages and addendum. | Implemented and documented. |
+| Multimodal support | Tests cover raw image input and audio-as-blob preservation; docs list partial typed audio support as a known gap. | Partially implemented and documented. |
+| Hooks/tools/skills bundle deployment | Server docs state A2A serving uses the normal fast-agent runtime, so AgentCards, MCP servers, tools, skills, hooks, model settings, and workflows load before serving. | Documented; mostly proven indirectly through shared serve/bootstrap path. |
+| Review fixes: clone config | `A2ARemoteAgent._clone_constructor_kwargs()` preserves `a2a_config`; integration test covers detached clone contacting remote server. | Fixed and tested. |
+| Review fixes: artifact append semantics | Client assembles per-artifact output and honors append/replacement; integration test covers replacement plus repeated appended chunks. | Fixed and tested. |
+| Review fixes: default transport probing | Client defaults supported protocol bindings to `JSONRPC` and `HTTP+JSON` when no transport is requested. | Fixed and unit/integration covered. |
+| Review fixes: routable AgentCard URLs | Served wildcard-host AgentCards are rewritten from the incoming request base URL; integration test covers wildcard bind. | Fixed and tested. |
+| Review fixes: terminal task ids | Terminal full-task and status events clear task id except for `INPUT_REQUIRED`; unit tests cover full-task terminal behavior. | Fixed and tested. |
+| Review fixes: raw file preservation | Inbound raw non-image file bytes become `BlobResourceContents`; outbound blobs become A2A raw parts. | Fixed and tested. |
+| Review fixes: no-history A2A context reset | `use_history=False` gets a fresh context between completed turns but preserves context/task while continuing `INPUT_REQUIRED`. | Fixed and tested. |
+| Review fixes: A2A instance scope | A2A serve path passes `instance_scope`; server implements `shared`, `connection`, and `request`; tests cover all scopes. | Fixed and tested. |
+
+## Known Gaps
+
+These are documented as current protocol-compliance gaps rather than hidden
+unfinished work:
+
+- gRPC transport is intentionally unsupported.
+- A2A push notifications are not implemented; streaming and polling are the
+  supported client update paths.
+- Extended AgentCard, card signing, extension negotiation, transport-level
+  security scheme advertisement/enforcement, idempotent message replay handling,
+  and persistent task/session storage are not implemented.
+- Audio is preserved as raw/blob content on the server rather than mapped to a
+  dedicated fast-agent audio content object.
+- Ordinary model text that contains JSON is not guessed into protocol data.
+
+## Final Verification
+
+Final verification was rerun successfully with:
+
+```bash
+uv run pytest tests/integration/a2a \
+  tests/unit/fast_agent/test_a2a_remote_agent_events.py \
+  tests/unit/fast_agent/test_a2a_remote_agent_config.py \
+  tests/unit/fast_agent/a2a_connect_test.py \
+  tests/unit/fast_agent/cli/test_a2a_go_options.py \
+  tests/unit/fast_agent/cli/test_a2a_serve_options.py \
+  tests/unit/fast_agent/ui/test_parse_a2a_commands.py \
+  tests/unit/fast_agent/ui/test_a2a_command_dispatch.py \
+  tests/unit/fast_agent/core/test_a2a_error_formatting.py \
+  tests/unit/test_a2a_docs_pipeline.py \
+  -q
+uv run scripts/a2a_docs_pipeline.py check
+uv run scripts/lint.py
+uv run scripts/typecheck.py
+```
+
+Also verify the checked-in real-LLM cast does not contain provider secrets:
+
+```bash
+rg -q "hf_|sk-|OPENAI|ANTHROPIC|Authorization|Bearer|HF_TOKEN|OPENAI_API_KEY|ANTHROPIC_API_KEY" \
+  docs/docs/assets/a2a/a2a-real-llm-hf-streaming.cast
+```
+
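+The secret scan passes when `rg` exits with status 1, which means it found no
+matches. Exit status 0 means a potential secret is present in the cast and must
+be removed before committing.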
diff --git a/plan/05-21-a2a-goal-addendum.md b/plan/05-21-a2a-goal-addendum.md
new file mode 100644
index 000000000..ab3f2a765
--- /dev/null
+++ b/plan/05-21-a2a-goal-addendum.md
@@ -0,0 +1,51 @@
+# A2A Goal Addendum
+
+This addendum extends the active A2A completion goal with the latest review
+requirements that are not represented in the immutable goal tracker text.
+
+## Documentation recordings
+
+- Include at least one asciinema recording that shows a real fast-agent A2A
+  client streaming from a real fast-agent A2A server backed by an LLM.
+- The preferred provider smoke path is:
+  - server model: `codexresponses.gpt-5.4-mini`;
+  - server tools: Hugging Face MCP server;
+  - client prompt: ask for a Markdown-formatted answer about trending Hugging
+    Face models;
+  - expected user-visible behavior: the client receives streaming updates before
+    the final task completion.
+- Keep this recording separate from deterministic fake-server recordings. The
+  deterministic recordings and tests remain the required regression signal; the
+  real-LLM recording is a provider/network smoke demonstration.
+
+Current implementation notes:
+
+- `docs/docs/assets/a2a/a2a-real-llm-hf-streaming.cast` is the expected checked-in
+  cast file.
+- `uv run scripts/a2a_docs_pipeline.py record-real-llm` is the expected
+  regeneration command.
+- The checked-in cast must not contain provider tokens, bearer headers, or other
+  secrets.
+
+## Structured JSON output
+
+A2A protocol support for structured JSON is through `Part.data`, not through an
+LLM-output-schema negotiation feature. The fast-agent integration should treat
+structured JSON as protocol data only when it is represented as structured
+content, and should keep ordinary model text as text.
+
+Expected fast-agent mapping:
+
+- inbound A2A `Part.data` maps into fast-agent prompt content as formatted JSON
+  text unless a richer internal structured-content representation is added later;
+- outbound fast-agent `TextResourceContents` with
+  `mimeType="application/json"` maps to A2A `Part.data`;
+- ordinary model text that happens to contain JSON remains a text artifact;
+- docs must make this distinction explicit so users do not assume A2A provides
+  model-level JSON schema enforcement.
+
+Open follow-up:
+
+- If fast-agent adds a first-class structured-output content object later, the
+  A2A bridge should map that object directly to `Part.data` instead of requiring
+  JSON `TextResourceContents`.
diff --git a/plan/05-21-a2a-oauth-hf-hosting.md b/plan/05-21-a2a-oauth-hf-hosting.md
new file mode 100644
index 000000000..08cba6f9e
--- /dev/null
+++ b/plan/05-21-a2a-oauth-hf-hosting.md
@@ -0,0 +1,146 @@
+# A2A OAuth and Hugging Face Hosting Goal
+
+Status: bearer/Hugging Face pass and first A2A client browser OAuth pass implemented.
+
+## Goal
+
+Add OAuth authentication for fast-agent A2A servers and clients, with Hugging
+Face Spaces as a first-class hosted deployment target.
+
+The key requirement is credential pass-through: when an A2A server is hosted on
+Hugging Face, the caller's OAuth/bearer credential must be available inside the
+fast-agent request context so Hugging Face Inference Provider models, the
+Hugging Face MCP server, and Hugging Face tools can act using that caller
+credential.
+
+## Outcomes
+
+- [x] A2A servers can enforce bearer authentication for `JSONRPC` and
+  `HTTP+JSON` endpoints.
+- [x] Hugging Face Spaces deployment supports `X-HF-Authorization` and standard
+  `Authorization` bearer headers, matching the existing MCP behavior.
+- [x] The request bearer token is written to `request_bearer_token` while the
+  fast-agent agent handles an A2A request.
+- [x] Served A2A AgentCards advertise security metadata through A2A
+  `security_schemes`, `security_requirements`, and per-skill security
+  requirements.
+- [x] A2A clients can send static bearer/HF tokens, including automatic
+  Hugging Face token headers for Hugging Face URLs.
+- [x] A2A clients can use the existing fast-agent OAuth browser flow where the
+  remote AgentCard advertises OAuth or OIDC.
+- [x] Documentation includes an A2A "Host on HF" page with Space setup,
+  environment variables, OAuth behavior, AgentCard security metadata, and
+  inference-provider usage.
+
+## Existing Pieces to Reuse
+
+- `fast_agent.mcp.auth.middleware.HFAuthHeaderMiddleware`
+  - Normalizes `X-HF-Authorization` to `Authorization`.
+- `fast_agent.mcp.auth.context.request_bearer_token`
+  - Request-scoped token context already consumed by provider key resolution.
+- `fast_agent.mcp.auth.presence.PresenceTokenVerifier`
+  - Useful model for the initial server-side "present bearer token" check.
+- `fast_agent.mcp.hf_auth.add_hf_auth_header`
+  - Adds Hugging Face token headers for `hf.co`, `huggingface.co`, and
+    `*.hf.space`.
+- `fast_agent.mcp.oauth_client`
+  - Existing OAuth login, callback, keyring, and client metadata machinery.
+- A2A SDK `AuthInterceptor`, `CredentialService`, `ClientCallContext`, and
+  AgentCard security scheme types.
+
+## Proposed Server Design
+
+1. Add A2A serve auth settings:
+   - initially reuse `FAST_AGENT_SERVE_OAUTH=huggingface`;
+   - reuse `FAST_AGENT_OAUTH_SCOPES`;
+   - reuse `FAST_AGENT_OAUTH_RESOURCE_URL`;
+   - later expose CLI/config fields if needed.
+2. Wrap `AgentA2AServer.asgi_app()` with auth middleware when auth is enabled:
+   - public AgentCard route remains reachable;
+   - `/a2a/jsonrpc` and `/a2a/rest` require bearer auth;
+   - missing/invalid auth returns `401` with `WWW-Authenticate`.
+3. Normalize Hugging Face Space headers:
+   - accept `Authorization: Bearer ...`;
+   - accept `X-HF-Authorization: Bearer ...` and copy to `Authorization`.
+4. Propagate credentials:
+   - extract the bearer token from request headers/scope;
+   - set `request_bearer_token` around the agent `generate(...)` call;
+   - reset the context variable after the request.
+5. Advertise security in the AgentCard:
+   - use `HTTPAuthSecurityScheme(scheme="bearer")` for the first pass;
+   - add `OAuth2SecurityScheme` or `OpenIdConnectSecurityScheme` once we have
+     provider metadata details that A2A clients can use reliably.
+
+Implemented first pass:
+
+- `FAST_AGENT_SERVE_OAUTH=huggingface` enables A2A bearer auth.
+- AgentCard discovery stays public.
+- `/a2a/jsonrpc` and `/a2a/rest` require a bearer token.
+- `X-HF-Authorization` is accepted and normalized for Hugging Face Spaces.
+- The bearer token is available through `request_bearer_token` while the
+  fast-agent agent runs.
+- The public AgentCard advertises an `hf_bearer` HTTP bearer security scheme.
+
+## Proposed Client Design
+
+1. Static token support:
+   - keep explicit `headers` on `A2AAgentConfig`;
+   - add `auth`/`oauth` fields only if the UX needs parity with MCP cards;
+   - automatically add HF token headers for Hugging Face URLs when no explicit
+     auth header is present.
+2. AgentCard-driven credential injection:
+   - inspect `remote_card.security_schemes` and `security_requirements`;
+   - use A2A SDK `AuthInterceptor` with a fast-agent `CredentialService`;
+   - pass per-call `ClientCallContext` so the SDK transports receive
+     `Authorization` or API key headers.
+3. OAuth browser flow:
+   - adapt `fast_agent.mcp.oauth_client` to an A2A-oriented server identity;
+   - store tokens in keyring using a distinct service or identity prefix;
+   - emit OAuth events through CLI/TUI surfaces similarly to `/mcp connect`.
+
+Implemented first pass:
+
+- A2A explicit `headers` remain supported.
+- A2A clients automatically apply Hugging Face token headers through
+  `add_hf_auth_header(...)` for Hugging Face URLs when no explicit auth header
+  is configured.
+- Checked-in A2A AgentCards accept an `auth` block compatible with the existing
+  MCP OAuth settings.
+- `fast-agent --a2a` accepts `--a2a-oauth` and `--no-a2a-oauth`.
+- `/a2a connect` accepts `--oauth` and `--no-oauth`.
+- When `auth` is omitted, A2A clients attach the existing browser OAuth
+  `httpx` auth flow only when the remote AgentCard advertises OAuth2 or OpenID
+  Connect security schemes.
+- The A2A OAuth bridge preserves the A2A base URL as the protected-resource
+  identity instead of rewriting it to an MCP endpoint.
+
+## Testing
+
+- Server auth:
+  - public AgentCard route is accessible;
+  - A2A routes reject missing bearer tokens;
+  - `Authorization` reaches `request_bearer_token`;
+  - `X-HF-Authorization` reaches `request_bearer_token` in HF mode.
+- AgentCard metadata:
+  - auth-enabled A2A server advertises expected security schemes and
+    requirements.
+- Client auth:
+  - explicit A2A headers are sent;
+  - HF token auto-header logic applies only to Hugging Face URLs;
+  - existing explicit auth headers win over auto HF auth;
+  - OAuth/OIDC AgentCards enable the browser OAuth provider;
+  - `auth.oauth: false` suppresses browser OAuth;
+  - checked-in A2A cards, CLI `--a2a`, and `/a2a connect` parse OAuth settings.
+- Inference pass-through:
+  - deterministic test agent reads `request_bearer_token`;
+  - provider-key-manager behavior can use the request token for Hugging Face.
+
+## Open Questions
+
+- Should A2A server auth configuration live only in shared serve environment
+  variables, or should `fast-agent serve a2a` expose first-class `--oauth`
+  flags?
+- Should A2A OAuth tokens share the existing MCP keyring service/index long
+  term, or should they move to a distinct A2A service name after migration?
+- Should authenticated extended AgentCards be implemented as part of this work,
+  or should public cards advertise enough security metadata for the first pass?
diff --git a/plan/05-21-a2a-orientation.md b/plan/05-21-a2a-orientation.md
new file mode 100644
index 000000000..3b56f5f96
--- /dev/null
+++ b/plan/05-21-a2a-orientation.md
@@ -0,0 +1,451 @@
+# A2A orientation guide
+
+Date: 2026-05-21
+Last updated: 2026-05-21
+
+## Purpose
+
+This note is a handoff/orientation guide for fast-agent's current A2A client
+and server work. It points to the key files, deterministic fixtures, docs
+assets, and manual commands so the next session can avoid rediscovery.
+
+## Current feature shape
+
+fast-agent is currently both an A2A **client** and **server** for HTTP
+transports:
+
+- remote A2A agents can be configured via `type: a2a` AgentCards;
+- `/a2a connect` can create a runtime A2A agent from the TUI;
+- JSON-RPC and HTTP+JSON are covered by deterministic integration tests;
+- gRPC is intentionally out of scope for this A2A support pass and is rejected
+  by fast-agent card/command validation;
+- A2A messages map to normal fast-agent user/assistant turns and local history;
+- A2A `context_id`, `task_id`, and task state are tracked on `A2ARemoteAgent`.
+- `fast-agent serve a2a` and `fast-agent serve --transport a2a` expose loaded
+  fast-agent agents over JSON-RPC and HTTP+JSON.
+- served A2A AgentCards advertise one A2A `AgentSkill` per loaded fast-agent
+  agent, plus JSON-RPC and HTTP+JSON interfaces.
+- server-side `contextId` is optional in inbound A2A messages; the SDK resolves
+  one when omitted, and `--instance-scope connection` uses it as the fast-agent
+  instance/session key.
+
+## Key implementation files
+
+### Runtime/client adapter
+
+- `src/fast_agent/a2a/remote_agent.py`
+  - `A2ARemoteAgent` is the main adapter from fast-agent `AgentProtocol` to a
+    remote A2A SDK client.
+  - Builds `SendMessageRequest` from fast-agent messages.
+  - Converts outbound content to A2A `Part`s in `_parts_from_messages(...)`.
+  - Consumes A2A event streams in `_consume_events(...)`.
+  - Tracks `context_id`, `current_task_id`, and `last_task_state`.
+  - Emits `StreamChunk` values from direct message/artifact text updates.
+  - Renders inbound URL/data/raw/text parts via `_part_text(...)`.
+
+- `src/fast_agent/a2a/config.py`
+  - `A2AAgentConfig` fields:
+    - `url`
+    - `transport`
+    - `streaming`
+    - `polling`
+    - `accepted_output_modes`
+    - `headers`
+    - `relative_card_path`
+    - `request_timeout_seconds`
+
+- `src/fast_agent/a2a/connect.py`
+  - URL and argument normalization for `/a2a connect`.
+  - Transport aliases:
+    - `jsonrpc`, `json-rpc`, `rpc` -> `JSONRPC`
+    - `http`, `http+json`, `rest` -> `HTTP+JSON`
+    - `grpc` -> rejected; gRPC is not supported by fast-agent A2A.
+
+### Server adapter
+
+- `src/fast_agent/a2a/server.py`
+  - `AgentA2AServer` exposes fast-agent through SDK JSON-RPC and REST routes.
+  - `FastAgentA2AExecutor` maps A2A messages to `PromptMessageExtended`, calls
+    the selected fast-agent agent, and emits A2A task status/artifact updates.
+  - `TaskArtifactUpdateEvent.append` is used for streaming chunks; the final
+    response can replace the streamed artifact when needed.
+  - `TASK_STATE_INPUT_REQUIRED` is returned when the fast-agent response has
+    `LlmStopReason.PAUSE`.
+  - `AUTH_REQUIRED`, `FAILED`, and `CANCELED` states are mapped from provider
+    auth errors, unexpected execution errors, and cancellation.
+  - raw image/file/data/text URL parts are bridged to and from fast-agent
+    content blocks.
+
+### AgentCard/direct factory wiring
+
+- `src/fast_agent/core/agent_card_loader.py`
+  - Parses and serializes `type: a2a` cards.
+
+- `src/fast_agent/core/agent_card_rules.py`
+  - A2A card validation/rules.
+
+- `src/fast_agent/core/direct_factory.py`
+  - `_create_a2a_agent(...)` constructs and initializes `A2ARemoteAgent`.
+
+### UI/TUI wiring
+
+- `src/fast_agent/ui/interactive/command_dispatch.py`
+  - Dispatches `/a2a list`, `/a2a status`, `/a2a card`, `/a2a reset`,
+    `/a2a transport`, and `/a2a connect`.
+
+- `src/fast_agent/ui/prompt/parser.py`
+  - Parses `/a2a ...` input into `A2ACommand`.
+
+- `src/fast_agent/ui/prompt/input_toolbar.py`
+  - A2A toolbar treatment.
+  - A2A agents show `A2A`/remote card name instead of local model info.
+
+- `src/fast_agent/ui/prompt/toolbar.py`
+  - Active-agent toolbar styling shows `name[A2A]`.
+
+### CLI wiring
+
+- `src/fast_agent/cli/commands/go.py`
+  - `--a2a` and `--a2a-transport` runtime connection shortcuts.
+
+- `src/fast_agent/cli/commands/serve.py`
+  - `fast-agent serve a2a` subcommand.
+  - legacy-compatible `fast-agent serve --transport a2a` callback path.
+
+## Deterministic A2A fixtures and tests
+
+### Integration fixture
+
+- `tests/integration/a2a/conftest.py`
+  - Starts an in-process deterministic A2A server on a free port.
+  - `EchoAgentExecutor` scenarios:
+    - normal echo;
+    - fake server help: `help`, `?`, `commands`, `menu`, or `what can you do`;
+    - short stream: `please stream`;
+    - long stream: `please long stream`;
+    - file/data/raw response: `respond with files`;
+    - `INPUT_REQUIRED` flow: `need input`, then any follow-up such as `blue`.
+
+### Manual fake server
+
+- `tests/integration/a2a/fake_server.py`
+  - Standalone deterministic fake server for CLI/TUI demos.
+  - Run:
+
+    ```bash
+    uv run python tests/integration/a2a/fake_server.py --port 41242
+    ```
+
+  - AgentCard:
+
+    ```text
+    http://127.0.0.1:41242/.well-known/agent-card.json
+    ```
+
+  - JSON-RPC:
+
+    ```text
+    http://127.0.0.1:41242/a2a/jsonrpc
+    ```
+
+  - HTTP+JSON:
+
+    ```text
+    http://127.0.0.1:41242/a2a/rest
+    ```
+
+  - Useful prompts:
+    - `help`
+    - `hello`
+    - `please stream`
+    - `please long stream`
+    - `respond with files`
+    - `need input`, followed by `blue`
+
+### Tests
+
+- `tests/integration/a2a/test_remote_agent_runtime.py`
+  - JSON-RPC and HTTP+JSON text send.
+  - Short streaming chunks.
+  - Long streaming chunks.
+  - Inbound URL/data/raw rendering.
+  - Outbound URL/raw parts.
+  - JSON `TextResourceContents` emitted as A2A data parts.
+  - no-history A2A context reset except while continuing `INPUT_REQUIRED`.
+  - `/a2a connect` runtime agent creation.
+  - `INPUT_REQUIRED` task preservation and follow-up completion.
+
+- `tests/integration/a2a/test_fast_agent_a2a_server.py`
+  - fast-agent served as JSON-RPC and HTTP+JSON A2A server.
+  - context/session continuity, request/shared/connection instance scopes, and
+    served-agent `use_history` behavior.
+  - generated AgentCard interfaces, wildcard host rewriting, and A2A
+    `AgentSkill` advertisement/routing.
+  - streaming artifact updates, final artifact replacement, and cancellation.
+  - raw image/audio/file preservation and outbound raw/data/url/text mapping.
+  - task list/get/cancel behavior through SDK handlers.
+
+- Unit coverage:
+  - `tests/unit/fast_agent/test_a2a_remote_agent_events.py`
+  - `tests/unit/fast_agent/test_a2a_remote_agent_config.py`
+  - `tests/unit/fast_agent/a2a_connect_test.py`
+  - `tests/unit/fast_agent/cli/test_a2a_go_options.py`
+  - `tests/unit/fast_agent/cli/test_a2a_serve_options.py`
+  - `tests/unit/fast_agent/ui/test_parse_a2a_commands.py`
+  - `tests/unit/fast_agent/ui/test_a2a_command_dispatch.py`
+  - `tests/unit/fast_agent/core/test_a2a_error_formatting.py`
+
+Run:
+
+```bash
+uv run pytest tests/integration/a2a -q
+```
+
+## Manual smoke commands
+
+Start fake server:
+
+```bash
+uv run python tests/integration/a2a/fake_server.py --port 41242
+```
+
+Short stream:
+
+```bash
+uv run fast-agent -x \
+  --a2a http://127.0.0.1:41242 \
+  --a2a-transport JSONRPC \
+  --message "please stream" \
+  --quiet
+```
+
+Long stream:
+
+```bash
+uv run fast-agent -x \
+  --a2a http://127.0.0.1:41242 \
+  --a2a-transport JSONRPC \
+  --message "please long stream" \
+  --quiet
+```
+
+Files/data/raw:
+
+```bash
+uv run fast-agent -x \
+  --a2a http://127.0.0.1:41242 \
+  --a2a-transport HTTP+JSON \
+  --message "respond with files" \
+  --quiet
+```
+
+TUI:
+
+```bash
+uv run fast-agent -x --a2a http://127.0.0.1:41242 --a2a-transport JSONRPC
+```
+
+Then try:
+
+```text
+/a2a help
+help
+/a2a status
+/a2a transport
+please stream
+please long stream
+respond with files
+need input
+/a2a status
+blue
+```
+
+Serve fast-agent as A2A:
+
+```bash
+uv run fast-agent serve a2a \
+  --host 127.0.0.1 \
+  --port 41241 \
+  --instance-scope connection \
+  --agent-cards ./agents
+```
+
+Fetch the served card:
+
+```bash
+curl -s http://127.0.0.1:41241/.well-known/agent-card.json | jq .
+```
+
+## Docs and recordings
+
+- `docs/docs/a2a/getting-started.md`
+  - User-facing A2A getting-started page.
+  - Includes short streaming, long streaming, file/data/raw, and
+    `INPUT_REQUIRED` explanation.
+
+- `docs/docs/a2a/client.md`
+  - Client CLI, AgentCard, TUI, streaming, `INPUT_REQUIRED`, content mapping,
+    error handling, and resumption guidance.
+  - Embeds deterministic client recordings and the provider-backed real LLM
+    recording.
+
+- `docs/docs/a2a/server.md`
+  - `fast-agent serve a2a`, served AgentCard interface URLs, runtime wiring,
+    A2A `AgentSkill` exposure, instance scopes, streaming, errors, file parts,
+    and structured JSON.
+
+- `docs/docs/a2a/api.md`
+  - Direct `A2ARemoteAgent` usage, `AgentA2AServer` embedding, raw JSON-RPC and
+    HTTP+JSON examples, content mapping, and explicit JSON data part examples.
+
+- `docs/docs/a2a/protocol-compliance.md`
+  - Current support matrix, known gaps, and verification coverage against A2A
+    Protocol Specification 1.0.
+
+- `docs/docs/a2a/snippets/`
+  - Generated snippets consumed by the docs page.
+
+- `docs/docs/assets/a2a/a2a-streaming-files.cast`
+  - Embedded asciinema recording for the A2A TUI flow.
+
+- `docs/docs/assets/a2a/a2a-real-llm-hf-streaming.cast`
+  - Provider/network smoke recording.
+  - Shows `fast-agent serve a2a` backed by `codexresponses.gpt-5.4-mini`,
+    connected to the Hugging Face MCP server, and an interactive fast-agent A2A
+    client asking for a markdown answer about trending Hugging Face models.
+
+- `~/plan/records/a2a-streaming-files-input-required.cast`
+  - Local copy of the latest generated recording.
+
+- `scripts/a2a_docs_pipeline.py`
+  - `generate`: refresh snippets and CLI outputs.
+  - `check`: verify snippets/assets are in sync.
+  - `record`: regenerate the asciinema recording using tmux.
+  - `record-real-llm`: regenerate the provider-backed Hugging Face MCP/LLM
+    streaming recording; requires `HF_TOKEN`, `OPENAI_API_KEY`, network access,
+    `asciinema`, `tmux`, and `curl`.
+  - Note: the script uses fixed ports `41242` (fake server) and `41243`
+    (`record-real-llm`); if `41242` is already bound, it now fails early.
+
+Commands:
+
+```bash
+uv run scripts/a2a_docs_pipeline.py generate
+uv run scripts/a2a_docs_pipeline.py check
+uv run scripts/a2a_docs_pipeline.py record
+uv run scripts/a2a_docs_pipeline.py record-real-llm
+```
+
+## A2A SDK reference checkout
+
+Local SDK checkout:
+
+```text
+../a2a-python/
+```
+
+Useful files there:
+
+- `samples/hello_world_agent.py`
+  - Standalone SDK sample server exposing JSON-RPC, HTTP+JSON, gRPC v1.0, and
+    gRPC v0.3 compatibility.
+
+- `tck/sut_agent.py`
+  - Useful reference for `TaskStatusUpdateEvent`, `TASK_STATE_WORKING`,
+    `TASK_STATE_INPUT_REQUIRED`, cancellation, and multi-transport setup.
+
+- `tests/integration/test_end_to_end.py`
+  - Good fixture/reference for direct message responses, task responses,
+    status-message text, artifact updates, and `INPUT_REQUIRED`.
+
+## Protocol/event concepts currently used
+
+Observed current A2A stream payload oneof fields:
+
+- `task`
+- `message`
+- `status_update`
+- `artifact_update`
+
+Relevant proto/model fields:
+
+- `Task.context_id`
+- `Task.id`
+- `Task.status.state`
+- `Task.artifacts`
+- `TaskStatus.message`
+- `TaskStatusUpdateEvent.task_id`
+- `TaskStatusUpdateEvent.context_id`
+- `TaskArtifactUpdateEvent.artifact`
+- `TaskArtifactUpdateEvent.append`
+- `TaskArtifactUpdateEvent.last_chunk`
+- `Message.context_id`
+- `Message.task_id`
+- `Message.parts`
+- `Part.text`
+- `Part.raw`
+- `Part.url`
+- `Part.data`
+- `Part.filename`
+- `Part.media_type`
+
+Current fast-agent mapping:
+
+- one `A2ARemoteAgent` instance owns one active remote `context_id`;
+- terminal non-input-required tasks clear `current_task_id`;
+- `TASK_STATE_INPUT_REQUIRED` preserves `current_task_id`;
+- the next user turn is sent with that pending `task_id`;
+- `/a2a reset` creates a fresh remote context and clears task state.
+- when local A2A card/request `use_history=False`, the client generates a fresh
+  remote context for completed turns, but preserves context/task state while
+  continuing `INPUT_REQUIRED`.
+- on the server, `--instance-scope connection` maps resolved A2A `context_id`
+  to a fast-agent instance; `shared` ignores per-context instance isolation and
+  `request` creates a fresh instance per message.
+- structured JSON is protocol-level data only when represented as A2A `data`
+  parts or fast-agent `TextResourceContents(mimeType="application/json")`.
+  Ordinary model text remains a text artifact.
+
+## Known gaps / next good targets
+
+- A2A TVD capability chip:
+  - remote card `default_input_modes` and `skills[*].input_modes` can be used to
+    infer text/document/vision capability;
+  - toolbar currently special-cases A2A and does not render a TVD segment.
+
+- Inbound file persistence:
+  - server-side inbound raw bytes are preserved as `BlobResourceContents` for
+    the fast-agent agent;
+  - client-side inbound remote raw bytes are still rendered as readable text
+    placeholders and are not saved to `<env>/a2a/` yet.
+
+- gRPC integration coverage:
+  - intentionally out of scope for the current A2A HTTP support target.
+
+- Persistent task/session storage:
+  - the server currently uses the SDK `InMemoryTaskStore` plus in-memory
+    fast-agent instances; process restart loses A2A task/context state.
+
+- A2A security schemes:
+  - client connections support headers;
+  - served A2A AgentCards do not yet advertise configurable security schemes or
+    enforce A2A transport-level client authentication.
+
+## Required validation after A2A changes
+
+At minimum:
+
+```bash
+uv run pytest tests/integration/a2a -q
+uv run pytest tests/unit/fast_agent/test_a2a_remote_agent_events.py \
+  tests/unit/fast_agent/test_a2a_remote_agent_config.py \
+  tests/unit/fast_agent/a2a_connect_test.py \
+  tests/unit/fast_agent/cli/test_a2a_go_options.py \
+  tests/unit/fast_agent/cli/test_a2a_serve_options.py \
+  tests/unit/fast_agent/ui/test_parse_a2a_commands.py \
+  tests/unit/fast_agent/ui/test_a2a_command_dispatch.py \
+  tests/unit/fast_agent/core/test_a2a_error_formatting.py -q
+uv run scripts/a2a_docs_pipeline.py check
+uv run scripts/lint.py
+uv run scripts/typecheck.py
+```
diff --git a/scripts/a2a_docs_pipeline.py b/scripts/a2a_docs_pipeline.py
new file mode 100755
index 000000000..a143122c2
--- /dev/null
+++ b/scripts/a2a_docs_pipeline.py
@@ -0,0 +1,513 @@
+#!/usr/bin/env python3
+"""Generate and verify A2A getting-started docs assets.
+
+This script keeps docs examples, smoke-test commands, and captured output aligned.
+It starts the deterministic fake A2A server, runs the documented CLI examples,
+and writes the snippets consumed by docs/docs/a2a/getting-started.md.
+
+Usage:
+    uv run scripts/a2a_docs_pipeline.py generate
+    uv run scripts/a2a_docs_pipeline.py check
+    uv run scripts/a2a_docs_pipeline.py record
+    uv run scripts/a2a_docs_pipeline.py record-real-llm
+"""
+
+from __future__ import annotations
+
+import argparse
+import importlib.util
+import os
+import shutil
+import signal
+import subprocess
+import sys
+import time
+import urllib.request
+from pathlib import Path
+
+
+def _load_docs_asset_helpers():
+    try:
+        from docs_assets import record_asciinema_cast, require_recording_tools
+
+        return record_asciinema_cast, require_recording_tools
+    except ModuleNotFoundError:
+        path = Path(__file__).resolve().parent / "docs_assets.py"
+        spec = importlib.util.spec_from_file_location("docs_assets", path)
+        if spec is None or spec.loader is None:
+            raise
+        module = importlib.util.module_from_spec(spec)
+        sys.modules[spec.name] = module
+        spec.loader.exec_module(module)
+        return module.record_asciinema_cast, module.require_recording_tools
+
+
+# Resolve the recording helpers once at import time; they are used by
+# record(), record_real_llm(), and _require_real_llm_recording_tools().
+record_asciinema_cast, require_recording_tools = _load_docs_asset_helpers()
+
+# Repository root: this file lives in <root>/scripts/.
+ROOT = Path(__file__).resolve().parent.parent
+DOCS_A2A = ROOT / "docs" / "docs" / "a2a"
+# Generated snippet files written by generate() and compared by check().
+SNIPPETS = DOCS_A2A / "snippets"
+# Committed docs assets (asciinema casts) for the A2A pages.
+ASSETS = ROOT / "docs" / "docs" / "assets" / "a2a"
+# Per-user directory under the home directory; generate() copies a cast from
+# here when one exists.
+RECORDS = Path.home() / "plan" / "records"
+# Fixed port for the deterministic fake A2A server.
+PORT = 41242
+# Fixed port for the fast-agent A2A server started by record-real-llm.
+REAL_LLM_PORT = 41243
+BASE_URL = f"http://127.0.0.1:{PORT}"
+REAL_LLM_BASE_URL = f"http://127.0.0.1:{REAL_LLM_PORT}"
+# Defaults for record-real-llm; _start_real_llm_server() lets the environment
+# override them via A2A_HF_MCP_URL, A2A_REAL_LLM_MODEL, and
+# A2A_REAL_LLM_READY_TIMEOUT_SECONDS.
+REAL_LLM_MCP_URL = "https://hf.co/mcp"
+REAL_LLM_MODEL = "codexresponses.gpt-5.4-mini"
+REAL_LLM_CAST = "a2a-real-llm-hf-streaming.cast"
+REAL_LLM_SERVER_LOG = Path("/tmp/a2a-real-llm-server.log")
+REAL_LLM_READY_TIMEOUT_SECONDS = 90.0
+
+# Snippet bodies. The shell commands are both written to the docs and, for
+# STREAM_COMMAND and FILES_COMMAND, executed by generate() to capture output.
+START_FAKE_SERVER = f"uv run python tests/integration/a2a/fake_server.py --port {PORT}\n"
+STREAM_COMMAND = f"""uv run fast-agent -x \\
+  --a2a {BASE_URL} \\
+  --a2a-transport JSONRPC \\
+  --message "please stream" \\
+  --quiet
+"""
+FILES_COMMAND = f"""uv run fast-agent -x \\
+  --a2a {BASE_URL} \\
+  --a2a-transport HTTP+JSON \\
+  --message "respond with files" \\
+  --quiet
+"""
+AGENT_CARD = f"""type: a2a
+name: fake_remote
+url: {BASE_URL}
+transport: JSONRPC
+"""
+TUI_SESSION = "/a2a help\nhelp\n/a2a status\n/a2a transport\nplease stream\nrespond with files\nneed input\nblue\n"
+
+# Snippet filename -> exact text; written verbatim by generate() and used as
+# the expected content in check().
+STATIC_SNIPPETS = {
+    "start-fake-server.sh": START_FAKE_SERVER,
+    "cli-stream-command.sh": STREAM_COMMAND,
+    "cli-files-command.sh": FILES_COMMAND,
+    "agent-card.yaml": AGENT_CARD,
+    "tui-session.txt": TUI_SESSION,
+}
+
+
+def _write(path: Path, text: str) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(text, encoding="utf-8")
+
+
+def _run(command: str) -> str:
+    result = subprocess.run(
+        command,
+        cwd=ROOT,
+        shell=True,
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        check=False,
+    )
+    if result.returncode != 0:
+        raise RuntimeError(
+            f"command failed ({result.returncode}): {command}\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
+        )
+    return result.stdout.strip() + "\n"
+
+
+def _wait_for_server(process: subprocess.Popen[str]) -> None:
+    deadline = time.monotonic() + 10
+    url = f"{BASE_URL}/.well-known/agent-card.json"
+    while time.monotonic() < deadline:
+        if process.poll() is not None:
+            raise RuntimeError("fake A2A server exited before it was ready")
+        try:
+            with urllib.request.urlopen(url, timeout=0.5) as response:  # noqa: S310 - local test server
+                if response.status == 200:
+                    return
+        except OSError:
+            time.sleep(0.2)
+    raise TimeoutError(f"fake A2A server did not become ready at {url}")
+
+
+def _start_server() -> subprocess.Popen[str]:
+    process = subprocess.Popen(
+        [
+            "uv",
+            "run",
+            "python",
+            "tests/integration/a2a/fake_server.py",
+            "--port",
+            str(PORT),
+        ],
+        cwd=ROOT,
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    time.sleep(0.2)
+    if process.poll() is not None:
+        stdout = process.stdout.read() if process.stdout else ""
+        stderr = process.stderr.read() if process.stderr else ""
+        raise RuntimeError(
+            f"fake A2A server exited immediately. Is port {PORT} already in use?\n"
+            f"STDOUT:\n{stdout}\nSTDERR:\n{stderr}"
+        )
+    _wait_for_server(process)
+    return process
+
+
+def _stop_server(process: subprocess.Popen[str]) -> None:
+    process.terminate()
+    try:
+        process.wait(timeout=5)
+    except subprocess.TimeoutExpired:
+        process.kill()
+        process.wait(timeout=5)
+
+
+def _log_tail(path: Path, *, lines: int = 80) -> str:
+    if not path.exists():
+        return f"{path} does not exist"
+    content = path.read_text(encoding="utf-8", errors="replace").splitlines()
+    return "\n".join(content[-lines:])
+
+
+def _wait_for_url(
+    url: str,
+    *,
+    process: subprocess.Popen[str] | None = None,
+    log_path: Path | None = None,
+    timeout_seconds: float = 10.0,
+) -> None:
+    deadline = time.monotonic() + timeout_seconds
+    while time.monotonic() < deadline:
+        if process is not None and process.poll() is not None:
+            details = f"\nLOG:\n{_log_tail(log_path)}" if log_path is not None else ""
+            raise RuntimeError(
+                f"process exited before {url} became ready with status {process.returncode}{details}"
+            )
+        try:
+            with urllib.request.urlopen(url, timeout=0.5) as response:  # noqa: S310 - docs smoke URL
+                if response.status == 200:
+                    return
+        except OSError:
+            time.sleep(0.5)
+    details = f"\nLOG:\n{_log_tail(log_path)}" if log_path is not None else ""
+    raise TimeoutError(f"{url} did not become ready within {timeout_seconds:.1f}s{details}")
+
+
+def generate() -> None:
+    SNIPPETS.mkdir(parents=True, exist_ok=True)
+    ASSETS.mkdir(parents=True, exist_ok=True)
+    for filename, text in STATIC_SNIPPETS.items():
+        _write(SNIPPETS / filename, text)
+
+    server = _start_server()
+    try:
+        _write(SNIPPETS / "cli-stream-output.txt", _run(STREAM_COMMAND))
+        _write(SNIPPETS / "cli-files-output.txt", _run(FILES_COMMAND))
+    finally:
+        _stop_server(server)
+
+    source_cast = RECORDS / "a2a-streaming-files.cast"
+    if source_cast.exists():
+        shutil.copyfile(source_cast, ASSETS / "a2a-streaming-files.cast")
+
+
+def check() -> None:
+    expected = dict(STATIC_SNIPPETS)
+    expected["cli-stream-output.txt"] = "stream chunk one\nstream chunk two\n"
+    expected["cli-files-output.txt"] = (
+        "file response\n"
+        "[report.pdf](https://example.com/report.pdf) (application/pdf)\n"
+        "```json\n"
+        "{\n"
+        "  \"ok\": true,\n"
+        "  \"source\": \"fake-a2a-server\"\n"
+        "}\n"
+        "```\n"
+        "[note.txt: 3 bytes text/plain]\n"
+    )
+    missing_or_changed: list[str] = []
+    for filename, text in expected.items():
+        path = SNIPPETS / filename
+        if not path.exists() or path.read_text(encoding="utf-8") != text:
+            missing_or_changed.append(str(path.relative_to(ROOT)))
+    required_assets = [
+        ASSETS / "a2a-streaming-files.cast",
+        ASSETS / "a2a-client-cli.cast",
+        ASSETS / "a2a-client-input-required.cast",
+        ASSETS / "a2a-server-card.cast",
+        ASSETS / REAL_LLM_CAST,
+        ROOT / "docs" / "docs" / "assets" / "vendor" / "asciinema-player" / "asciinema-player.css",
+        ROOT / "docs" / "docs" / "assets" / "vendor" / "asciinema-player" / "catppuccin.css",
+        ROOT / "docs" / "docs" / "assets" / "vendor" / "asciinema-player" / "asciinema-player.min.js",
+    ]
+    for asset in required_assets:
+        if not asset.exists():
+            missing_or_changed.append(str(asset.relative_to(ROOT)))
+
+    page = DOCS_A2A / "getting-started.md"
+    page_text = page.read_text(encoding="utf-8") if page.exists() else ""
+    for required_text in [
+        "AsciinemaPlayer.create",
+        "../../assets/a2a/a2a-streaming-files.cast",
+        "../../assets/vendor/asciinema-player/asciinema-player.css",
+        "../../assets/vendor/asciinema-player/catppuccin.css",
+        "../../assets/vendor/asciinema-player/asciinema-player.min.js",
+        "fast-agent-dark",
+        "fast-agent-light",
+    ]:
+        if required_text not in page_text:
+            missing_or_changed.append(f"{page.relative_to(ROOT)} missing {required_text}")
+
+    page_assets = {
+        DOCS_A2A / "client.md": [
+            "../../assets/a2a/a2a-client-cli.cast",
+            "../../assets/a2a/a2a-client-input-required.cast",
+            f"../../assets/a2a/{REAL_LLM_CAST}",
+        ],
+        DOCS_A2A / "server.md": ["../../assets/a2a/a2a-server-card.cast"],
+    }
+    for asset_page, required_texts in page_assets.items():
+        asset_page_text = asset_page.read_text(encoding="utf-8") if asset_page.exists() else ""
+        for required_text in required_texts:
+            if required_text not in asset_page_text:
+                missing_or_changed.append(
+                    f"{asset_page.relative_to(ROOT)} missing {required_text}"
+                )
+
+    if missing_or_changed:
+        raise SystemExit(
+            "A2A docs snippets/assets are stale; run `uv run scripts/a2a_docs_pipeline.py generate`.\n"
+            + "\n".join(missing_or_changed)
+        )
+
+
+def record() -> None:
+    generate()
+    try:
+        require_recording_tools()
+    except RuntimeError as exc:
+        print(f"{exc}; generated text snippets only", file=sys.stderr)
+        return
+
+    driver = Path("/tmp/a2a-docs-record.sh")
+    driver.write_text(
+        f"""#!/usr/bin/env bash
+set -euo pipefail
+SESSION=a2a_docs_cast
+ROOT={ROOT}
+BASE_URL={BASE_URL}
+
+tmux kill-session -t "$SESSION" 2>/dev/null || true
+tmux new-session -d -s "$SESSION" -x 104 -y 27 \
+  "cd '$ROOT' && TERM=xterm-256color COLORTERM=truecolor FORCE_COLOR=1 FAST_AGENT_KEYRING_NOTICE=0 FAST_AGENT_MODEL=passthrough uv run fast-agent -x --a2a '$BASE_URL' --a2a-transport JSONRPC"
+tmux set-option -t "$SESSION" status off >/dev/null
+
+(
+  sleep 4
+  tmux send-keys -t "$SESSION" '/a2a help' Enter
+  sleep 4
+  tmux send-keys -t "$SESSION" 'help' Enter
+  sleep 4
+  tmux send-keys -t "$SESSION" 'please stream' Enter
+  sleep 4
+  tmux send-keys -t "$SESSION" 'respond with files' Enter
+  sleep 4
+  tmux send-keys -t "$SESSION" 'need input' Enter
+  sleep 4
+  tmux send-keys -t "$SESSION" 'blue' Enter
+  sleep 4
+  tmux send-keys -t "$SESSION" '/exit' Enter
+  sleep 1
+  tmux kill-session -t "$SESSION" 2>/dev/null || true
+) &
+
+tmux attach-session -t "$SESSION" || true
+""",
+        encoding="utf-8",
+    )
+    driver.chmod(0o755)
+
+    server = _start_server()
+    try:
+        record_asciinema_cast(
+            output=ASSETS / "a2a-streaming-files.cast",
+            title="fast-agent A2A streaming, files, and input-required demo",
+            command=str(driver),
+            cols=104,
+            rows=27,
+            cleanup_session="a2a_docs_cast",
+        )
+    finally:
+        _stop_server(server)
+
+
+def _require_real_llm_recording_tools() -> None:
+    try:
+        require_recording_tools(("asciinema", "tmux", "curl"))
+    except RuntimeError as exc:
+        raise SystemExit(str(exc).replace("Cannot record docs assets", "record-real-llm")) from exc
+    missing_env = [
+        name
+        for name in ["HF_TOKEN", "OPENAI_API_KEY"]
+        if not os.environ.get(name)
+    ]
+    if missing_env:
+        raise SystemExit(
+            "record-real-llm requires environment variables: " + ", ".join(missing_env)
+        )
+
+
+def _start_real_llm_server(instruction: Path) -> subprocess.Popen[str]:
+    REAL_LLM_SERVER_LOG.unlink(missing_ok=True)
+    log_file = REAL_LLM_SERVER_LOG.open("w", encoding="utf-8")
+    env = os.environ.copy()
+    env["FAST_AGENT_KEYRING_NOTICE"] = "0"
+    model = env.get("A2A_REAL_LLM_MODEL", REAL_LLM_MODEL)
+    hf_mcp_url = env.get("A2A_HF_MCP_URL", REAL_LLM_MCP_URL)
+    command = [
+        "uv",
+        "run",
+        "fast-agent",
+        "serve",
+        "a2a",
+        "--host",
+        "127.0.0.1",
+        "--port",
+        str(REAL_LLM_PORT),
+        "--name",
+        "hf-model-research",
+        "--model",
+        model,
+        "--url",
+        hf_mcp_url,
+        "--instruction",
+        str(instruction),
+    ]
+    process = subprocess.Popen(
+        command,
+        cwd=ROOT,
+        env=env,
+        text=True,
+        stdout=log_file,
+        stderr=subprocess.STDOUT,
+        start_new_session=True,
+    )
+    log_file.close()
+    _wait_for_url(
+        f"{REAL_LLM_BASE_URL}/.well-known/agent-card.json",
+        process=process,
+        log_path=REAL_LLM_SERVER_LOG,
+        timeout_seconds=float(
+            os.environ.get(
+                "A2A_REAL_LLM_READY_TIMEOUT_SECONDS",
+                str(REAL_LLM_READY_TIMEOUT_SECONDS),
+            )
+        ),
+    )
+    return process
+
+
+def _stop_real_llm_server(process: subprocess.Popen[str]) -> None:
+    if process.poll() is not None:
+        return
+    try:
+        os.killpg(process.pid, signal.SIGTERM)
+    except ProcessLookupError:
+        return
+    try:
+        process.wait(timeout=10)
+    except subprocess.TimeoutExpired:
+        os.killpg(process.pid, signal.SIGKILL)
+        process.wait(timeout=5)
+
+
+def record_real_llm() -> None:
+    """Record a provider-backed A2A server/client streaming demo."""
+    _require_real_llm_recording_tools()
+    ASSETS.mkdir(parents=True, exist_ok=True)
+    RECORDS.mkdir(parents=True, exist_ok=True)
+
+    instruction = Path("/tmp/a2a-real-llm-instruction.md")
+    instruction.write_text(
+        """You are a concise Hugging Face model research assistant.
+
+Use available Hugging Face MCP tools to answer questions about models. When the
+user asks about trending models, use markdown with a short heading, 3-5 bullets,
+and a brief note about the source or any uncertainty.
+""",
+        encoding="utf-8",
+    )
+
+    server = _start_real_llm_server(instruction)
+    driver = Path("/tmp/a2a-real-llm-record.sh")
+    driver.write_text(
+        f"""#!/usr/bin/env bash
+set -euo pipefail
+SESSION=a2a_real_llm_cast
+ROOT={ROOT}
+BASE_URL={REAL_LLM_BASE_URL}
+SERVER_LOG={REAL_LLM_SERVER_LOG}
+RECORD_SECONDS="${{A2A_REAL_LLM_RECORD_SECONDS:-70}}"
+MODEL="${{A2A_REAL_LLM_MODEL:-{REAL_LLM_MODEL}}}"
+HF_MCP_URL="${{A2A_HF_MCP_URL:-{REAL_LLM_MCP_URL}}}"
+PROMPT='Use the Hugging Face MCP server if available. Answer in markdown: what models are trending on Hugging Face right now? Include concise bullets and mention any uncertainty.'
+
+tmux kill-session -t "$SESSION" 2>/dev/null || true
+tmux new-session -d -s "$SESSION" -x 120 -y 32 \
+  "printf 'fast-agent A2A server ready\\nmodel: %s\\nMCP: %s\\nlog: %s\\n\\n' '$MODEL' '$HF_MCP_URL' '$SERVER_LOG'; tail -n 80 -f '$SERVER_LOG'"
+tmux set-option -t "$SESSION" status off >/dev/null
+tmux split-window -v -t "$SESSION" -l 20 \
+  "cd '$ROOT' && printf 'A2A card: %s/.well-known/agent-card.json\\n' '$BASE_URL'; curl -fsS '$BASE_URL/.well-known/agent-card.json' | python -m json.tool | sed -n '1,22p'; printf '\\ninteractive A2A JSON-RPC client\\n'; TERM=xterm-256color COLORTERM=truecolor FORCE_COLOR=1 FAST_AGENT_KEYRING_NOTICE=0 FAST_AGENT_MODEL=passthrough uv run fast-agent -x --noenv --a2a '$BASE_URL' --a2a-transport JSONRPC"
+
+(
+  for _ in $(seq 1 120); do
+    if tmux capture-pane -p -t "$SESSION":0.1 | grep -q 'a2a_remote'; then
+      break
+    fi
+    sleep 0.5
+  done
+  sleep 1
+  tmux send-keys -l -t "$SESSION":0.1 "$PROMPT"
+  tmux send-keys -t "$SESSION":0.1 Enter
+  sleep "$RECORD_SECONDS"
+  tmux send-keys -t "$SESSION":0.1 '/exit' Enter
+  sleep 2
+  tmux kill-session -t "$SESSION" 2>/dev/null || true
+) &
+
+tmux select-pane -t "$SESSION":0.1
+tmux attach-session -t "$SESSION" || true
+""",
+        encoding="utf-8",
+    )
+    driver.chmod(0o755)
+
+    try:
+        record_asciinema_cast(
+            output=ASSETS / REAL_LLM_CAST,
+            title="fast-agent A2A real LLM Hugging Face MCP streaming demo",
+            command=str(driver),
+            cols=120,
+            rows=32,
+            cleanup_session="a2a_real_llm_cast",
+        )
+    finally:
+        _stop_real_llm_server(server)
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("command", choices=["generate", "check", "record", "record-real-llm"])
+    args = parser.parse_args()
+    if args.command == "generate":
+        generate()
+    elif args.command == "check":
+        check()
+    elif args.command == "record":
+        record()
+    else:
+        record_real_llm()
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/docs.py b/scripts/docs.py
index 6b77540be..53bec53f5 100755
--- a/scripts/docs.py
+++ b/scripts/docs.py
@@ -5,6 +5,7 @@
 Usage:
     uv run scripts/docs.py install    # Install/sync dev dependencies
     uv run scripts/docs.py generate   # Generate reference docs from source
+    uv run scripts/docs.py a2a        # Regenerate A2A getting-started snippets
     uv run scripts/docs.py social [--page path.md]
                                       # Generate committed Open Graph card PNGs
     uv run scripts/docs.py social-contact-sheet
@@ -60,6 +61,15 @@ def generate() -> int:
     return 0
 
 
+def a2a() -> int:
+    """Run the A2A docs pipeline ``generate`` step, then its ``check`` step."""
+    pipeline = [sys.executable, str(ROOT / "scripts" / "a2a_docs_pipeline.py")]
+    for step in ("generate", "check"):
+        if code := subprocess.run([*pipeline, step], cwd=ROOT).returncode:
+            return code
+    return 0
+
+
 def social(args: list[str]) -> int:
     """Generate per-page Open Graph card PNGs using google-chrome."""
     print("Generating docs social cards...", flush=True)
@@ -191,6 +201,8 @@ def main() -> int:
         return install()
     elif command == "generate":
         return generate()
+    elif command == "a2a":
+        return a2a()
     elif command == "social":
         return social(sys.argv[2:])
     elif command == "check-social":
diff --git a/scripts/docs_assets.py b/scripts/docs_assets.py
index 95ff6cb45..3ced8809c 100644
--- a/scripts/docs_assets.py
+++ b/scripts/docs_assets.py
@@ -62,6 +62,12 @@ def _missing_tools(tools: tuple[str, ...]) -> list[str]:
     return [tool for tool in tools if shutil.which(tool) is None]
 
 
+def require_recording_tools(tools: tuple[str, ...] = ("asciinema", "tmux")) -> None:
+    """Raise ``RuntimeError`` listing every tool in ``tools`` that ``shutil.which`` cannot find."""
+    if absent := _missing_tools(tools):
+        raise RuntimeError("Cannot record docs assets; missing tools: " + ", ".join(absent))
+
+
 def _required_assets() -> list[Path]:
     return [
         VENDOR_ASCIINEMA / "README.md",
@@ -142,7 +148,7 @@ def _record_script(scenario: TerminalCastScenario) -> str:
 
 tmux kill-session -t "$SESSION" 2>/dev/null || true
 tmux new-session -d -s "$SESSION" -x {scenario.cols} -y {scenario.rows} \\
-  "DEMO_FAST_AGENT_HOME=\\$(mktemp -d) && printf '{{}}\\n' > \\\"\\$DEMO_FAST_AGENT_HOME/fast-agent.yaml\\\" && export FAST_AGENT_HOME=\\\"\\$DEMO_FAST_AGENT_HOME\\\" && DEMO_WORKDIR=\\$(mktemp -d -t fast-agent-demo.XXXXXX) && cd \\\"\\$DEMO_WORKDIR\\\" && git init -q && git config user.email docs@example.invalid && git config user.name 'Docs Demo' && printf '# Demo workspace\\n' > README.md && git add README.md && git commit -qm init && printf '\\nLocal edit\\n' >> README.md && unset ENVIRONMENT_DIR FAST_AGENT_RUNTIME_ENVIRONMENT VIRTUAL_ENV && TERM=xterm-256color COLORTERM=truecolor FORCE_COLOR=1 FAST_AGENT_KEYRING_NOTICE=0 TUI__COMPLETION_MENU_RESERVED_LINES=${{TUI__COMPLETION_MENU_RESERVED_LINES:-4}} bash --noprofile --norc"
+  "DEMO_FAST_AGENT_HOME=\\$(mktemp -d) && printf '{{}}\n' > \"\\$DEMO_FAST_AGENT_HOME/fast-agent.yaml\" && export FAST_AGENT_HOME=\"\\$DEMO_FAST_AGENT_HOME\" && DEMO_WORKDIR=\\$(mktemp -d -t fast-agent-demo.XXXXXX) && cd \"\\$DEMO_WORKDIR\" && git init -q && git config user.email docs@example.invalid && git config user.name 'Docs Demo' && printf '# Demo workspace\n' > README.md && git add README.md && git commit -qm init && printf '\nLocal edit\n' >> README.md && unset ENVIRONMENT_DIR FAST_AGENT_RUNTIME_ENVIRONMENT VIRTUAL_ENV && TERM=xterm-256color COLORTERM=truecolor FORCE_COLOR=1 FAST_AGENT_KEYRING_NOTICE=0 TUI__COMPLETION_MENU_RESERVED_LINES=${{TUI__COMPLETION_MENU_RESERVED_LINES:-4}} bash --noprofile --norc"
 tmux set-option -t "$SESSION" status off >/dev/null
 
 (
@@ -182,34 +188,59 @@ def record(name: str) -> int:
         driver = Path(temp_dir) / f"{scenario.name}.sh"
         driver.write_text(_record_script(scenario), encoding="utf-8")
         driver.chmod(0o755)
-        command = [
-            "asciinema",
-            "rec",
-            "--overwrite",
-            "--cols",
-            str(scenario.cols),
-            "--rows",
-            str(scenario.rows),
-            "--idle-time-limit",
-            str(scenario.idle_time_limit),
-            "-t",
-            scenario.title,
-            "-c",
-            str(driver),
-            str(scenario.output),
-        ]
-        try:
-            subprocess.run(command, cwd=ROOT, check=True)
-        finally:
-            subprocess.run(
-                ["tmux", "kill-session", "-t", f"fast_agent_docs_{name.replace('-', '_')}"],
-                check=False,
-            )
-    _trim_terminal_teardown(scenario.output)
+        record_asciinema_cast(
+            output=scenario.output,
+            title=scenario.title,
+            command=str(driver),
+            cols=scenario.cols,
+            rows=scenario.rows,
+            idle_time_limit=scenario.idle_time_limit,
+            cleanup_session=f"fast_agent_docs_{name.replace('-', '_')}",
+        )
     print(f"Recorded {scenario.output.relative_to(ROOT)}")
     return 0
 
 
+def record_asciinema_cast(
+    *,
+    output: Path,
+    title: str,
+    command: str,
+    cols: int,
+    rows: int,
+    idle_time_limit: float = 1.3,
+    cleanup_session: str | None = None,
+    cwd: Path = ROOT,
+) -> None:
+    """Record ``command`` with ``asciinema rec`` into ``output``, overwriting any old cast.
+
+    A ``cleanup_session`` tmux session is killed even if recording fails.
+    """
+    output.parent.mkdir(parents=True, exist_ok=True)
+    recorder_argv = [
+        "asciinema",
+        "rec",
+        "--overwrite",
+        "--cols",
+        str(cols),
+        "--rows",
+        str(rows),
+        "--idle-time-limit",
+        str(idle_time_limit),
+        "-t",
+        title,
+        "-c",
+        command,
+        str(output),
+    ]
+    try:
+        subprocess.run(recorder_argv, cwd=cwd, check=True)
+    finally:
+        if cleanup_session:
+            subprocess.run(["tmux", "kill-session", "-t", cleanup_session], check=False)
+    _trim_terminal_teardown(output)
+
+
 def _is_terminal_teardown_event(line: str) -> bool:
     try:
         event = json.loads(line)
diff --git a/src/fast_agent/__init__.py b/src/fast_agent/__init__.py
index da70dc4bb..41c7944cc 100644
--- a/src/fast_agent/__init__.py
+++ b/src/fast_agent/__init__.py
@@ -1,52 +1,60 @@
 """fast-agent - An MCP native agent application framework"""
 
+import importlib
 from typing import TYPE_CHECKING
 
-from fast_agent.config import (
-    AnthropicSettings,
-    AzureSettings,
-    BedrockSettings,
-    DeepSeekSettings,
-    GenericSettings,
-    GoogleSettings,
-    GroqSettings,
-    HuggingFaceSettings,
-    LoggerSettings,
-    MCPElicitationSettings,
-    MCPRootSettings,
-    MCPSamplingSettings,
-    MCPServerAuthSettings,
-    MCPServerSettings,
-    MCPSettings,
-    OpenAISettings,
-    OpenRouterSettings,
-    OpenTelemetrySettings,
-    Settings,
-    SkillsSettings,
-    TensorZeroSettings,
-    XAISettings,
-)
-from fast_agent.types import (
-    ConversationSummary,
-    LlmStopReason,
-    PromptMessageExtended,
-    RequestParams,
-    ResourceLink,
-    audio_link,
-    extract_first,
-    extract_last,
-    find_matches,
-    image_link,
-    resource_link,
-    search_messages,
-    text_content,
-    video_link,
-)
+_CONFIG_EXPORTS = {
+    "AnthropicSettings",
+    "AzureSettings",
+    "BedrockSettings",
+    "DeepSeekSettings",
+    "GenericSettings",
+    "GoogleSettings",
+    "GroqSettings",
+    "HuggingFaceSettings",
+    "LoggerSettings",
+    "MCPElicitationSettings",
+    "MCPRootSettings",
+    "MCPSamplingSettings",
+    "MCPServerAuthSettings",
+    "MCPServerSettings",
+    "MCPSettings",
+    "OpenAISettings",
+    "OpenRouterSettings",
+    "OpenTelemetrySettings",
+    "Settings",
+    "SkillsSettings",
+    "TensorZeroSettings",
+    "XAISettings",
+}
+
+_TYPE_EXPORTS = {
+    "ConversationSummary",
+    "LlmStopReason",
+    "PromptMessageExtended",
+    "RequestParams",
+    "ResourceLink",
+    "audio_link",
+    "extract_first",
+    "extract_last",
+    "find_matches",
+    "image_link",
+    "resource_link",
+    "search_messages",
+    "text_content",
+    "video_link",
+}
 
 
 def __getattr__(name: str):
     """Lazy import heavy modules to avoid circular imports during package initialization."""
-    if name == "Core":
+    if name in _CONFIG_EXPORTS:
+        module = importlib.import_module("fast_agent.config")
+        return getattr(module, name)
+    elif name in _TYPE_EXPORTS:
+        module = importlib.import_module("fast_agent.types")
+        return getattr(module, name)
+    elif name == "Core":
         from fast_agent.core import Core
 
         return Core
@@ -113,10 +121,44 @@ def __getattr__(name: str):
 # Help static analyzers/IDEs resolve symbols and signatures without importing at runtime.
 if TYPE_CHECKING:  # pragma: no cover - typing aid only
     # Provide a concrete import path for type checkers/IDEs
+    from fast_agent.config import AnthropicSettings as AnthropicSettings  # noqa: F401
+    from fast_agent.config import AzureSettings as AzureSettings  # noqa: F401
+    from fast_agent.config import BedrockSettings as BedrockSettings  # noqa: F401
+    from fast_agent.config import DeepSeekSettings as DeepSeekSettings  # noqa: F401
+    from fast_agent.config import GenericSettings as GenericSettings  # noqa: F401
+    from fast_agent.config import GoogleSettings as GoogleSettings  # noqa: F401
+    from fast_agent.config import GroqSettings as GroqSettings  # noqa: F401
+    from fast_agent.config import HuggingFaceSettings as HuggingFaceSettings  # noqa: F401
+    from fast_agent.config import LoggerSettings as LoggerSettings  # noqa: F401
+    from fast_agent.config import MCPElicitationSettings as MCPElicitationSettings  # noqa: F401
+    from fast_agent.config import MCPRootSettings as MCPRootSettings  # noqa: F401
+    from fast_agent.config import MCPSamplingSettings as MCPSamplingSettings  # noqa: F401
+    from fast_agent.config import MCPServerAuthSettings as MCPServerAuthSettings  # noqa: F401
+    from fast_agent.config import MCPServerSettings as MCPServerSettings  # noqa: F401
+    from fast_agent.config import MCPSettings as MCPSettings  # noqa: F401
+    from fast_agent.config import OpenAISettings as OpenAISettings  # noqa: F401
+    from fast_agent.config import OpenRouterSettings as OpenRouterSettings  # noqa: F401
+    from fast_agent.config import OpenTelemetrySettings as OpenTelemetrySettings  # noqa: F401
+    from fast_agent.config import Settings as Settings  # noqa: F401
+    from fast_agent.config import SkillsSettings as SkillsSettings  # noqa: F401
+    from fast_agent.config import TensorZeroSettings as TensorZeroSettings  # noqa: F401
+    from fast_agent.config import XAISettings as XAISettings  # noqa: F401
     from fast_agent.core.fastagent import FastAgent as FastAgent  # noqa: F401
     from fast_agent.mcp.prompt import Prompt as Prompt  # noqa: F401
     from fast_agent.types import ConversationSummary as ConversationSummary  # noqa: F401
+    from fast_agent.types import LlmStopReason as LlmStopReason  # noqa: F401
     from fast_agent.types import PromptMessageExtended as PromptMessageExtended  # noqa: F401
+    from fast_agent.types import RequestParams as RequestParams  # noqa: F401
+    from fast_agent.types import ResourceLink as ResourceLink  # noqa: F401
+    from fast_agent.types import audio_link as audio_link  # noqa: F401
+    from fast_agent.types import extract_first as extract_first  # noqa: F401
+    from fast_agent.types import extract_last as extract_last  # noqa: F401
+    from fast_agent.types import find_matches as find_matches  # noqa: F401
+    from fast_agent.types import image_link as image_link  # noqa: F401
+    from fast_agent.types import resource_link as resource_link  # noqa: F401
+    from fast_agent.types import search_messages as search_messages  # noqa: F401
+    from fast_agent.types import text_content as text_content  # noqa: F401
+    from fast_agent.types import video_link as video_link  # noqa: F401
 
 
 __all__ = [
@@ -125,7 +167,7 @@ def __getattr__(name: str):
     "Context",
     "ContextDependent",
     "ServerRegistry",
-    # Configuration and settings (eagerly loaded)
+    # Configuration and settings (lazy loaded)
     "Settings",
     "MCPSettings",
     "MCPServerSettings",
@@ -151,19 +193,19 @@ def __getattr__(name: str):
     # Progress and event tracking (lazy loaded)
     "ProgressAction",
     "ProgressEvent",
-    # Type definitions and enums (eagerly loaded)
+    # Type definitions and enums (lazy loaded)
     "LlmStopReason",
     "RequestParams",
     "PromptMessageExtended",
     "ResourceLink",
     "ConversationSummary",
-    # Content helpers (eagerly loaded)
+    # Content helpers (lazy loaded)
     "text_content",
     "resource_link",
     "image_link",
     "video_link",
     "audio_link",
-    # Search utilities (eagerly loaded)
+    # Search utilities (lazy loaded)
     "search_messages",
     "find_matches",
     "extract_first",
diff --git a/src/fast_agent/a2a/__init__.py b/src/fast_agent/a2a/__init__.py
new file mode 100644
index 000000000..8f43ee5d0
--- /dev/null
+++ b/src/fast_agent/a2a/__init__.py
@@ -0,0 +1,16 @@
+"""A2A support for fast-agent."""
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from fast_agent.a2a.server import AgentA2AServer as AgentA2AServer
+
+
+def __getattr__(name: str):
+    """Resolve ``AgentA2AServer`` on first attribute access instead of at import time."""
+    if name != "AgentA2AServer":
+        raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
+    from fast_agent.a2a.server import AgentA2AServer
+    return AgentA2AServer
+
+__all__ = ["AgentA2AServer"]
diff --git a/src/fast_agent/a2a/config.py b/src/fast_agent/a2a/config.py
new file mode 100644
index 000000000..a085802bd
--- /dev/null
+++ b/src/fast_agent/a2a/config.py
@@ -0,0 +1,18 @@
+"""Configuration for remote A2A agents."""
+
+from dataclasses import dataclass, field
+
+from fast_agent.config import MCPServerAuthSettings
+
+
+@dataclass(frozen=True)
+class A2AAgentConfig:  # Frozen connection settings for one remote A2A agent.
+    url: str  # base URL the agent card is resolved against
+    transport: str | None = None  # pins a single protocol binding when set
+    streaming: bool = True  # forwarded to the A2A ClientConfig
+    polling: bool = False  # forwarded to the A2A ClientConfig
+    accepted_output_modes: list[str] = field(default_factory=list)
+    headers: dict[str, str] = field(default_factory=dict)  # extra HTTP headers for requests
+    auth: MCPServerAuthSettings | None = None  # optional OAuth settings
+    relative_card_path: str | None = None  # card path override; None uses the default path
+    request_timeout_seconds: float | None = 120.0  # HTTP client timeout
diff --git a/src/fast_agent/a2a/connect.py b/src/fast_agent/a2a/connect.py
new file mode 100644
index 000000000..9bb0df631
--- /dev/null
+++ b/src/fast_agent/a2a/connect.py
@@ -0,0 +1,120 @@
+"""Helpers for interactive A2A connection requests."""
+
+from __future__ import annotations
+
+import re
+import shlex
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+from urllib.parse import urlsplit, urlunsplit
+
+if TYPE_CHECKING:
+    from fast_agent.config import MCPServerAuthSettings
+
+_TRANSPORT_ALIASES = {
+    "jsonrpc": "JSONRPC",
+    "json-rpc": "JSONRPC",
+    "rpc": "JSONRPC",
+    "http": "HTTP+JSON",
+    "http+json": "HTTP+JSON",
+    "rest": "HTTP+JSON",
+}
+
+
+@dataclass(frozen=True, slots=True)
+class A2AConnectRequest:  # Parsed result of an interactive ``/a2a connect`` command.
+    url: str  # normalized base URL
+    name: str | None = None  # sanitized agent name from --name
+    transport: str | None = None  # canonical transport name from --transport / -t
+    auth: MCPServerAuthSettings | None = None  # set only when --oauth / --no-oauth was given
+    relative_card_path: str | None = None  # --card-path, else the path inferred from the URL
+
+
+def parse_a2a_connect_arguments(arguments: str | None) -> tuple[A2AConnectRequest | None, str | None]:
+    """Parse the argument string of ``/a2a connect``.
+
+    Args:
+        arguments: Raw text after the command; tokenized with shell-style quoting.
+
+    Returns:
+        ``(request, None)`` on success, otherwise ``(None, message)``.
+    """
+    if not arguments:
+        return None, "Usage: /a2a connect <base-url-or-card-url> [--transport JSONRPC|HTTP+JSON] [--name NAME] [--card-path PATH] [--oauth|--no-oauth]"
+    try:
+        tokens = shlex.split(arguments)
+    except ValueError as exc:
+        return None, str(exc)
+
+    url: str | None = None
+    name: str | None = None
+    transport: str | None = None
+    card_path: str | None = None
+    trigger_oauth: bool | None = None
+    remaining = iter(tokens)
+    for token in remaining:
+        if token in {"--oauth", "--no-oauth"}:
+            trigger_oauth = token == "--oauth"
+        elif token in {"--transport", "-t", "--name", "--card-path"}:
+            # Value-taking options consume the token that follows them.
+            value = next(remaining, None)
+            if value is None:
+                return None, f"{token} requires a value"
+            if token == "--name":
+                name = _normalize_agent_name(value)
+                if not name:
+                    return None, f"Invalid agent name: {value}"
+            elif token == "--card-path":
+                card_path = value
+            else:
+                transport = normalize_a2a_transport(value)
+                if transport is None:
+                    return None, f"Unsupported A2A transport: {value}"
+        elif token.startswith("-"):
+            return None, f"Unknown /a2a connect option: {token}"
+        elif url is not None:
+            return None, f"Unexpected /a2a connect argument: {token}"
+        else:
+            url = token
+
+    if url is None:
+        return None, "A2A base URL or agent-card URL is required"
+    normalized_url, inferred_card_path, error = normalize_a2a_url(url)
+    if error:
+        return None, error
+    # Auth settings are only built when an explicit OAuth flag was passed.
+    auth = None if trigger_oauth is None else _a2a_auth_settings(trigger_oauth)
+    request = A2AConnectRequest(
+        url=normalized_url,
+        name=name,
+        transport=transport,
+        auth=auth,
+        relative_card_path=card_path or inferred_card_path,
+    )
+    return request, None
+
+
+def _a2a_auth_settings(oauth: bool) -> "MCPServerAuthSettings":  # wraps the flag in auth settings
+    from fast_agent.config import MCPServerAuthSettings  # runtime import; the module-level one is TYPE_CHECKING-only
+
+    return MCPServerAuthSettings(oauth=oauth)
+
+
+def normalize_a2a_transport(value: str) -> str | None:  # canonical name, or None if unrecognized
+    return _TRANSPORT_ALIASES.get(value.strip().lower())  # lookup ignores case and outer whitespace
+
+
+def normalize_a2a_url(url: str) -> tuple[str, str | None, str | None]:
+    """Return ``(base_url, card_path, error)`` for a base URL or an agent-card URL."""
+    parts = urlsplit(url)
+    if not parts.netloc or parts.scheme not in {"http", "https"}:
+        return "", None, "A2A connect expects an http(s) base URL or agent-card URL"
+    if not parts.path.endswith("agent-card.json"):
+        return url.rstrip("/"), None, None
+    # Card URL: the base becomes the bare origin and the whole path is the card path.
+    card_path = parts.path if parts.path.startswith("/") else f"/{parts.path}"
+    return urlunsplit((parts.scheme, parts.netloc, "", "", "")), card_path, None
+
+
+def _normalize_agent_name(value: str) -> str:  # may return "" when nothing usable remains
+    return re.sub(r"[^A-Za-z0-9_-]+", "_", value.strip()).strip("_")  # other runs -> "_", then trim "_"
diff --git a/src/fast_agent/a2a/remote_agent.py b/src/fast_agent/a2a/remote_agent.py
new file mode 100644
index 000000000..85af2b2fe
--- /dev/null
+++ b/src/fast_agent/a2a/remote_agent.py
@@ -0,0 +1,672 @@
+"""Remote A2A agent implementation."""
+
+from __future__ import annotations
+
+import base64
+import json
+import uuid
+from dataclasses import dataclass
+from pathlib import PurePosixPath
+from types import SimpleNamespace
+from typing import TYPE_CHECKING, Any, cast
+
+import httpx
+from a2a.client import A2ACardResolver, ClientConfig, create_client
+from a2a.types import Message, Part, Role, SendMessageRequest, TaskState
+from google.protobuf.json_format import MessageToDict, ParseDict
+from mcp.types import (
+    AudioContent,
+    BlobResourceContents,
+    EmbeddedResource,
+    ImageContent,
+    ResourceLink,
+    TextContent,
+    TextResourceContents,
+)
+
+from fast_agent.agents.agent_types import AgentConfig, AgentType
+from fast_agent.agents.llm_decorator import LlmDecorator
+from fast_agent.core.logging.logger import get_logger
+from fast_agent.event_progress import ProgressAction
+from fast_agent.llm.stream_types import StreamChunk
+from fast_agent.mcp.hf_auth import (
+    add_explicit_bearer_auth_header,
+    add_hf_auth_header,
+    get_hf_token_from_env,
+    is_hf_space_url,
+)
+from fast_agent.mcp.oauth_client import build_oauth_provider
+from fast_agent.types import LlmStopReason, PromptMessageExtended, RequestParams
+from fast_agent.ui import console
+from fast_agent.ui.console_display import ConsoleDisplay
+from fast_agent.ui.message_display_helpers import build_user_message_display
+from fast_agent.ui.progress_display import progress_display
+
+if TYPE_CHECKING:
+    from collections.abc import Callable, Sequence
+
+    from a2a.types import AgentCard
+    from mcp import Tool
+
+    from fast_agent.a2a.config import A2AAgentConfig
+    from fast_agent.config import MCPServerSettings
+    from fast_agent.context import Context
+
+_TERMINAL_STATES = {
+    "TASK_STATE_COMPLETED",
+    "TASK_STATE_FAILED",
+    "TASK_STATE_CANCELED",
+    "TASK_STATE_CANCELLED",
+    "TASK_STATE_REJECTED",
+    "TASK_STATE_INPUT_REQUIRED",
+    "TASK_STATE_AUTH_REQUIRED",
+}
+
+_ERROR_STATES = {
+    "TASK_STATE_FAILED",
+    "TASK_STATE_CANCELED",
+    "TASK_STATE_CANCELLED",
+    "TASK_STATE_REJECTED",
+    "TASK_STATE_INPUT_REQUIRED",
+    "TASK_STATE_AUTH_REQUIRED",
+}
+
+logger = get_logger(__name__)
+
+SUPPORTED_A2A_HTTP_TRANSPORTS = ["JSONRPC", "HTTP+JSON"]
+_INPUT_REQUIRED_STATE = "TASK_STATE_INPUT_REQUIRED"
+
+
+@dataclass(frozen=True)
+class A2ADiagnostics:  # Frozen snapshot of a remote agent's connection and task state.
+    url: str  # configured remote URL
+    transport: str | None  # configured transport, if any
+    remote_name: str | None  # name from the resolved agent card, if resolved
+    context_id: str | None
+    current_task_id: str | None
+    last_task_state: str | None
+    selected_transport_class: str | None  # class name of the transport (or client) in use
+
+
+class A2ARemoteAgent(LlmDecorator):
+    """A fast-agent AgentProtocol adapter for a remote A2A agent."""
+
+    def __init__(  # stores the remote settings; no network I/O happens here
+        self,
+        config: AgentConfig,
+        a2a_config: A2AAgentConfig,
+        context: Context | None = None,
+    ) -> None:
+        super().__init__(config=config, context=context)
+        self.a2a_config = a2a_config
+        self.context_id: str | None = None  # updated from incoming A2A events
+        self.current_task_id: str | None = None
+        self.last_task_state: str | None = None
+        self.remote_card: AgentCard | None = None  # resolved in initialize()
+        self.display = ConsoleDisplay(config=context.config if context else None)
+        self._client: Any | None = None  # A2A client, created in initialize()
+        self._httpx_client: httpx.AsyncClient | None = None  # created in initialize(), closed in shutdown()
+        self._stream_listeners: list[Callable[[StreamChunk], None]] = []
+
+    @property
+    def agent_type(self) -> AgentType:  # this adapter always reports the A2A agent type
+        return AgentType.A2A
+
+    async def initialize(self) -> None:
+        """Resolve the remote agent card and build the A2A client.
+
+        The HTTP client is rebuilt at most once after the card is known, and it is
+        closed again if any step fails so a failed start leaks no connections.
+        """
+        await super().initialize()
+        settings = self.a2a_config
+        headers = add_hf_auth_header(settings.url, settings.headers)
+        timeout = settings.request_timeout_seconds
+        self._httpx_client = httpx.AsyncClient(headers=headers or None, timeout=timeout)
+        try:
+            client_config = ClientConfig(
+                streaming=settings.streaming,
+                polling=settings.polling,
+                httpx_client=self._httpx_client,
+                accepted_output_modes=list(settings.accepted_output_modes),
+            )
+            # An explicit transport pins the binding; otherwise offer every HTTP binding.
+            client_config.supported_protocol_bindings = (
+                [settings.transport] if settings.transport else list(SUPPORTED_A2A_HTTP_TRANSPORTS)
+            )
+            resolver = A2ACardResolver(
+                self._httpx_client,
+                settings.url,
+                settings.relative_card_path or "/.well-known/agent-card.json",
+            )
+            self.remote_card = await resolver.get_agent_card()
+            card_headers = _headers_for_resolved_card(
+                url=settings.url,
+                headers=headers,
+                explicit_headers=bool(settings.headers),
+                card=self.remote_card,
+            )
+            oauth_provider = self._build_oauth_provider_for_card(self.remote_card)
+            if card_headers != headers or oauth_provider is not None:
+                # The card changed the required headers or auth: swap the HTTP client once.
+                await self._httpx_client.aclose()
+                self._httpx_client = httpx.AsyncClient(
+                    auth=oauth_provider,
+                    headers=card_headers or None,
+                    timeout=timeout,
+                )
+                client_config.httpx_client = self._httpx_client
+            self._client = await create_client(self.remote_card, client_config=client_config)
+        except BaseException:
+            # Covers cancellation too; the error is always re-raised after cleanup.
+            if self._httpx_client is not None:
+                await self._httpx_client.aclose()
+            self._httpx_client = None
+            raise
+
+    async def shutdown(self) -> None:  # closes the A2A client, then the HTTP client, then the base agent
+        client = self._client
+        if client is not None:
+            await client.close()  # NOTE(review): if this raises, the HTTP client below is left open
+            self._client = None
+        if self._httpx_client is not None:
+            await self._httpx_client.aclose()
+            self._httpx_client = None
+        await super().shutdown()
+
+    def add_stream_listener(self, listener: Callable[[StreamChunk], None]) -> Callable[[], None]:
+        """Register ``listener`` and return a callable that unregisters it again."""
+        self._stream_listeners.append(listener)
+
+        def unsubscribe() -> None:
+            # Safe to call repeatedly: a listener that is already gone is ignored.
+            if listener in self._stream_listeners:
+                self._stream_listeners.remove(listener)
+
+        return unsubscribe
+
+    def _clone_constructor_kwargs(self) -> dict[str, Any]:
+        """Carry remote A2A connection configuration into detached clones."""
+        return {"a2a_config": self.a2a_config}
+
+    def _build_oauth_provider_for_card(self, card: AgentCard) -> Any | None:  # None means "no OAuth"
+        auth_config = self.a2a_config.auth
+        if auth_config is not None and not auth_config.oauth:  # OAuth explicitly disabled in config
+            return None
+        hf_space_bearer = is_hf_space_url(self.a2a_config.url) and _card_advertises_http_bearer(
+            card
+        )
+        if auth_config is None and not (_card_advertises_oauth(card) or hf_space_bearer):  # card does not ask for it
+            return None
+        if self.a2a_config.headers:  # explicitly configured headers: no OAuth provider is built
+            return None
+        if _headers_for_resolved_card(  # presumably a derived header already covers auth — TODO confirm
+            url=self.a2a_config.url,
+            headers=None,
+            explicit_headers=False,
+            card=card,
+        ):
+            return None
+        return build_oauth_provider(
+            cast(
+                "MCPServerSettings",
+                SimpleNamespace(  # stand-in object carrying only these four attributes
+                    name=self.config.name,
+                    transport="http",
+                    url=self.a2a_config.url,
+                    auth=auth_config,
+                ),
+            )
+        )
+
+    def reset_a2a_state(self) -> None:  # forgets the remote context id, task id and last task state
+        self.context_id = None
+        self.current_task_id = None
+        self.last_task_state = None
+
+    def diagnostics(self) -> A2ADiagnostics:  # snapshot of the current connection and task fields
+        return A2ADiagnostics(
+            url=self.a2a_config.url,
+            transport=self.a2a_config.transport,
+            remote_name=self.remote_card.name if self.remote_card else None,  # None until a card is resolved
+            context_id=self.context_id,
+            current_task_id=self.current_task_id,
+            last_task_state=self.last_task_state,
+            selected_transport_class=self._selected_transport_class(),
+        )
+
+    async def generate_impl(  # sends one turn to the remote agent and returns its reply as a message
+        self,
+        messages: list[PromptMessageExtended],
+        request_params: RequestParams | None = None,
+        tools: list[Tool] | None = None,
+    ) -> PromptMessageExtended:
+        del tools  # the tools argument is deliberately unused here
+        if self._client is None:
+            raise RuntimeError("A2A remote agent is not initialized")
+
+        use_history = request_params.use_history if request_params else self.config.use_history  # request wins
+        self._prepare_turn_state(use_history=use_history)
+        self._timestamp_messages(messages)
+        self._display_user_messages(messages)
+        user_text = _latest_text(messages)
+        message = Message(
+            role=Role.ROLE_USER,
+            message_id=str(uuid.uuid4()),
+            parts=_parts_from_messages(messages) or [Part(text=user_text)],  # fall back to one text part
+        )
+        if self.context_id:  # continue the existing remote context/task when one is known
+            message.context_id = self.context_id
+        if self.current_task_id:
+            message.task_id = self.current_task_id
+        request = SendMessageRequest(message=message)
+
+        self._log_a2a_progress(ProgressAction.SENDING, details=self._transport_label())
+        remove_live_listener: Callable[[], None] | None = None
+        stream_emitted = False
+        preserve_streamed_frame = False
+
+        with self.display.streaming_assistant_message(
+            name=self.name,
+            model="A2A",
+            bottom_items=[self._transport_label()],
+        ) as stream_handle:
+
+            def update_live_stream(chunk: StreamChunk) -> None:
+                nonlocal stream_emitted
+                stream_emitted = True
+                stream_handle.update_chunk(chunk)
+
+            remove_live_listener = self.add_stream_listener(update_live_stream)
+            try:
+                result = await self._consume_events(self._client.send_message(request))
+            finally:
+                remove_live_listener()  # always detach, even when the request fails
+                remove_live_listener = None
+
+            self._log_a2a_progress(ProgressAction.READY, details=result.state or "completed")
+            response_text = result.text or result.status_text or _state_message(result.state)
+            if result.state in _ERROR_STATES:  # prefix non-success states so they show in the reply
+                response_text = f"A2A task {result.state}: {response_text}"
+            stop_reason = (
+                LlmStopReason.PAUSE
+                if result.state == _INPUT_REQUIRED_STATE
+                else LlmStopReason.END_TURN
+            )
+            assistant_message = PromptMessageExtended(
+                role="assistant",
+                content=[TextContent(type="text", text=response_text)],
+                stop_reason=stop_reason,
+            )
+            await stream_handle.wait_for_drain()
+            if stream_emitted and result.state not in _ERROR_STATES:
+                preserve_streamed_frame = stream_handle.preserve_final_frame()
+            stream_handle.finalize(assistant_message)
+
+        if remove_live_listener is not None:  # NOTE(review): appears always None here; the finally above clears it
+            remove_live_listener()
+        progress_display.pause(cancel_deferred_on_noop=True)
+        if not preserve_streamed_frame:  # render the final message unless the streamed frame was kept
+            await self.display.show_assistant_message(
+                assistant_message,
+                name=self.name,
+                model="A2A",
+                bottom_items=[self._transport_label()],
+            )
+        console.console.print()
+        if use_history:
+            self._persist_history(messages, assistant_message)
+        return assistant_message
+
+    def _prepare_turn_state(self, *, use_history: bool) -> None:
+        """Drop stored A2A ids for a history-less turn, unless a task is awaiting input."""
+        # A task in the input-required state keeps its ids even without history.
+        awaiting_input = self.last_task_state == _INPUT_REQUIRED_STATE and self.current_task_id
+        if not (use_history or awaiting_input):
+            self.reset_a2a_state()
+
+    def _display_user_messages(self, messages: list[PromptMessageExtended]) -> None:
+        """Show the user-role messages of this turn on the console display, if there are any."""
+        user_messages = [entry for entry in messages if entry.role == "user"]
+        if user_messages:
+            text, attachments = build_user_message_display(user_messages)
+            self.display.show_user_message(
+                text,
+                chat_turn=0,
+                name=self.name,
+                attachments=attachments or None,
+                part_count=len(user_messages) if len(user_messages) > 1 else None,
+            )
+
+    def _transport_label(self) -> str:  # short label used for display and progress details
+        return f"A2A · {self.a2a_config.transport}" if self.a2a_config.transport else "A2A"  # plain "A2A" when unpinned
+
+    def _selected_transport_class(self) -> str | None:
+        """Name the class of the client's private ``_transport``, else of the client itself."""
+        if self._client is None:
+            return None
+        # Falls back to the client when it has no ``_transport`` or that attribute is None.
+        selected = getattr(self._client, "_transport", None)
+        return (selected if selected is not None else self._client).__class__.__name__
+
+    def _log_a2a_progress(self, action: ProgressAction, *, details: str = "") -> None:
+        """Write a debug log record describing progress of the A2A request.
+
+        The record's target is the remote card's name when a card is present,
+        otherwise this agent's own name.
+        """
+        target = self.remote_card.name if self.remote_card else self.name
+        payload = {
+            "progress_action": action,
+            "agent_name": self.name,
+            "target": target,
+            "details": details,
+        }
+        logger.debug("A2A request progress", data=payload)
+
+    async def _consume_events(self, events: Any) -> "_A2AResult":
+        """Drain an A2A event stream into a single ``_A2AResult``.
+
+        Message text and artifact text are accumulated separately, while task
+        and status events update the tracked task state. Text from messages and
+        from artifact updates is also forwarded to the stream listeners as it
+        arrives.
+
+        Args:
+            events: Async iterable of response events exposing ``HasField``.
+
+        Returns:
+            The joined text, the name of the last task state seen (``None`` if
+            no task or status event arrived) and the latest status text.
+        """
+        message_chunks: list[str] = []
+        artifact_order: list[str] = []
+        artifact_texts: dict[str, str] = {}
+        state: str | None = None
+        status_text: str | None = None
+
+        async for event in events:
+            # Direct message: adopt its context id (when set) and collect its text.
+            if event.HasField("message"):
+                if event.message.context_id:
+                    self.context_id = event.message.context_id
+                text = _parts_text(event.message.parts)
+                _append_text(message_chunks, text)
+                self._emit_stream(text)
+                continue
+
+            # Task snapshot: record the state and store the text of each listed artifact.
+            if event.HasField("task"):
+                state = TaskState.Name(event.task.status.state)
+                self._advance_task_state(
+                    state=state,
+                    task_id=event.task.id,
+                    context_id=event.task.context_id,
+                )
+                self._log_a2a_progress(ProgressAction.UPDATED, details=state)
+                for artifact in event.task.artifacts:
+                    _replace_artifact_text(artifact_order, artifact_texts, artifact, _parts_text(artifact.parts))
+                continue
+
+            # Status update: record the state; keep the previous status text when
+            # this update carries no text of its own.
+            if event.HasField("status_update"):
+                status = event.status_update.status
+                state = TaskState.Name(status.state)
+                self._advance_task_state(
+                    state=state,
+                    task_id=event.status_update.task_id,
+                    context_id=event.status_update.context_id,
+                )
+                self._log_a2a_progress(ProgressAction.UPDATED, details=state)
+                if status.HasField("message"):
+                    status_text = _parts_text(status.message.parts) or status_text
+                continue
+
+            # Incremental artifact update: updates without text are skipped entirely.
+            if event.HasField("artifact_update"):
+                update = event.artifact_update
+                artifact = update.artifact
+                text = _parts_text(artifact.parts)
+                if not text:
+                    continue
+                _apply_artifact_update(
+                    artifact_order,
+                    artifact_texts,
+                    artifact,
+                    text,
+                    append=update.append,
+                )
+                self._log_a2a_progress(ProgressAction.STREAMING, details=artifact.name)
+                self._emit_stream(text)
+
+        # Final text: message chunks first, then artifacts in first-seen order.
+        return _A2AResult(
+            text="\n".join(
+                chunk
+                for chunk in [
+                    *message_chunks,
+                    *(artifact_texts[artifact_id] for artifact_id in artifact_order),
+                ]
+                if chunk
+            ),
+            state=state,
+            status_text=status_text,
+        )
+
+    def _emit_stream(self, text: str) -> None:
+        """Wrap non-empty ``text`` in a ``StreamChunk`` and pass it to each listener."""
+        if text:
+            chunk = StreamChunk(text=text)
+            # Iterate over a snapshot of the listener collection.
+            for notify in list(self._stream_listeners):
+                notify(chunk)
+
+    def _advance_task_state(self, *, state: str, task_id: str, context_id: str) -> None:
+        """Record the most recent task state reported for the remote task.
+
+        ``context_id`` is stored as ``None`` when empty. The current task id is
+        cleared once ``state`` is one of the terminal states, except that the
+        input-required state always keeps ``task_id``; any other state keeps
+        ``task_id`` as well.
+        """
+        self.last_task_state = state
+        self.context_id = context_id if context_id else None
+        finished = state != _INPUT_REQUIRED_STATE and state in _TERMINAL_STATES
+        self.current_task_id = None if finished else task_id
+
+
+@dataclass(frozen=True)
+class _A2AResult:
+    """Immutable summary of one consumed A2A event stream."""
+
+    # Newline-joined message text followed by artifact text.
+    text: str
+    # Name of the last task state seen, or None when no state was reported.
+    state: str | None
+    # Text of the most recent status message that carried text, if any.
+    status_text: str | None
+
+
+
+def _parts_from_messages(messages: Sequence[PromptMessageExtended]) -> list[Part]:
+    """Convert the content of user-role messages into A2A ``Part`` objects.
+
+    Messages with any other role are skipped. Content items whose type matches
+    none of the branches below are dropped without error.
+
+    Args:
+        messages: Prompt messages to convert.
+
+    Returns:
+        The parts in message order, then content order within each message.
+    """
+    parts: list[Part] = []
+    for message in messages:
+        if message.role != "user":
+            continue
+        for content in message.content:
+            # Plain text: empty strings produce no part.
+            if isinstance(content, TextContent):
+                if content.text:
+                    parts.append(Part(text=content.text))
+                continue
+            # Inline image/audio: the base64 payload is decoded to raw bytes.
+            if isinstance(content, ImageContent | AudioContent):
+                parts.append(
+                    Part(
+                        raw=base64.b64decode(content.data),
+                        media_type=content.mimeType,
+                    )
+                )
+                continue
+            # Resource link: passed by URL rather than by value.
+            if isinstance(content, ResourceLink):
+                parts.append(
+                    Part(
+                        url=str(content.uri),
+                        media_type=content.mimeType or "",
+                        filename=content.name,
+                    )
+                )
+                continue
+            if isinstance(content, EmbeddedResource):
+                resource = content.resource
+                # Binary resource: decoded bytes, filename taken from the URI.
+                if isinstance(resource, BlobResourceContents):
+                    parts.append(
+                        Part(
+                            raw=base64.b64decode(resource.blob),
+                            media_type=resource.mimeType or "",
+                            filename=_filename_from_uri(str(resource.uri)),
+                        )
+                    )
+                    continue
+                if isinstance(resource, TextResourceContents):
+                    # Parseable application/json text is sent as a structured
+                    # data part; note that this path carries no filename.
+                    data_part = _json_data_part(resource.text, media_type=resource.mimeType)
+                    if data_part is not None:
+                        parts.append(data_part)
+                        continue
+                    # Anything else is sent as text, defaulting to text/plain.
+                    parts.append(
+                        Part(
+                            text=resource.text,
+                            media_type=resource.mimeType or "text/plain",
+                            filename=_filename_from_uri(str(resource.uri)),
+                        )
+                    )
+    return parts
+
+
+def _filename_from_uri(uri: str) -> str:
+    """Derive an attachment filename from the last path segment of ``uri``.
+
+    Both the query string and the fragment are discarded before the path is
+    inspected, so ``file:///docs/report.pdf?v=2#page=3`` yields ``report.pdf``.
+
+    Args:
+        uri: Resource URI, possibly carrying a query string and/or fragment.
+
+    Returns:
+        The final path segment, or ``"attachment"`` when none remains.
+    """
+    # Neither "?query" nor "#fragment" belongs to the path; cut at each in turn
+    # so the result is correct whichever of them appears first.
+    without_query = uri.split("?", 1)[0]
+    path_only = without_query.split("#", 1)[0]
+    return PurePosixPath(path_only).name or "attachment"
+
+
+def _parts_text(parts: Sequence[Part]) -> str:
+    """Render every part to text and join the non-empty results with newlines."""
+    return "\n".join(text for text in map(_part_text, parts) if text)
+
+
+def _part_text(part: Part) -> str:
+    """Render a single A2A part as display text.
+
+    Text parts are returned verbatim, URL parts become a Markdown link, data
+    parts a fenced JSON block, and raw parts a bracketed byte-count
+    placeholder. A part with none of those fields renders as an empty string.
+    """
+    if part.HasField("text"):
+        return part.text
+
+    if part.HasField("url"):
+        suffix = ""
+        if part.media_type:
+            suffix = f" ({part.media_type})"
+        label = part.filename if part.filename else part.url
+        return f"[{label}]({part.url}){suffix}"
+
+    if part.HasField("data"):
+        # Serialise through the protobuf JSON mapping, then pretty-print.
+        data = MessageToDict(part).get("data", {})
+        return f"```json\n{json.dumps(data, indent=2, sort_keys=True)}\n```"
+
+    if part.HasField("raw"):
+        suffix = ""
+        if part.media_type:
+            suffix = f" {part.media_type}"
+        label = part.filename if part.filename else "attachment"
+        return f"[{label}: {len(part.raw)} bytes{suffix}]"
+
+    return ""
+
+
+def _card_advertises_oauth(card: AgentCard) -> bool:
+    """Report whether a required security scheme of ``card`` is OAuth2 or OIDC.
+
+    Only scheme names listed in the card's security requirements are
+    considered; required names missing from ``security_schemes`` are ignored.
+    A card without schemes or without requirements never qualifies.
+    """
+    if not (card.security_schemes and card.security_requirements):
+        return False
+
+    required_names: set[str] = set()
+    for requirement in card.security_requirements:
+        required_names.update(requirement.schemes)
+
+    candidates = (card.security_schemes.get(name) for name in required_names)
+    return any(
+        candidate is not None
+        and (
+            candidate.HasField("oauth2_security_scheme")
+            or candidate.HasField("open_id_connect_security_scheme")
+        )
+        for candidate in candidates
+    )
+
+
+def _card_advertises_http_bearer(card: AgentCard) -> bool:
+    """Report whether a required security scheme of ``card`` is HTTP bearer auth.
+
+    Only scheme names listed in the card's security requirements are
+    considered. The HTTP auth scheme name is compared case-insensitively
+    against ``bearer``.
+    """
+    if not (card.security_schemes and card.security_requirements):
+        return False
+
+    required_names: set[str] = set()
+    for requirement in card.security_requirements:
+        required_names.update(requirement.schemes)
+
+    for name in required_names:
+        candidate = card.security_schemes.get(name)
+        if candidate is not None and candidate.HasField("http_auth_security_scheme"):
+            if candidate.http_auth_security_scheme.scheme.lower() == "bearer":
+                return True
+    return False
+
+
+def _headers_for_resolved_card(
+    *,
+    url: str,
+    headers: dict[str, str] | None,
+    explicit_headers: bool,
+    card: AgentCard,
+) -> dict[str, str] | None:
+    """Choose the request headers to use once the agent card is known.
+
+    ``headers`` is returned unchanged when it was supplied explicitly, when
+    ``url`` is not a Hugging Face Space URL, or when the card does not
+    advertise HTTP bearer auth. Otherwise the result is built solely from the
+    Hugging Face token found in the environment, or is ``None`` when no token
+    is available.
+    """
+    if explicit_headers:
+        return headers
+    if not is_hf_space_url(url):
+        return headers
+    if not _card_advertises_http_bearer(card):
+        return headers
+
+    hf_token = get_hf_token_from_env()
+    return add_explicit_bearer_auth_header(url, None, hf_token) if hf_token else None
+
+
+def _latest_text(messages: Sequence[PromptMessageExtended]) -> str:
+    """Return the text of the last message that has non-whitespace text.
+
+    Messages are scanned from newest to oldest; an empty string is returned
+    when none qualifies.
+    """
+    texts = (message.all_text() for message in reversed(messages))
+    return next((text for text in texts if text.strip()), "")
+
+
+def _append_text(chunks: list[str], text: str) -> None:
+    """Append ``text`` to ``chunks`` in place, ignoring empty strings."""
+    if text:
+        chunks.append(text)
+
+
+def _artifact_key(artifact: Any) -> str:
+    """Return the key under which an artifact's text is tracked.
+
+    Preference order: the artifact id, then its name, then the ``id()`` of the
+    object itself rendered as a string.
+    """
+    return artifact.artifact_id or artifact.name or str(id(artifact))
+
+
+def _replace_artifact_text(
+    artifact_order: list[str],
+    artifact_texts: dict[str, str],
+    artifact: Any,
+    text: str,
+) -> None:
+    """Store ``text`` as the full text of ``artifact``, replacing any earlier text.
+
+    Empty ``text`` is ignored. An artifact seen for the first time is also
+    appended to ``artifact_order``. Both containers are mutated in place.
+    """
+    if text:
+        key = _artifact_key(artifact)
+        first_sighting = key not in artifact_texts
+        if first_sighting:
+            artifact_order.append(key)
+        artifact_texts[key] = text
+
+
+def _apply_artifact_update(
+    artifact_order: list[str],
+    artifact_texts: dict[str, str],
+    artifact: Any,
+    text: str,
+    *,
+    append: bool,
+) -> None:
+    """Fold one artifact update into the tracked artifact texts.
+
+    A previously unseen artifact is registered in ``artifact_order`` and
+    starts with ``text``. For a known artifact, ``text`` is concatenated onto
+    the stored text when ``append`` is true and replaces it otherwise.
+    """
+    key = _artifact_key(artifact)
+    already_known = key in artifact_texts
+    if not already_known:
+        artifact_order.append(key)
+    if already_known and append:
+        artifact_texts[key] = artifact_texts[key] + text
+    else:
+        artifact_texts[key] = text
+
+
+def _state_message(state: str | None) -> str:
+    """Return the placeholder message for a task that produced no text.
+
+    A missing state and ``TASK_STATE_COMPLETED`` both read as "completed";
+    every other state reads as "ended".
+    """
+    if state and state != "TASK_STATE_COMPLETED":
+        return "A2A task ended without text output."
+    return "A2A task completed without text output."
+
+
+def _json_data_part(text: str, *, media_type: str | None) -> Part | None:
+    """Build a structured data part from JSON text, when applicable.
+
+    Returns ``None`` unless ``media_type`` is exactly ``application/json`` and
+    ``text`` parses as JSON. On success the parsed value is loaded into the
+    ``data`` field of a new part carrying the same media type.
+    """
+    if media_type == "application/json":
+        try:
+            parsed = json.loads(text)
+        except json.JSONDecodeError:
+            return None
+        data_part = Part(media_type=media_type)
+        ParseDict(parsed, data_part.data)
+        return data_part
+    return None
diff --git a/src/fast_agent/a2a/server.py b/src/fast_agent/a2a/server.py
new file mode 100644
index 000000000..e5a961413
--- /dev/null
+++ b/src/fast_agent/a2a/server.py
@@ -0,0 +1,786 @@
+"""Expose fast-agent agents through A2A HTTP transports."""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import contextlib
+import copy
+import json
+import os
+from importlib.metadata import version as get_version
+from pathlib import PurePosixPath
+from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
+from urllib.parse import quote, unquote, urlparse
+
+import uvicorn
+from a2a.server.agent_execution.agent_executor import AgentExecutor
+from a2a.server.request_handlers import DefaultRequestHandler
+from a2a.server.request_handlers.response_helpers import agent_card_to_dict
+from a2a.server.routes import create_agent_card_routes, create_jsonrpc_routes, create_rest_routes
+from a2a.server.routes.common import DefaultServerCallContextBuilder
+from a2a.server.tasks.inmemory_task_store import InMemoryTaskStore
+from a2a.server.tasks.task_updater import TaskUpdater
+from a2a.types import (
+    AgentCapabilities,
+    AgentCard,
+    AgentInterface,
+    AgentProvider,
+    AgentSkill,
+    HTTPAuthSecurityScheme,
+    Message,
+    Part,
+    SecurityRequirement,
+    SecurityScheme,
+    StringList,
+    Task,
+    TaskState,
+    TaskStatus,
+)
+from fastapi import FastAPI
+from google.protobuf.json_format import MessageToDict, ParseDict
+from mcp.types import (
+    BlobResourceContents,
+    EmbeddedResource,
+    ImageContent,
+    ResourceLink,
+    TextContent,
+    TextResourceContents,
+)
+from pydantic import AnyUrl
+from starlette.responses import JSONResponse
+
+from fast_agent.core.default_agent import agent_is_default, resolve_default_agent_name
+from fast_agent.core.exceptions import ProviderKeyError
+from fast_agent.core.logging.logger import get_logger
+from fast_agent.mcp.auth.context import request_bearer_token
+from fast_agent.types import LlmStopReason, PromptMessageExtended
+
+if TYPE_CHECKING:
+    from collections.abc import Awaitable, Callable
+
+    from a2a.server.agent_execution.context import RequestContext
+    from a2a.server.events.event_queue import EventQueue
+    from starlette.requests import Request
+    from starlette.types import ASGIApp, Receive, Scope, Send
+
+    from fast_agent.core.fastagent import AgentInstance
+    from fast_agent.interfaces import AgentProtocol
+    from fast_agent.llm.stream_types import StreamChunk
+
+
+@runtime_checkable
+class _StreamListenerCapable(Protocol):
+    """Structural type for objects that accept stream listeners.
+
+    Declared ``runtime_checkable`` so that it can be used in ``isinstance``
+    checks.
+    """
+
+    def add_stream_listener(self, listener: Any) -> Any:
+        """Register a text stream listener."""
+
+
+# Module-level logger for the A2A server.
+logger = get_logger(__name__)
+
+# Media types advertised as the agent card's default input and output modes.
+A2A_INPUT_MODES = ["text/plain", "application/json", "application/octet-stream", "image/*"]
+A2A_OUTPUT_MODES = ["text/plain", "application/json", "application/octet-stream", "image/*"]
+# NOTE(review): presumably the security-scheme name used for Hugging Face
+# bearer auth on the agent card; confirm against its use elsewhere in the file.
+A2A_HF_BEARER_SCHEME = "hf_bearer"
+
+
+def _fast_agent_version() -> str:
+    """Return the installed fast-agent version, or ``"unknown"``.
+
+    The distribution names ``fast-agent-mcp`` and ``fast-agent`` are tried in
+    that order; any error raised while looking one up moves on to the next.
+    """
+    for distribution in ("fast-agent-mcp", "fast-agent"):
+        try:
+            return get_version(distribution)
+        except Exception:
+            continue
+    return "unknown"
+
+
+def _get_a2a_oauth_provider() -> str | None:
+    """Read the OAuth provider from the ``FAST_AGENT_SERVE_OAUTH`` variable.
+
+    The value is lower-cased; ``hf`` and ``huggingface`` both map to
+    ``"huggingface"``. An unset or empty variable yields ``None``, and any
+    other value is returned lower-cased as is.
+    """
+    configured = os.environ.get("FAST_AGENT_SERVE_OAUTH", "").lower()
+    if configured in ("hf", "huggingface"):
+        return "huggingface"
+    return configured or None
+
+
+def _bearer_token_from_header(value: str | None) -> str | None:
+    """Extract the token from a ``Bearer <token>`` header value.
+
+    Surrounding whitespace is ignored and the scheme is matched
+    case-insensitively. Returns ``None`` for a missing or blank header, a
+    different scheme, or an empty token.
+    """
+    header = value.strip() if value is not None else ""
+    scheme = "bearer "
+    if not header.lower().startswith(scheme):
+        return None
+    credentials = header[len(scheme) :].strip()
+    return credentials if credentials else None
+
+
+def _bearer_token_from_call_context(context: RequestContext) -> str | None:
+    """Find the caller's bearer token in the A2A call-context state.
+
+    A non-empty ``fast_agent_bearer_token`` entry wins. Otherwise the stored
+    ``headers`` dict is consulted: first ``Authorization``, then
+    ``X-HF-Authorization``, each looked up in lower-case and canonical
+    spelling. Returns ``None`` when no bearer token is found.
+    """
+    state = context.call_context.state
+    stored = state.get("fast_agent_bearer_token")
+    if isinstance(stored, str) and stored:
+        return stored
+
+    headers = state.get("headers")
+    if not isinstance(headers, dict):
+        return None
+
+    header_spellings = (
+        ("authorization", "Authorization"),
+        ("x-hf-authorization", "X-HF-Authorization"),
+    )
+    for lower_name, canonical_name in header_spellings:
+        raw = headers.get(lower_name) or headers.get(canonical_name)
+        token = _bearer_token_from_header(raw if isinstance(raw, str) else None)
+        if token is not None:
+            return token
+    return None
+
+
+class A2ABearerAuthMiddleware:
+    """ASGI middleware that requires a bearer token on ``/a2a/`` HTTP routes.
+
+    Non-HTTP scopes and paths outside ``/a2a/`` pass through untouched. When
+    ``Authorization`` is absent, ``X-HF-Authorization`` is used in its place
+    and copied into the forwarded headers. Requests without a bearer token get
+    a 401 JSON response; accepted requests carry the token in the scope state
+    under ``fast_agent_bearer_token``.
+    """
+
+    def __init__(self, app: ASGIApp, *, provider: str) -> None:
+        self.app = app
+        self.provider = provider
+
+    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
+        guarded = scope["type"] == "http" and str(scope.get("path", "")).startswith(
+            "/a2a/"
+        )
+        if not guarded:
+            await self.app(scope, receive, send)
+            return
+
+        raw_headers = list(scope.get("headers", []))
+        authorization = _header_value(raw_headers, b"authorization")
+        if authorization is None:
+            fallback = _header_value(raw_headers, b"x-hf-authorization")
+            if fallback is not None:
+                # Promote the fallback header so downstream code sees it too.
+                authorization = fallback
+                raw_headers.append((b"authorization", fallback.encode("latin-1")))
+                scope = dict(scope, headers=raw_headers)
+
+        bearer = _bearer_token_from_header(authorization)
+        if bearer is not None:
+            request_state = dict(scope.get("state") or {})
+            request_state["fast_agent_bearer_token"] = bearer
+            await self.app(dict(scope, state=request_state), receive, send)
+            return
+
+        challenge = (
+            f'Bearer realm="fast-agent-a2a", '
+            f'error="invalid_token", provider="{self.provider}"'
+        )
+        rejection = JSONResponse(
+            {"error": "unauthorized"},
+            status_code=401,
+            headers={"WWW-Authenticate": challenge},
+        )
+        await rejection(scope, receive, send)
+
+
+def _header_value(headers: list[tuple[bytes, bytes]], name: bytes) -> str | None:
+    """Return the first header named ``name``, decoded as latin-1.
+
+    Header keys are lower-cased before comparison, so ``name`` must already be
+    lower-case. Returns ``None`` when no header matches.
+    """
+    matches = (raw.decode("latin-1") for key, raw in headers if key.lower() == name)
+    return next(matches, None)
+
+
+class A2AServerCallContextBuilder(DefaultServerCallContextBuilder):
+    """Call-context builder that carries the request's bearer token along.
+
+    The default context is built first; a non-empty string found on
+    ``request.state.fast_agent_bearer_token`` is then copied into the
+    context's state under the same key.
+    """
+
+    def build(self, request: Request) -> Any:
+        call_context = super().build(request)
+        bearer = getattr(request.state, "fast_agent_bearer_token", None)
+        if not (isinstance(bearer, str) and bearer):
+            return call_context
+        call_context.state["fast_agent_bearer_token"] = bearer
+        return call_context
+
+
+class FastAgentA2AExecutor(AgentExecutor):
+    """A2A executor that routes tasks into fast-agent agents."""
+
+    def __init__(
+        self,
+        primary_instance: AgentInstance,
+        create_instance: Callable[[], Awaitable[AgentInstance]],
+        dispose_instance: Callable[[AgentInstance], Awaitable[None]],
+        *,
+        primary_agent_name: str,
+        instance_scope: str = "connection",
+    ) -> None:
+        """Store the instance factory callbacks and initialise bookkeeping.
+
+        Args:
+            primary_instance: Instance returned directly in ``"shared"`` scope.
+            create_instance: Coroutine factory producing a new instance.
+            dispose_instance: Coroutine that tears an instance down.
+            primary_agent_name: Agent used when a request names no known agent.
+            instance_scope: ``"shared"``, ``"request"`` or anything else; other
+                values cache one instance per context id.
+        """
+        self._primary_instance = primary_instance
+        self._create_instance = create_instance
+        self._dispose_instance = dispose_instance
+        self._primary_agent_name = primary_agent_name
+        self._instance_scope = instance_scope
+        # Instances cached per context id (used outside shared/request scope).
+        self._context_instances: dict[str, AgentInstance] = {}
+        # One lock per lock key; entries are only removed in shutdown().
+        self._context_locks: dict[str, asyncio.Lock] = {}
+        # asyncio task currently executing each A2A task id.
+        self._running_tasks: dict[str, asyncio.Task[None]] = {}
+        # Guards creation of entries in _context_locks.
+        self._lock = asyncio.Lock()
+
+    async def cancel(self, context: RequestContext, event_queue: EventQueue) -> None:
+        """Cancel the running asyncio task for this A2A task and report it cancelled."""
+        task_id = context.task_id or ""
+        running_task = self._running_tasks.get(task_id)
+        if running_task is not None:
+            running_task.cancel()
+        updater = TaskUpdater(
+            event_queue=event_queue,
+            task_id=task_id,
+            context_id=context.context_id or "",
+        )
+        await updater.cancel()
+
+    async def execute(self, context: RequestContext, event_queue: EventQueue) -> None:
+        """Run one A2A request, tracking the current asyncio task for cancellation.
+
+        Returns immediately, without emitting any event, when the context has
+        no message, task id or context id.
+        """
+        if not context.message or not context.task_id or not context.context_id:
+            return
+
+        task = asyncio.current_task()
+        if task is not None:
+            self._running_tasks[context.task_id] = task
+        try:
+            await self._execute(context, event_queue)
+        finally:
+            self._running_tasks.pop(context.task_id, None)
+
+    async def _execute(self, context: RequestContext, event_queue: EventQueue) -> None:
+        """Drive a request from submission through to a final task status.
+
+        Emits the submitted task, marks it as working, runs the selected agent
+        under the lock for this request's lock key, and then reports one of:
+        auth required, cancelled, failed, input required, or completed.
+        """
+        assert context.message is not None
+        assert context.task_id is not None
+        assert context.context_id is not None
+
+        # Announce the task as submitted, with the incoming message as history.
+        await event_queue.enqueue_event(
+            Task(
+                id=context.task_id,
+                context_id=context.context_id,
+                status=TaskStatus(state=TaskState.TASK_STATE_SUBMITTED),
+                history=[context.message],
+            )
+        )
+
+        updater = TaskUpdater(
+            event_queue=event_queue,
+            task_id=context.task_id,
+            context_id=context.context_id,
+        )
+        await updater.start_work(
+            message=updater.new_agent_message(parts=[Part(text="fast-agent is working")])
+        )
+
+        # Requests that map to the same lock key run one at a time.
+        lock = await self._context_lock(self._lock_key(context))
+        async with lock:
+            # Publish the caller's bearer token for the duration of the call;
+            # it is reset in the outer finally below.
+            saved_bearer_token = request_bearer_token.set(
+                _bearer_token_from_call_context(context)
+            )
+            instance: AgentInstance | None = None
+            try:
+                instance = await self._acquire_instance(context.context_id)
+                stream_context: _A2AStreamingContext | None = None
+                try:
+                    agent = self._select_agent(instance, context.message)
+                    stream_context = self._prepare_streaming_context(
+                        agent=agent,
+                        updater=updater,
+                    )
+                    response = await agent.generate(
+                        _prompt_from_a2a_message(context.message),
+                    )
+                except ProviderKeyError as exc:
+                    await updater.requires_auth(
+                        message=updater.new_agent_message(parts=[Part(text=exc.message)])
+                    )
+                    return
+                except asyncio.CancelledError:
+                    # Report the cancellation, then let it propagate.
+                    await updater.cancel()
+                    raise
+                except Exception as exc:
+                    await updater.failed(
+                        message=updater.new_agent_message(parts=[Part(text=str(exc))])
+                    )
+                    return
+                finally:
+                    # Detach the listener and flush pending chunks on every path.
+                    if stream_context is not None:
+                        await self._cleanup_streaming_context(stream_context)
+            finally:
+                request_bearer_token.reset(saved_bearer_token)
+                if instance is not None:
+                    await self._release_instance(
+                        context.context_id,
+                        instance,
+                    )
+
+        # Every failure path above returns or re-raises, so both stream_context
+        # and response are bound from here on.
+        streamed_text = stream_context.streamed_text()
+        response_text = response.all_text()
+        if response.stop_reason == LlmStopReason.PAUSE:
+            await updater.requires_input(
+                message=updater.new_agent_message(parts=_parts_from_prompt_message(response))
+            )
+            return
+
+        if streamed_text:
+            # Text was already streamed: only send a replacing artifact when the
+            # final response text differs from what was streamed.
+            if response_text and response_text != streamed_text:
+                await updater.add_artifact(
+                    parts=_parts_from_prompt_message(response),
+                    artifact_id=stream_context.artifact_id,
+                    name="response",
+                    append=False,
+                    last_chunk=True,
+                )
+        else:
+            # Nothing was streamed: send the whole response as a single artifact.
+            await updater.add_artifact(
+                parts=_parts_from_prompt_message(response),
+                name="response",
+                append=False,
+                last_chunk=True,
+            )
+        await updater.complete()
+
+    def _prepare_streaming_context(
+        self,
+        *,
+        agent: AgentProtocol,
+        updater: TaskUpdater,
+    ) -> "_A2AStreamingContext":
+        """Create the streaming context and hook it to the agent when possible.
+
+        Agents that do not satisfy ``_StreamListenerCapable`` get a context
+        that is never started and receives no chunks.
+        """
+        stream_context = _A2AStreamingContext(
+            updater=updater,
+            artifact_id=f"{updater.task_id}:response",
+        )
+        if not isinstance(agent, _StreamListenerCapable):
+            return stream_context
+        stream_context.start()
+
+        def on_stream_chunk(chunk: StreamChunk) -> None:
+            # Empty chunks and reasoning chunks are not forwarded.
+            if not chunk.text or chunk.is_reasoning:
+                return
+            stream_context.record_chunk(chunk.text)
+
+        stream_context.remove_listener = agent.add_stream_listener(on_stream_chunk)
+        return stream_context
+
+    async def _cleanup_streaming_context(self, stream_context: "_A2AStreamingContext") -> None:
+        """Detach the stream listener, flush queued chunks and await the publisher."""
+        if stream_context.remove_listener is not None:
+            stream_context.remove_listener()
+        await stream_context.drain()
+        if stream_context.tasks:
+            await asyncio.gather(*stream_context.tasks, return_exceptions=True)
+
+    def _lock_key(self, context: RequestContext) -> str:
+        """Return the lock key for a request according to the instance scope."""
+        if self._instance_scope == "shared":
+            return "__shared__"
+        if self._instance_scope == "request":
+            return context.task_id or context.context_id or "__request__"
+        return context.context_id or "__context__"
+
+    async def _context_lock(self, lock_key: str) -> asyncio.Lock:
+        """Return the lock for ``lock_key``, creating it on first use."""
+        async with self._lock:
+            lock = self._context_locks.get(lock_key)
+            if lock is None:
+                lock = asyncio.Lock()
+                self._context_locks[lock_key] = lock
+            return lock
+
+    async def _acquire_instance(self, context_id: str) -> AgentInstance:
+        """Return the agent instance to use for this request.
+
+        ``"shared"`` scope returns the primary instance, ``"request"`` scope a
+        freshly created one, and any other scope an instance cached per
+        ``context_id`` (created on first use).
+        """
+        if self._instance_scope == "shared":
+            return self._primary_instance
+        if self._instance_scope == "request":
+            return await self._create_instance()
+        instance = self._context_instances.get(context_id)
+        if instance is not None:
+            return instance
+        instance = await self._create_instance()
+        self._context_instances[context_id] = instance
+        return instance
+
+    async def _release_instance(self, context_id: str, instance: AgentInstance) -> None:
+        """Dispose of ``instance`` when running in ``"request"`` scope."""
+        # context_id is accepted for symmetry with _acquire_instance but unused.
+        del context_id
+        if self._instance_scope == "request":
+            await self._dispose_instance(instance)
+
+    def _select_agent(self, instance: AgentInstance, message: Message) -> AgentProtocol:
+        """Pick the agent for ``message``.
+
+        Preference order: the agent named by the message (if it exists in the
+        instance), the primary agent, then whatever ``instance.app._agent(None)``
+        returns.
+        """
+        agent_name = _requested_agent_name(message)
+        if agent_name and agent_name in instance.agents:
+            return instance.agents[agent_name]
+        if self._primary_agent_name in instance.agents:
+            return instance.agents[self._primary_agent_name]
+        return instance.app._agent(None)
+
+    async def shutdown(self) -> None:
+        """Cancel running tasks, dispose cached instances and clear all caches."""
+        # Cancellation is requested but the tasks are not awaited here.
+        for task in list(self._running_tasks.values()):
+            task.cancel()
+        for instance in list(self._context_instances.values()):
+            await self._dispose_instance(instance)
+        self._context_instances.clear()
+        self._context_locks.clear()
+
+
+class _A2AStreamingContext:
+    """Buffers streamed text chunks and publishes them as artifact updates.
+
+    Chunks are recorded synchronously via ``record_chunk`` and published in
+    order by a background task created in ``start``. The first chunk is sent
+    with ``append=False`` and every later one with ``append=True``.
+    """
+
+    def __init__(self, *, updater: TaskUpdater, artifact_id: str) -> None:
+        self.updater = updater
+        self.artifact_id = artifact_id
+        # Callback that detaches the stream listener, once one is attached.
+        self.remove_listener: Callable[[], None] | None = None
+        self.tasks: list[asyncio.Task[None]] = []
+        # Items are (text, append); None tells the publisher to stop.
+        self._queue: asyncio.Queue[tuple[str, bool] | None] = asyncio.Queue()
+        self._chunks: list[str] = []
+
+    def start(self) -> None:
+        """Launch the background publisher task."""
+        publisher = asyncio.create_task(self._publish_chunks())
+        self.tasks.append(publisher)
+
+    def record_chunk(self, text: str) -> None:
+        """Remember ``text`` and queue it for publication."""
+        is_continuation = bool(self._chunks)
+        self._chunks.append(text)
+        self._queue.put_nowait((text, is_continuation))
+
+    def streamed_text(self) -> str:
+        """Return all recorded chunks concatenated in arrival order."""
+        return "".join(self._chunks)
+
+    async def _publish_chunks(self) -> None:
+        """Publish queued chunks until the ``None`` sentinel is received."""
+        while (item := await self._queue.get()) is not None:
+            chunk_text, append = item
+            try:
+                await self.updater.add_artifact(
+                    parts=[Part(text=chunk_text)],
+                    artifact_id=self.artifact_id,
+                    name="response",
+                    append=append,
+                    last_chunk=False,
+                )
+            except Exception:
+                logger.warning("Failed to publish A2A streaming artifact update", exc_info=True)
+            finally:
+                self._queue.task_done()
+        # Account for the sentinel itself.
+        self._queue.task_done()
+
+    async def drain(self) -> None:
+        """Wait for queued chunks to be handled, then tell the publisher to stop."""
+        await self._queue.join()
+        if self.tasks:
+            self._queue.put_nowait(None)
+
+
+class AgentA2AServer:
+    """Expose fast-agent as an A2A server over JSON-RPC and HTTP+JSON."""
+
+    def __init__(
+        self,
+        primary_instance: AgentInstance,
+        create_instance: Callable[[], Awaitable[AgentInstance]],
+        dispose_instance: Callable[[AgentInstance], Awaitable[None]],
+        *,
+        server_name: str = "fast-agent-a2a",
+        server_description: str | None = None,
+        host: str = "0.0.0.0",
+        port: int = 8000,
+        instance_scope: str = "connection",
+    ) -> None:
+        """Build the agent card, executor and request handler.
+
+        Args:
+            primary_instance: Instance whose agents are advertised as skills.
+            create_instance: Coroutine factory producing a new agent instance.
+            dispose_instance: Coroutine that tears an agent instance down.
+            server_name: Name published on the agent card.
+            server_description: Optional description for the agent card.
+            host: Default bind host; also used to build the card's URLs.
+            port: Default bind port; also used to build the card's URLs.
+            instance_scope: Instance-sharing policy passed to the executor.
+        """
+        self._host = host
+        self._port = port
+        self._oauth_provider = _get_a2a_oauth_provider()
+        self._primary_agent_name = _select_primary_agent(primary_instance)
+        # Bearer auth is only advertised and enforced for the Hugging Face provider.
+        self.agent_card = _build_agent_card(
+            primary_instance=primary_instance,
+            server_name=server_name,
+            server_description=server_description,
+            host=host,
+            port=port,
+            auth_enabled=self._oauth_provider == "huggingface",
+        )
+        self.executor = FastAgentA2AExecutor(
+            primary_instance=primary_instance,
+            create_instance=create_instance,
+            dispose_instance=dispose_instance,
+            primary_agent_name=self._primary_agent_name,
+            instance_scope=instance_scope,
+        )
+        self.request_handler = DefaultRequestHandler(
+            agent_executor=self.executor,
+            task_store=InMemoryTaskStore(),
+            agent_card=self.agent_card,
+        )
+
+    def asgi_app(self) -> FastAPI:
+        """Assemble the ASGI application.
+
+        Registers the agent-card route(s), the JSON-RPC routes at
+        ``/a2a/jsonrpc`` and the REST routes under ``/a2a/rest``. The bearer
+        auth middleware is added only for the Hugging Face provider.
+        """
+        app = FastAPI(title=self.agent_card.name)
+        context_builder = A2AServerCallContextBuilder()
+        app.routes.extend(_agent_card_routes(self.agent_card, host=self._host, port=self._port))
+        app.routes.extend(
+            create_jsonrpc_routes(
+                request_handler=self.request_handler,
+                rpc_url="/a2a/jsonrpc",
+                context_builder=context_builder,
+            )
+        )
+        app.routes.extend(
+            create_rest_routes(
+                request_handler=self.request_handler,
+                path_prefix="/a2a/rest",
+                context_builder=context_builder,
+            )
+        )
+        if self._oauth_provider == "huggingface":
+            app.add_middleware(A2ABearerAuthMiddleware, provider=self._oauth_provider)
+        return app
+
+    async def run_async(self, *, host: str | None = None, port: int | None = None) -> None:
+        """Serve the application with uvicorn until the server stops.
+
+        Args:
+            host: Bind host; falls back to the constructor's host when falsy.
+            port: Bind port; falls back to the constructor's port only when
+                ``None``, so an explicit ``0`` is passed through to uvicorn.
+
+        The executor is shut down when serving ends, whether normally or
+        through an exception.
+        """
+        bind_host = host or self._host
+        # Compare against None rather than truthiness: 0 is a valid port
+        # value and must not be replaced by the configured default.
+        bind_port = self._port if port is None else port
+        server = uvicorn.Server(
+            uvicorn.Config(
+                self.asgi_app(),
+                host=bind_host,
+                port=bind_port,
+                log_level="warning",
+            )
+        )
+        try:
+            await server.serve()
+        finally:
+            await self.executor.shutdown()
+
+
+def _select_primary_agent(primary_instance: AgentInstance) -> str:
+    """Choose the name of the agent that handles requests by default.
+
+    Uses ``resolve_default_agent_name`` with no agent treated as tool-only, and
+    falls back to the first agent in the instance when that yields ``None``.
+    """
+    agents = primary_instance.agents
+    chosen = resolve_default_agent_name(
+        agents,
+        is_default=lambda _name, agent: agent_is_default(agent),
+        is_tool_only=lambda _name, _agent: False,
+    )
+    return chosen if chosen is not None else next(iter(agents))
+
+
+def _build_agent_card(
+    *,
+    primary_instance: AgentInstance,
+    server_name: str,
+    server_description: str | None,
+    host: str,
+    port: int,
+    auth_enabled: bool = False,
+) -> AgentCard:
+    """Assemble the A2A agent card advertised by this server.
+
+    Every agent of ``primary_instance`` becomes one skill. Two interfaces are
+    listed, JSON-RPC and HTTP+JSON, both rooted at the base URL derived from
+    ``host`` and ``port``. Security schemes and requirements are only
+    populated when ``auth_enabled`` is true.
+    """
+    base_url = _base_url(host=host, port=port)
+    security_requirements = _security_requirements() if auth_enabled else []
+
+    skills = []
+    for agent_name, agent in primary_instance.agents.items():
+        skill = _agent_skill_from_fast_agent(
+            agent_name,
+            agent,
+            security_requirements=security_requirements,
+        )
+        skills.append(skill)
+
+    description = server_description or "A fast-agent A2A server."
+    provider = AgentProvider(organization="fast-agent", url="https://fast-agent.ai")
+    card_version = _fast_agent_version()
+    capabilities = AgentCapabilities(streaming=True, push_notifications=False)
+    security_schemes = _security_schemes() if auth_enabled else {}
+    jsonrpc_interface = AgentInterface(
+        protocol_binding="JSONRPC",
+        protocol_version="1.0",
+        url=f"{base_url}/a2a/jsonrpc",
+    )
+    rest_interface = AgentInterface(
+        protocol_binding="HTTP+JSON",
+        protocol_version="1.0",
+        url=f"{base_url}/a2a/rest",
+    )
+
+    return AgentCard(
+        name=server_name,
+        description=description,
+        provider=provider,
+        version=card_version,
+        capabilities=capabilities,
+        default_input_modes=A2A_INPUT_MODES,
+        default_output_modes=A2A_OUTPUT_MODES,
+        skills=skills,
+        security_schemes=security_schemes,
+        security_requirements=security_requirements,
+        supported_interfaces=[jsonrpc_interface, rest_interface],
+    )
+
+
+def _agent_card_routes(agent_card: AgentCard, *, host: str, port: int) -> list[Any]:
+    if not _is_wildcard_host(host):
+        return create_agent_card_routes(agent_card=agent_card)
+
+    from starlette.routing import Route
+
+    async def _get_agent_card(request: "Request") -> JSONResponse:
+        base_url = (
+            os.environ.get("FAST_AGENT_PUBLIC_URL")
+            or os.environ.get("FAST_AGENT_OAUTH_RESOURCE_URL")
+            or str(request.base_url)
+        ).rstrip("/")
+        return JSONResponse(agent_card_to_dict(_agent_card_with_base_url(agent_card, base_url)))
+
+    return [
+        Route("/.well-known/agent-card.json", endpoint=_get_agent_card, methods=["GET"]),
+    ]
+
+
+def _agent_card_with_base_url(agent_card: AgentCard, base_url: str) -> AgentCard:
+    card = copy.deepcopy(agent_card)
+    for interface in card.supported_interfaces:
+        if interface.protocol_binding == "JSONRPC":
+            interface.url = f"{base_url}/a2a/jsonrpc"
+        if interface.protocol_binding == "HTTP+JSON":
+            interface.url = f"{base_url}/a2a/rest"
+    return card
+
+
+def _base_url(*, host: str, port: int) -> str:
+    return f"http://{_url_host(host)}:{port}"
+
+
+def _url_host(bind_host: str) -> str:
+    if _is_wildcard_host(bind_host):
+        return "localhost"
+    if ":" in bind_host and not bind_host.startswith("["):
+        return f"[{bind_host}]"
+    return bind_host
+
+
+def _is_wildcard_host(bind_host: str) -> bool:
+    return bind_host in {"0.0.0.0", "::", ""}
+
+
+def _security_schemes() -> dict[str, SecurityScheme]:
+    return {
+        A2A_HF_BEARER_SCHEME: SecurityScheme(
+            http_auth_security_scheme=HTTPAuthSecurityScheme(
+                scheme="bearer",
+                bearer_format="HF_TOKEN",
+                description="Hugging Face bearer token",
+            )
+        )
+    }
+
+
+def _security_requirements() -> list[SecurityRequirement]:
+    return [
+        SecurityRequirement(
+            schemes={A2A_HF_BEARER_SCHEME: StringList(list=[])}
+        )
+    ]
+
+
+def _agent_skill_from_fast_agent(
+    agent_name: str,
+    agent: AgentProtocol,
+    *,
+    security_requirements: list[SecurityRequirement] | None = None,
+) -> AgentSkill:
+    agent_type = str(agent.agent_type) if agent.agent_type else "agent"
+    description = agent.config.description or f"Send a message to the {agent_name} fast-agent agent."
+    return AgentSkill(
+        id=agent_name,
+        name=agent_name,
+        description=description,
+        tags=["fast-agent", agent_type],
+        examples=["Hello"],
+        input_modes=A2A_INPUT_MODES,
+        output_modes=A2A_OUTPUT_MODES,
+        security_requirements=security_requirements or [],
+    )
+
+
+def _requested_agent_name(message: Message) -> str | None:
+    metadata = MessageToDict(message).get("metadata")
+    if not isinstance(metadata, dict):
+        return None
+    requested = metadata.get("agent") or metadata.get("fast_agent_agent")
+    return requested if isinstance(requested, str) and requested else None
+
+
+def _prompt_from_a2a_message(message: Message) -> PromptMessageExtended:
+    content: list[Any] = []
+    for part in message.parts:
+        content.extend(_content_from_part(part))
+    if not content:
+        content.append(TextContent(type="text", text=""))
+    return PromptMessageExtended(role="user", content=content)
+
+
+def _content_from_part(part: Part) -> list[Any]:
+    if part.HasField("text"):
+        return [TextContent(type="text", text=part.text)]
+    if part.HasField("url"):
+        label = part.filename or part.url
+        try:
+            return [
+                ResourceLink(
+                    type="resource_link",
+                    name=label,
+                    uri=AnyUrl(part.url),
+                    mimeType=part.media_type or None,
+                )
+            ]
+        except ValueError:
+            return [TextContent(type="text", text=f"[{label}]({part.url})")]
+    if part.HasField("raw"):
+        data = base64.b64encode(part.raw).decode("ascii")
+        if part.media_type.startswith("image/"):
+            return [ImageContent(type="image", data=data, mimeType=part.media_type)]
+        label = part.filename or "attachment"
+        return [
+            EmbeddedResource(
+                type="resource",
+                resource=BlobResourceContents(
+                    uri=AnyUrl(f"attachment:///{quote(label)}"),
+                    mimeType=part.media_type or "application/octet-stream",
+                    blob=data,
+                ),
+            )
+        ]
+    if part.HasField("data"):
+        data = MessageToDict(part).get("data", {})
+        return [TextContent(type="text", text=json.dumps(data, indent=2, sort_keys=True))]
+    return []
+
+
+def _parts_from_prompt_message(message: PromptMessageExtended) -> list[Part]:
+    parts: list[Part] = []
+    for content in message.content:
+        if isinstance(content, TextContent):
+            parts.append(Part(text=content.text))
+            continue
+        if isinstance(content, ImageContent):
+            parts.append(
+                Part(raw=base64.b64decode(content.data), media_type=content.mimeType)
+            )
+            continue
+        if isinstance(content, EmbeddedResource):
+            resource = content.resource
+            if isinstance(resource, BlobResourceContents):
+                parts.append(
+                    Part(
+                        raw=base64.b64decode(resource.blob),
+                        media_type=resource.mimeType or "",
+                        filename=_filename_from_uri(str(resource.uri)),
+                    )
+                )
+                continue
+            if isinstance(resource, TextResourceContents):
+                data_part = _json_data_part(resource.text, media_type=resource.mimeType)
+                if data_part is not None:
+                    parts.append(data_part)
+                    continue
+                parts.append(Part(text=resource.text))
+            continue
+        if isinstance(content, ResourceLink):
+            parts.append(
+                Part(
+                    url=str(content.uri),
+                    media_type=content.mimeType or "",
+                    filename=content.name,
+                )
+            )
+    if not parts:
+        parts.append(Part(text=message.all_text()))
+    return parts
+
+
+def _filename_from_uri(uri: str) -> str:
+    parsed = urlparse(uri)
+    name = PurePosixPath(unquote(parsed.path)).name
+    return name or parsed.netloc or "attachment"
+
+
+def _json_data_part(text: str, *, media_type: str | None) -> Part | None:
+    if media_type != "application/json":
+        return None
+    try:
+        data = json.loads(text)
+    except json.JSONDecodeError:
+        return None
+    part = Part(media_type=media_type)
+    ParseDict(data, part.data)
+    return part
diff --git a/src/fast_agent/agents/agent_types.py b/src/fast_agent/agents/agent_types.py
index 450fc8048..0a85c61a6 100644
--- a/src/fast_agent/agents/agent_types.py
+++ b/src/fast_agent/agents/agent_types.py
@@ -33,6 +33,7 @@ class AgentType(StrEnum):
     CHAIN = auto()
     ITERATIVE_PLANNER = auto()
     MAKER = auto()
+    A2A = auto()
 
 
 SkillConfig: TypeAlias = (
diff --git a/src/fast_agent/cli/__main__.py b/src/fast_agent/cli/__main__.py
index 3e49fde7d..fe54f38f9 100644
--- a/src/fast_agent/cli/__main__.py
+++ b/src/fast_agent/cli/__main__.py
@@ -21,6 +21,8 @@
     "--watch",
     "--reload",
     "--smart",
+    "--a2a-oauth",
+    "--no-a2a-oauth",
     "-x",
 }
 
diff --git a/src/fast_agent/cli/commands/README.md b/src/fast_agent/cli/commands/README.md
index 29db4ae33..5e72c38b3 100644
--- a/src/fast_agent/cli/commands/README.md
+++ b/src/fast_agent/cli/commands/README.md
@@ -20,7 +20,7 @@ fast-agent go [OPTIONS]
 - `--config-path`, `-c <path-or-uri>`: Path, HTTP(S) URL, `file://` URI, or `hf://` URI to config file
 - `--servers TEXT`: Comma-separated list of server names to enable from config
 - `--url TEXT`: Comma-separated list of HTTP/SSE URLs to connect to directly
-- `--auth TEXT`: Bearer token for authorization with URL-based servers
+- `--auth TEXT`: Bearer token for authorization with remote MCP URL servers and A2A endpoints
 - `--client-metadata-url TEXT`: OAuth Client ID Metadata Document URL for URL-based servers
 - `--model TEXT`: Override the default model (e.g., haiku, sonnet, gpt-4)
 - `--pack`, `--card-pack TEXT`: Install or reuse a named card pack in the selected environment before launch
@@ -83,10 +83,13 @@ The `--url` parameter allows you to connect directly to HTTP or SSE servers usin
 
 ### Authentication
 
-The `--auth` parameter provides authentication for URL-based servers:
+The `--auth` parameter provides authentication for remote connections created by
+the CLI:
 
-- When provided, it creates an `Authorization: Bearer TOKEN` header for all URL-based servers
-- This is commonly used with API endpoints that require authentication
+- For `--url`, it creates an `Authorization: Bearer TOKEN` header for all URL-based MCP servers
+- For `--a2a`, it creates an `Authorization: Bearer TOKEN` header for all generated ad hoc A2A AgentCards
+- This is a convenience flag for simple runs where the same credential applies to every remote endpoint
+- Use checked-in config or AgentCards when different remote endpoints need different credentials
 - Example: `fast-agent go --url=https://api.example.com/mcp --auth=12345abcde`
 
 ## Serve Command
@@ -107,7 +110,7 @@ fast-agent serve [OPTIONS]
 - `--servers TEXT`: Comma-separated list of server names to enable from config
 - `--card`, `--agent-cards <path-or-uri>`: Path, HTTP(S) URL, `file://` URI, or `hf://` URI to an AgentCard file or directory (repeatable)
 - `--url TEXT`: Comma-separated list of HTTP/SSE URLs to connect to
-- `--auth TEXT`: Bearer token for authorization with URL-based servers
+- `--auth TEXT`: Bearer token for authorization with remote MCP URL servers and A2A endpoints
 - `--client-metadata-url TEXT`: OAuth Client ID Metadata Document URL for URL-based servers
 - `--model TEXT`: Override the default model (e.g., haiku, sonnet, gpt-4)
 - `--skills-dir`, `--skills PATH`: Override the default skills directory
diff --git a/src/fast_agent/cli/commands/go.py b/src/fast_agent/cli/commands/go.py
index a8103c3ce..edcbc7671 100644
--- a/src/fast_agent/cli/commands/go.py
+++ b/src/fast_agent/cli/commands/go.py
@@ -4,11 +4,13 @@
 
 import os
 import sys
+import tempfile
 from pathlib import Path  # noqa: TC003 - typer resolves Path annotations at runtime
 from typing import Any, Literal
 
 import typer
 
+from fast_agent.a2a.connect import normalize_a2a_transport, normalize_a2a_url
 from fast_agent.cli.command_support import ensure_context_object, get_settings_or_exit
 from fast_agent.cli.env_helpers import resolve_environment_dir_option
 from fast_agent.cli.runtime.agent_setup import run_agent_request
@@ -43,6 +45,8 @@
 from fast_agent.cli.shared_options import CommonAgentOptions
 from fast_agent.cli.update_check import check_for_update_notice, should_run_update_check
 from fast_agent.constants import FAST_AGENT_SHELL_CHILD_ENV
+from fast_agent.core.exceptions import AgentConfigError
+from fast_agent.mcp.hf_auth import add_explicit_bearer_auth_header
 from fast_agent.paths import resolve_environment_paths
 
 CARD_EXTENSIONS = _CARD_EXTENSIONS
@@ -105,6 +109,63 @@ def _merge_card_sources(
     return merge_card_sources(sources, default_dir)
 
 
+def _materialize_a2a_agent_cards(
+    urls: list[str],
+    *,
+    transport: str | None,
+    oauth: bool | None = None,
+    auth_token: str | None = None,
+) -> tuple[tempfile.TemporaryDirectory[str], list[str]]:
+    normalized_transport = None
+    if transport:
+        normalized_transport = normalize_a2a_transport(transport)
+        if normalized_transport is None:
+            raise typer.BadParameter(
+                f"Unsupported A2A transport: {transport}",
+                param_hint="--a2a-transport",
+            )
+
+    tempdir = tempfile.TemporaryDirectory(prefix="fast-agent-a2a-")
+    paths: list[str] = []
+    for index, raw_url in enumerate(urls, start=1):
+        url, card_path, error = normalize_a2a_url(raw_url)
+        if error:
+            tempdir.cleanup()
+            raise typer.BadParameter(error, param_hint="--a2a")
+        name = "a2a_remote" if index == 1 else f"a2a_remote_{index}"
+        lines = [
+            "type: a2a",
+            f"name: {name}",
+            f"url: {url}",
+        ]
+        if auth_token:
+            normalized_token = auth_token.strip()
+            if normalized_token.lower().startswith("bearer "):
+                normalized_token = normalized_token[7:].strip()
+            if not normalized_token:
+                tempdir.cleanup()
+                raise typer.BadParameter("Auth token cannot be empty", param_hint="--auth")
+            headers = add_explicit_bearer_auth_header(url, None, normalized_token)
+            lines.append("headers:")
+            for key, value in headers.items():
+                lines.append(f"  {key}: {value!r}")
+        if normalized_transport:
+            lines.append(f"transport: {normalized_transport}")
+        if oauth is not None:
+            lines.extend(
+                [
+                    "auth:",
+                    f"  oauth: {str(oauth).lower()}",
+                ]
+            )
+        if card_path:
+            lines.append(f"relative_card_path: {card_path}")
+        path = Path(tempdir.name) / f"{name}.yaml"
+        path.write_text("\n".join(lines) + "\n", encoding="utf-8")
+        paths.append(str(path))
+    return tempdir, paths
+
+
 def _merge_pack_card_sources(
     sources: list[str] | None,
     pack_dir: Path,
@@ -342,6 +403,22 @@ def go(
     config_path: str | None = CommonAgentOptions.config_path(),
     servers: str | None = CommonAgentOptions.servers(),
     agent_cards: list[str] | None = CommonAgentOptions.agent_cards(),
+    a2a: list[str] | None = typer.Option(
+        None,
+        "--a2a",
+        metavar="<url>",
+        help="Connect to a remote A2A agent by base URL or agent-card URL (repeatable).",
+    ),
+    a2a_transport: str | None = typer.Option(
+        None,
+        "--a2a-transport",
+        help="Preferred A2A transport for --a2a: JSONRPC or HTTP+JSON.",
+    ),
+    a2a_oauth: bool | None = typer.Option(
+        None,
+        "--a2a-oauth/--no-a2a-oauth",
+        help="Force or disable browser OAuth for --a2a remote agents.",
+    ),
     card_tools: list[str] | None = CommonAgentOptions.card_tools(),
     urls: str | None = CommonAgentOptions.urls(),
     auth: str | None = CommonAgentOptions.auth(),
@@ -474,6 +551,18 @@ def go(
         agent_cards = _merge_pack_card_sources(agent_cards, env_paths.agent_cards)
         card_tools = _merge_pack_card_sources(card_tools, env_paths.tool_cards)
 
+    a2a_tempdir: tempfile.TemporaryDirectory[str] | None = None
+    if a2a:
+        a2a_tempdir, a2a_cards = _materialize_a2a_agent_cards(
+            a2a,
+            transport=a2a_transport,
+            oauth=a2a_oauth,
+            auth_token=auth,
+        )
+        agent_cards = [*(agent_cards or []), *a2a_cards]
+        if agent is None and len(a2a_cards) == 1:
+            agent = Path(a2a_cards[0]).stem
+
     request = build_command_run_request(
         name=name,
         instruction_option=instruction,
@@ -520,4 +609,11 @@ def go(
 
         queue_startup_notice(update_notice)
 
-    run_request(request)
+    try:
+        run_request(request)
+    except AgentConfigError as exc:
+        typer.echo(str(exc), err=True)
+        raise typer.Exit(1) from exc
+    finally:
+        if a2a_tempdir is not None:
+            a2a_tempdir.cleanup()
diff --git a/src/fast_agent/cli/commands/serve.py b/src/fast_agent/cli/commands/serve.py
index 1ed23e339..bbc5b1411 100644
--- a/src/fast_agent/cli/commands/serve.py
+++ b/src/fast_agent/cli/commands/serve.py
@@ -21,6 +21,7 @@ class ServeTransport(str, Enum):
     HTTP = "http"
     STDIO = "stdio"
     ACP = "acp"
+    A2A = "a2a"
 
 
 class InstanceScope(str, Enum):
@@ -138,7 +139,10 @@ def _build_run_request(
 
 
 app = typer.Typer(
-    help="Expose fast-agent to clients over MCP (http or stdio) or ACP, without writing an agent.py file",
+    help=(
+        "Expose fast-agent to clients over MCP (http or stdio), ACP, or A2A, "
+        "without writing an agent.py file"
+    ),
     context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
     add_completion=False,
 )
@@ -218,7 +222,9 @@ def serve(
     reload: bool = CommonAgentOptions.reload(),
     watch: bool = CommonAgentOptions.watch(),
 ) -> None:
-    """Expose fast-agent to clients over MCP (http/stdio) or ACP."""
+    """Expose fast-agent to clients over MCP (http/stdio), ACP, or A2A."""
+    if ctx.invoked_subcommand is not None:
+        return
     request = _build_run_request(
         ctx=ctx,
         name=name,
@@ -257,3 +263,77 @@ def serve(
         missing_shell_cwd=missing_shell_cwd,
     )
     run_request(request)
+
+
+# Registered as the ``a2a`` subcommand of the serve Typer app. The docstring
+# below is left untouched because Typer shows it as the command's help text.
+@app.command("a2a")
+def serve_a2a(
+    ctx: typer.Context,
+    name: str = typer.Option("fast-agent-a2a", "--name", help="Name for the A2A server"),
+    instruction: str | None = CommonAgentOptions.instruction(),
+    config_path: str | None = CommonAgentOptions.config_path(),
+    model: str | None = CommonAgentOptions.model(),
+    servers: str | None = CommonAgentOptions.servers(),
+    agent_cards: list[str] | None = CommonAgentOptions.agent_cards(),
+    card_tools: list[str] | None = CommonAgentOptions.card_tools(),
+    urls: str | None = CommonAgentOptions.urls(),
+    auth: str | None = CommonAgentOptions.auth(),
+    client_metadata_url: str | None = CommonAgentOptions.client_metadata_url(),
+    env_dir: Path | None = CommonAgentOptions.env_dir(),
+    noenv: bool = CommonAgentOptions.noenv(),
+    smart: bool = CommonAgentOptions.smart(),
+    skills_dir: Path | None = CommonAgentOptions.skills_dir(),
+    npx: str | None = CommonAgentOptions.npx(),
+    uvx: str | None = CommonAgentOptions.uvx(),
+    host: str = typer.Option(
+        "0.0.0.0",
+        "--host",
+        help="Host address to bind for the A2A HTTP server",
+    ),
+    port: int = typer.Option(
+        8000,
+        "--port",
+        help="Port to use for the A2A HTTP server",
+    ),
+    shell: bool = CommonAgentOptions.shell(),
+    no_shell: bool = CommonAgentOptions.no_shell(),
+    instance_scope: InstanceScope = typer.Option(
+        InstanceScope.SHARED,
+        "--instance-scope",
+        help="Control how A2A clients receive isolated agent instances.",
+    ),
+    reload: bool = CommonAgentOptions.reload(),
+    watch: bool = CommonAgentOptions.watch(),
+) -> None:
+    """Expose fast-agent over A2A HTTP transports."""
+    # Build the run request with the shared builder, forwarding every CLI
+    # option as-is and pinning the transport to A2A.
+    request = _build_run_request(
+        ctx=ctx,
+        name=name,
+        instruction=instruction,
+        config_path=config_path,
+        servers=servers,
+        agent_cards=agent_cards,
+        card_tools=card_tools,
+        urls=urls,
+        auth=auth,
+        client_metadata_url=client_metadata_url,
+        model=model,
+        skills_dir=skills_dir,
+        env_dir=env_dir,
+        noenv=noenv,
+        force_smart=smart,
+        npx=npx,
+        uvx=uvx,
+        # These have no corresponding option on this subcommand, so fixed
+        # values are supplied.
+        stdio=None,
+        description=None,
+        tool_name_template=None,
+        transport=ServeTransport.A2A,
+        host=host,
+        port=port,
+        shell=shell,
+        no_shell=no_shell,
+        instance_scope=instance_scope,
+        # Also fixed: this subcommand has no option for it.
+        no_permissions=False,
+        reload=reload,
+        watch=watch,
+    )
+    run_request(request)
diff --git a/src/fast_agent/cli/constants.py b/src/fast_agent/cli/constants.py
index caae314d5..d04b560a6 100644
--- a/src/fast_agent/cli/constants.py
+++ b/src/fast_agent/cli/constants.py
@@ -49,6 +49,10 @@ def normalize_resume_flag_args(args: list[str], *, start_index: int = 0) -> None
     "--skills-dir",
     "--agent-cards",
     "--card",
+    "--a2a",
+    "--a2a-transport",
+    "--a2a-oauth",
+    "--no-a2a-oauth",
     "--env",
     "--noenv",
     "--no-env",
diff --git a/src/fast_agent/cli/main.py b/src/fast_agent/cli/main.py
index 4707ed390..d079e1cd0 100644
--- a/src/fast_agent/cli/main.py
+++ b/src/fast_agent/cli/main.py
@@ -37,9 +37,30 @@
     "export": "fast_agent.cli.commands.export:app",
 }
 
+LAZY_SUBCOMMAND_HELP: dict[str, str] = {
+    "acp": "Start fast-agent as an ACP stdio server (convenience wrapper for 'serve --transport acp').",
+    "auth": "Manage OAuth tokens stored in the OS keyring for MCP HTTP/SSE servers (identity = base URL).",
+    "batch": "Run batch processing jobs.",
+    "bootstrap": "Create fast-agent quickstarts",
+    "cards": "Manage card packs (list/add/remove/update/publish).",
+    "check": "Check and diagnose FastAgent configuration",
+    "config": "Configure fast-agent settings interactively.",
+    "demo": "Demo commands for UI features.",
+    "export": "Export persisted session traces.",
+    "go": "Run an interactive agent directly from the command line without creating an agent.py file",
+    "model": "Interactive model reference setup.",
+    "plugins": "Manage command plugins (list/add/remove/update).",
+    "quickstart": "Create fast-agent quickstarts",
+    "scaffold": "Initialize a new FastAgent project with configuration files and example agent.",
+    "serve": "Expose fast-agent to clients over MCP (http or stdio), ACP, or A2A, without writing an agent.py file",
+    "skills": "Manage skills (list/available/search/add/remove/update).",
+}
+
 
 class LazyGroup(TyperGroup):
     lazy_subcommands: dict[str, str] = {}
+    lazy_help: dict[str, str] = {}
+    _rendering_root_help: bool = False
 
     def parse_args(self, ctx: click.Context, args: list[str]) -> list[str]:
         if _first_root_command(args) == "go":
@@ -53,6 +74,8 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command | None
         target = self.lazy_subcommands.get(cmd_name)
         if not target:
             return None
+        if self._rendering_root_help:
+            return click.Command(cmd_name, help=self.lazy_help.get(cmd_name, ""))
         module_path, app_name = target.split(":", 1)
         module = importlib.import_module(module_path)
         typer_app = getattr(module, app_name)
@@ -60,6 +83,13 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command | None
         command.name = cmd_name
         return command
 
+    def format_help(self, ctx: click.Context, formatter: click.HelpFormatter) -> None:
+        self._rendering_root_help = True
+        try:
+            super().format_help(ctx, formatter)
+        finally:
+            self._rendering_root_help = False
+
 
 app = typer.Typer(
     cls=LazyGroup,
@@ -67,6 +97,7 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command | None
     add_completion=False,  # We'll add this later when we have more commands
 )
 LazyGroup.lazy_subcommands = LAZY_SUBCOMMANDS
+LazyGroup.lazy_help = LAZY_SUBCOMMAND_HELP
 
 
 def _first_root_command(args: list[str]) -> str | None:
diff --git a/src/fast_agent/cli/runtime/agent_setup.py b/src/fast_agent/cli/runtime/agent_setup.py
index 492e622a2..ac0a7d54b 100644
--- a/src/fast_agent/cli/runtime/agent_setup.py
+++ b/src/fast_agent/cli/runtime/agent_setup.py
@@ -15,6 +15,7 @@
 import typer
 from pydantic import BaseModel
 
+from fast_agent.agents.agent_types import AgentConfig, AgentType
 from fast_agent.cli.command_support import get_settings_or_exit
 from fast_agent.cli.commands.server_helpers import add_servers_to_config
 from fast_agent.cli.constants import RESUME_LATEST_SENTINEL
@@ -53,6 +54,10 @@
 
 logger = get_logger(__name__)
 
+_STARTUP_MODEL_DEFINED = "defined"
+_STARTUP_MODEL_NOT_REQUIRED = "not_required"
+_STARTUP_MODEL_UNSATISFIED = "unsatisfied"
+
 
 async def _structured_call(
     agent_obj: Any,
@@ -101,21 +106,17 @@ def _should_prompt_for_model_picker(
     return stdin_is_tty and stdout_is_tty
 
 
-def _explicit_agent_cards_define_startup_model(
-    request: AgentRunRequest,
-    *,
-    model_references: Mapping[str, Mapping[str, str]] | None = None,
-) -> bool:
-    if not request.agent_cards or request.target_agent_name:
-        return False
+def _load_explicit_agent_cards(request: AgentRunRequest):
+    if not request.agent_cards:
+        return None
 
     try:
-        from fast_agent.core.agent_card_loader import load_agent_cards
+        from fast_agent.core.agent_card_loader import LoadedAgentCard, load_agent_cards
         from fast_agent.io.source_resolver import REMOTE_TEXT_SCHEMES, materialize_text_source
     except Exception:
-        return False
+        return None
 
-    loaded_cards = []
+    loaded_cards: list[LoadedAgentCard] = []
     temp_paths: list[Path] = []
     try:
         for source in request.agent_cards:
@@ -128,41 +129,99 @@ def _explicit_agent_cards_define_startup_model(
                 path = materialize_text_source(source, label="AgentCard source")
             loaded_cards.extend(load_agent_cards(path))
     except Exception:
-        return False
+        return None
     finally:
         for path in temp_paths:
             path.unlink(missing_ok=True)
 
-    runnable_configs = []
+    return loaded_cards
+
+
+def _selected_startup_agent_config(request: AgentRunRequest) -> AgentConfig | None:
+    loaded_cards = _load_explicit_agent_cards(request)
+    if not loaded_cards:
+        return None
+
+    runnable_cards = []
     for card in loaded_cards:
         if card.agent_data.get("tool_only", False):
             continue
         config = card.agent_data.get("config")
-        if config is None:
+        if not isinstance(config, AgentConfig):
             continue
-        runnable_configs.append(config)
+        runnable_cards.append((card, config))
 
-    if len(runnable_configs) != 1:
-        return False
+    if request.target_agent_name:
+        runnable_cards = [
+            (card, config) for card, config in runnable_cards if card.name == request.target_agent_name
+        ]
+    elif len(runnable_cards) > 1:
+        default_cards = [
+            (card, config) for card, config in runnable_cards if config.default
+        ]
+        if len(default_cards) == 1:
+            runnable_cards = default_cards
+
+    if len(runnable_cards) != 1:
+        return None
+    return runnable_cards[0][1]
+
+
+def _explicit_agent_cards_startup_model_status(
+    request: AgentRunRequest,
+    *,
+    model_references: Mapping[str, Mapping[str, str]] | None = None,
+) -> str:
+    config = _selected_startup_agent_config(request)
+    if config is None:
+        return _STARTUP_MODEL_UNSATISFIED
 
-    model = runnable_configs[0].model
+    if config.agent_type == AgentType.A2A:
+        return _STARTUP_MODEL_NOT_REQUIRED
+
+    model = config.model
     if not isinstance(model, str):
-        return False
+        return _STARTUP_MODEL_UNSATISFIED
 
     model_spec = model.strip()
     if not model_spec:
-        return False
+        return _STARTUP_MODEL_UNSATISFIED
     if not model_spec.startswith("$"):
-        return True
+        return _STARTUP_MODEL_DEFINED
 
     try:
         from fast_agent.core.model_resolution import resolve_model_reference
 
         resolved_model = resolve_model_reference(model_spec, model_references)
     except ModelConfigError:
-        return False
+        return _STARTUP_MODEL_UNSATISFIED
+
+    return _STARTUP_MODEL_DEFINED if resolved_model.strip() else _STARTUP_MODEL_UNSATISFIED
 
-    return bool(resolved_model.strip())
+
+def _explicit_agent_cards_define_startup_model(
+    request: AgentRunRequest,
+    *,
+    model_references: Mapping[str, Mapping[str, str]] | None = None,
+) -> bool:
+    return (
+        _explicit_agent_cards_startup_model_status(
+            request,
+            model_references=model_references,
+        )
+        == _STARTUP_MODEL_DEFINED
+    )
+
+
+def _explicit_agent_cards_satisfy_startup_model(
+    request: AgentRunRequest,
+    *,
+    model_references: Mapping[str, Mapping[str, str]] | None = None,
+) -> bool:
+    return _explicit_agent_cards_startup_model_status(
+        request,
+        model_references=model_references,
+    ) in {_STARTUP_MODEL_DEFINED, _STARTUP_MODEL_NOT_REQUIRED}
 
 
 
@@ -1134,7 +1193,7 @@ async def run_agent_request(request: AgentRunRequest) -> None:
 
     if request.model is None:
         settings = _load_request_settings(request)
-        startup_model_defined_by_card = _explicit_agent_cards_define_startup_model(
+        startup_model_satisfied_by_card = _explicit_agent_cards_satisfy_startup_model(
             request,
             model_references=getattr(settings, "model_references", None),
         )
@@ -1146,7 +1205,7 @@ async def run_agent_request(request: AgentRunRequest) -> None:
 
         if (
             explicit_source is None
-            and not startup_model_defined_by_card
+            and not startup_model_satisfied_by_card
             and _should_prompt_for_model_picker(
             request,
             stdin_is_tty=sys.stdin.isatty(),
@@ -1169,7 +1228,7 @@ async def run_agent_request(request: AgentRunRequest) -> None:
                 model_spec=request.model,
             )
             startup_model_source_override = "model picker"
-        elif explicit_source is None and not startup_model_defined_by_card:
+        elif explicit_source is None and not startup_model_satisfied_by_card:
             _, initial_model_spec = _resolve_model_picker_initial_selection(
                 settings=settings,
             )
@@ -1324,7 +1383,9 @@ async def default_fallback_agent() -> None:
                 if not target_name:
                     target_name = fast.get_default_agent_name()
                 if target_name:
-                    fast.attach_agent_tools(target_name, tool_loaded_names)
+                    target_data = fast.agents.get(target_name)
+                    if target_data and target_data.get("type") in ("basic", "smart", "custom"):
+                        fast.attach_agent_tools(target_name, tool_loaded_names)
 
             _validate_target_agent_name(fast, request)
             _apply_shell_cwd_policy_preflight(fast, request)
diff --git a/src/fast_agent/cli/shared_options.py b/src/fast_agent/cli/shared_options.py
index 01d884373..69ceaa019 100644
--- a/src/fast_agent/cli/shared_options.py
+++ b/src/fast_agent/cli/shared_options.py
@@ -59,7 +59,7 @@ def auth():
             None,
             "--auth",
             help=(
-                "Authorization token value for URL-based servers "
+                "Authorization token value for remote MCP URL servers and A2A endpoints "
                 "(pass token only; optional 'Bearer ' prefix is accepted)"
             ),
         )
diff --git a/src/fast_agent/config.py b/src/fast_agent/config.py
index b60f9fffb..7697de104 100644
--- a/src/fast_agent/config.py
+++ b/src/fast_agent/config.py
@@ -53,6 +53,10 @@ class MCPServerAuthSettings(BaseModel):
     # and escalates to OAuth on a 401 challenge.
     oauth: bool = True
 
+    # Forward the inbound request bearer token to matching upstream services.
+    # "huggingface" applies only to hf.co, huggingface.co, and *.hf.space URLs.
+    forward: Literal["huggingface"] | None = None
+
     # Local callback server configuration
     redirect_port: int = 3030
     redirect_path: str = "/callback"
@@ -570,6 +574,9 @@ def _normalize_management_specific_settings(self) -> "MCPServerSettings":
                 url=self.url,
                 headers=self.headers,
                 access_token=self.access_token,
+                forward_huggingface=(
+                    self.auth is not None and self.auth.forward == "huggingface"
+                ),
             )
         return self
 
diff --git a/src/fast_agent/core/agent_card_loader.py b/src/fast_agent/core/agent_card_loader.py
index 0dcff78be..2c765c0a4 100644
--- a/src/fast_agent/core/agent_card_loader.py
+++ b/src/fast_agent/core/agent_card_loader.py
@@ -11,6 +11,7 @@
 import frontmatter
 import yaml
 
+from fast_agent.a2a.config import A2AAgentConfig
 from fast_agent.agents.agent_types import (
     AgentConfig,
     AgentType,
@@ -18,7 +19,7 @@
     MCPConnectTarget,
 )
 from fast_agent.command_actions import PluginCommandActionSpec, parse_plugin_command_action_specs
-from fast_agent.config import MCPServerSettings, resolve_env_vars
+from fast_agent.config import MCPServerAuthSettings, MCPServerSettings, resolve_env_vars
 from fast_agent.constants import DEFAULT_AGENT_INSTRUCTION, SMART_AGENT_INSTRUCTION
 from fast_agent.core.agent_card_rules import (
     AGENT_TYPE_TO_CARD_TYPE,
@@ -453,6 +454,7 @@ def _build_agent_data(
         trim_tool_history=trim_tool_history,
         mcp_connect=mcp_connect,
         source_path=path,
+        agent_type=agent_type,
     )
 
     if request_params is not None:
@@ -534,6 +536,32 @@ def _build_agent_data(
         if red_flag is not None:
             red_flag = _ensure_int(red_flag, "red_flag_max_length", path)
         agent_data["red_flag_max_length"] = red_flag
+    elif type_key == "a2a":
+        transport = raw.get("transport")
+        if transport is not None:
+            transport = _ensure_a2a_transport(transport, path)
+        auth = raw.get("auth")
+        if auth is not None and not isinstance(auth, dict):
+            raise AgentConfigError(f"'auth' must be a mapping in {path}")
+        agent_data["a2a"] = A2AAgentConfig(
+            url=_ensure_str(raw.get("url"), "url", path),
+            transport=transport,
+            streaming=_ensure_bool(raw.get("streaming"), "streaming", path, default=True),
+            polling=_ensure_bool(raw.get("polling"), "polling", path, default=False),
+            accepted_output_modes=_ensure_str_list(
+                raw.get("accepted_output_modes", []), "accepted_output_modes", path
+            ),
+            headers=_ensure_headers_map(raw.get("headers"), "headers", path) or {},
+            auth=MCPServerAuthSettings.model_validate(auth) if auth is not None else None,
+            relative_card_path=_ensure_optional_str(
+                raw.get("relative_card_path"), "relative_card_path", path
+            ),
+            request_timeout_seconds=(
+                _ensure_float(raw.get("request_timeout_seconds"), "request_timeout_seconds", path)
+                if raw.get("request_timeout_seconds") is not None
+                else 120.0
+            ),
+        )
 
     return agent_data
 
@@ -572,6 +600,15 @@ def _ensure_optional_str(value: Any, field: str, path: Path) -> str | None:
     return value.strip()
 
 
+def _ensure_a2a_transport(value: Any, path: Path) -> str:
+    """Return ``value`` as an A2A transport name, rejecting unsupported ones."""
+    transport = _ensure_str(value, "transport", path)
+    # Guard clause: anything other than the two names below is a card error.
+    if transport in ("JSONRPC", "HTTP+JSON"):
+        return transport
+    raise AgentConfigError(f"'transport' must be one of JSONRPC, HTTP+JSON in {path}")
+
+
 def _ensure_str_list(value: Any, field: str, path: Path) -> list[str]:
     if value is None:
         return []
@@ -1155,6 +1192,36 @@ def _serialize_maker_fields(
         card["red_flag_max_length"] = red_flag
 
 
+def _serialize_a2a_fields(
+    card: dict[str, Any],
+    agent_data: AgentCardData,
+    _config: AgentConfig,
+) -> None:
+    """Serialize the A2A settings in ``agent_data`` into ``card``, omitting defaults."""
+    a2a = agent_data.get("a2a")
+    if not isinstance(a2a, A2AAgentConfig):
+        raise AgentConfigError("A2A agent is missing A2A configuration")
+
+    card["url"] = a2a.url
+    if a2a.transport:
+        card["transport"] = a2a.transport
+    if not a2a.streaming:
+        card["streaming"] = False
+    if a2a.polling:
+        card["polling"] = True
+    if a2a.accepted_output_modes:
+        card["accepted_output_modes"] = list(a2a.accepted_output_modes)
+    if a2a.headers:
+        card["headers"] = dict(a2a.headers)
+    if (auth := a2a.auth) is not None:
+        dumped = auth.model_dump(mode="python", exclude_none=True, exclude_defaults=True)
+        card["auth"] = {**dumped, "oauth": auth.oauth}  # "oauth" is always written
+    if a2a.relative_card_path:
+        card["relative_card_path"] = a2a.relative_card_path
+    if a2a.request_timeout_seconds != 120.0:
+        card["request_timeout_seconds"] = a2a.request_timeout_seconds
+
+
 _CARD_SERIALIZERS: dict[CardType, CardTypeSerializer] = {
     "agent": _serialize_agent_like_fields,
     "smart": _serialize_agent_like_fields,
@@ -1165,6 +1232,7 @@ def _serialize_maker_fields(
     "orchestrator": _serialize_orchestrator_fields,
     "iterative_planner": _serialize_iterative_planner_fields,
     "MAKER": _serialize_maker_fields,
+    "a2a": _serialize_a2a_fields,
 }
 
 
diff --git a/src/fast_agent/core/agent_card_rules.py b/src/fast_agent/core/agent_card_rules.py
index 318c24ed5..27d767928 100644
--- a/src/fast_agent/core/agent_card_rules.py
+++ b/src/fast_agent/core/agent_card_rules.py
@@ -16,6 +16,7 @@
     "orchestrator",
     "iterative_planner",
     "MAKER",
+    "a2a",
 ]
 
 CARD_TYPE_TO_AGENT_TYPE: dict[CardType, AgentType] = {
@@ -28,6 +29,7 @@
     "orchestrator": AgentType.ORCHESTRATOR,
     "iterative_planner": AgentType.ITERATIVE_PLANNER,
     "MAKER": AgentType.MAKER,
+    "a2a": AgentType.A2A,
 }
 
 AGENT_TYPE_TO_CARD_TYPE: dict[str, CardType] = {
@@ -141,6 +143,18 @@
         "red_flag_max_length",
         "messages",
     },
+    "a2a": {
+        *COMMON_CARD_FIELDS,
+        "url",
+        "transport",
+        "streaming",
+        "polling",
+        "accepted_output_modes",
+        "headers",
+        "auth",
+        "relative_card_path",
+        "request_timeout_seconds",
+    },
 }
 
 REQUIRED_FIELDS_BY_TYPE: dict[CardType, set[str]] = {
@@ -153,6 +167,7 @@
     "orchestrator": {"agents"},
     "iterative_planner": {"agents"},
     "MAKER": {"worker"},
+    "a2a": {"url"},
 }
 
 DEFAULT_USE_HISTORY_BY_TYPE: dict[CardType, bool] = {
@@ -165,6 +180,7 @@
     "orchestrator": False,
     "iterative_planner": False,
     "MAKER": True,
+    "a2a": True,
 }
 
 MCP_CONNECT_ALLOWED_KEYS = frozenset(
diff --git a/src/fast_agent/core/agent_card_types.py b/src/fast_agent/core/agent_card_types.py
index 4f1c549d8..f8bfeae60 100644
--- a/src/fast_agent/core/agent_card_types.py
+++ b/src/fast_agent/core/agent_card_types.py
@@ -7,6 +7,7 @@
 if TYPE_CHECKING:
     from pathlib import Path
 
+    from fast_agent.a2a.config import A2AAgentConfig
     from fast_agent.agents.agent_types import AgentConfig
     from fast_agent.tools.function_tool_config import FunctionToolSpec
 
@@ -45,3 +46,4 @@ class AgentCardData(TypedDict, total=False):
     red_flag_max_length: int | None
     agent_class: type | None
     cls: type | None
+    a2a: A2AAgentConfig
diff --git a/src/fast_agent/core/direct_factory.py b/src/fast_agent/core/direct_factory.py
index 1aed0b135..d3d630947 100644
--- a/src/fast_agent/core/direct_factory.py
+++ b/src/fast_agent/core/direct_factory.py
@@ -20,6 +20,8 @@
 
 from fastmcp.tools import FunctionTool
 
+from fast_agent.a2a.config import A2AAgentConfig
+from fast_agent.a2a.remote_agent import A2ARemoteAgent
 from fast_agent.agents import McpAgent
 from fast_agent.agents.agent_types import AgentConfig, AgentType, FunctionToolConfig
 from fast_agent.agents.llm_agent import LlmAgent
@@ -956,6 +958,63 @@ async def _create_maker_agent(
     result_agents[name] = maker_agent
 
 
+def _format_a2a_initialization_error(
+    *,
+    name: str,
+    url: str,
+    transport: str | None,
+    exc: Exception,
+) -> str:
+    """Build the user-facing message for an A2A agent that failed to initialize."""
+    detail = str(exc).strip()
+    chained = exc.__cause__
+    # Prefer a chained cause: its message if any (stop there), else its class name.
+    while chained is not None:
+        chained_text = str(chained).strip()
+        detail = chained_text or chained.__class__.__name__
+        if chained_text:
+            break
+        chained = chained.__cause__
+    detail = detail or exc.__class__.__name__
+    via = f" via {transport}" if transport else ""
+    return (
+        f"Unable to initialize A2A agent '{name}'{via} at {url}: {detail}. "
+        "Check that the A2A server is running and that the URL points to the agent base "
+        "URL or agent card."
+    )
+
+
+async def _create_a2a_agent(
+    name: str,
+    agent_data: Mapping[str, Any],
+    build_ctx: AgentBuildContext,
+    result_agents: AgentDict,
+) -> None:
+    """Create, initialize, and register the remote A2A agent described by ``agent_data``.
+
+    Raises AgentConfigError if the A2A config is missing or initialization fails.
+    """
+    config = cast("AgentConfig", agent_data["config"])
+    a2a_config = agent_data.get("a2a")
+    if not isinstance(a2a_config, A2AAgentConfig):
+        raise AgentConfigError(f"A2A agent '{name}' missing A2A configuration")
+
+    context = build_ctx.app_instance.context
+    agent = A2ARemoteAgent(config=config, a2a_config=a2a_config, context=context)
+    try:
+        await agent.initialize()
+    except Exception as exc:
+        try:  # Best-effort cleanup: a failing shutdown must not mask the init error.
+            await agent.shutdown()
+        except Exception:
+            pass
+        message = _format_a2a_initialization_error(
+            name=name, url=a2a_config.url, transport=a2a_config.transport, exc=exc
+        )
+        raise AgentConfigError(message) from exc
+    _register_loaded_agent(result_agents, name, agent)
+
+
 _AGENT_TYPE_BUILDERS: dict[AgentType, AgentTypeBuilder] = {
     AgentType.LLM: _create_basic_agent,
     AgentType.BASIC: _create_basic_agent,
@@ -968,6 +1027,7 @@ async def _create_maker_agent(
     AgentType.CHAIN: _create_chain_workflow_agent,
     AgentType.EVALUATOR_OPTIMIZER: _create_evaluator_optimizer_agent,
     AgentType.MAKER: _create_maker_agent,
+    AgentType.A2A: _create_a2a_agent,
 }
 
 
diff --git a/src/fast_agent/core/fastagent.py b/src/fast_agent/core/fastagent.py
index 4669faf30..e19b2d1e0 100644
--- a/src/fast_agent/core/fastagent.py
+++ b/src/fast_agent/core/fastagent.py
@@ -237,9 +237,12 @@ def __init__(
             )
             parser.add_argument(
                 "--transport",
-                choices=["http", "stdio", "acp"],
+                choices=["http", "stdio", "acp", "a2a"],
                 default=None,
-                help="Transport protocol to use when running as a server (http, stdio, or acp)",
+                help=(
+                    "Transport protocol to use when running as a server "
+                    "(http, stdio, acp, or a2a)"
+                ),
             )
             parser.add_argument(
                 "--port",
@@ -2276,7 +2279,7 @@ async def _apply_card_tool_cli_option(
     def _print_server_startup(self, output_stream: Any) -> None:
         print(f"Starting fast-agent  '{self.name}' in server mode", file=output_stream)
         print(f"Transport: {self.args.transport}", file=output_stream)
-        if self.args.transport == "http":
+        if self.args.transport in {"http", "a2a"}:
             print(f"Listening on {self.args.host}:{self.args.port}", file=output_stream)
         print("Press Ctrl+C to stop", file=output_stream)
 
@@ -2359,6 +2362,28 @@ async def _run_mcp_server(
             port=self.args.port,
         )
 
+    async def _run_a2a_server(
+        self,
+        state: ManagedRunState,
+        callbacks: RuntimeCallbacks,
+    ) -> None:
+        """Build an AgentA2AServer from the run state and CLI args, then await run_async()."""
+        from fast_agent.a2a import AgentA2AServer
+
+        args = self.args
+        host, port = args.host, args.port
+        a2a_server = AgentA2AServer(
+            primary_instance=state.primary_instance,
+            create_instance=callbacks.create_instance,
+            dispose_instance=callbacks.dispose_instance,
+            server_name=getattr(args, "server_name", None) or f"{self.name}",
+            server_description=getattr(args, "server_description", None),
+            host=host,
+            port=port,
+            instance_scope=getattr(args, "instance_scope", "shared"),
+        )
+        await a2a_server.run_async(host=host, port=port)
+
     async def _handle_server_mode(
         self,
         state: ManagedRunState,
@@ -2378,6 +2403,8 @@ async def _handle_server_mode(
 
             if settings.transport == "acp":
                 await self._run_acp_server(state, callbacks)
+            elif settings.transport == "a2a":
+                await self._run_a2a_server(state, callbacks)
             else:
                 await self._run_mcp_server(state, callbacks)
         except KeyboardInterrupt:
@@ -2864,12 +2891,12 @@ async def start_server(
         tool_name_template: str | None = None,
     ) -> None:
         """
-        Start the application as an MCP server.
-        This method initializes agents and exposes them through an MCP server.
+        Start the application as an MCP, ACP, or A2A server.
+        This method initializes agents and exposes them through the selected server transport.
         It is a blocking method that runs until the server is stopped.
 
         Args:
-            transport: Transport protocol to use ("http" or "stdio")
+            transport: Transport protocol to use ("http", "stdio", "acp", or "a2a")
             host: Host address for the server when using HTTP
             port: Port for the server when using HTTP
             server_name: Optional custom name for the MCP server
@@ -2922,6 +2949,8 @@ async def start_server(
             self.args.watch = original_args.watch
         if original_args is not None and hasattr(original_args, "card_tools"):
             self.args.card_tools = original_args.card_tools
+        if original_args is not None and hasattr(original_args, "noenv"):
+            self.args.noenv = original_args.noenv
 
         # Run the application, which will detect the server flag and start server mode
         async with self.run():
diff --git a/src/fast_agent/core/validation.py b/src/fast_agent/core/validation.py
index 32dab64cd..a254fe5a9 100644
--- a/src/fast_agent/core/validation.py
+++ b/src/fast_agent/core/validation.py
@@ -146,6 +146,7 @@ class _WorkflowReferenceRule:
         DependencyFieldSpec("generator", multiple=False),
     ),
     "MAKER": (DependencyFieldSpec("worker", multiple=False),),
+    "a2a": (),
 }
 
 
diff --git a/src/fast_agent/llm/provider/anthropic/llm_anthropic.py b/src/fast_agent/llm/provider/anthropic/llm_anthropic.py
index 3a9ce0bd0..961ab6202 100644
--- a/src/fast_agent/llm/provider/anthropic/llm_anthropic.py
+++ b/src/fast_agent/llm/provider/anthropic/llm_anthropic.py
@@ -104,6 +104,7 @@
     validate_task_budget_tokens,
 )
 from fast_agent.llm.tool_tracking import ToolCallTracker
+from fast_agent.llm.trace import llm_trace_enabled
 from fast_agent.llm.usage_tracking import TurnUsage
 from fast_agent.mcp.mime_utils import DOCUMENT_MIME_TYPES, guess_mime_type, normalize_mime_type
 from fast_agent.mcp.provider_management import build_anthropic_provider_managed_mcp_payload
@@ -129,7 +130,6 @@
 
 # Stream capture mode - when enabled, saves all streaming chunks to files for debugging
 # Set FAST_AGENT_LLM_TRACE=1 (or any non-empty value) to enable
-STREAM_CAPTURE_ENABLED = bool(os.environ.get("FAST_AGENT_LLM_TRACE"))
 STREAM_CAPTURE_DIR = Path("stream-debug")
 
 # Type alias for system field - can be string or list of text blocks with cache control
@@ -235,7 +235,7 @@ def _is_beta_text_block_validation_error(error: Exception) -> bool:
 
 def _stream_capture_filename(turn: int) -> Path | None:
     """Generate filename for stream capture. Returns None if capture is disabled."""
-    if not STREAM_CAPTURE_ENABLED:
+    if not llm_trace_enabled():
         return None
     STREAM_CAPTURE_DIR.mkdir(parents=True, exist_ok=True)
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
diff --git a/src/fast_agent/llm/provider/google/_stream_capture.py b/src/fast_agent/llm/provider/google/_stream_capture.py
index 84e480592..7e1b6167b 100644
--- a/src/fast_agent/llm/provider/google/_stream_capture.py
+++ b/src/fast_agent/llm/provider/google/_stream_capture.py
@@ -3,22 +3,21 @@
 from __future__ import annotations
 
 import json
-import os
 import warnings
 from datetime import datetime
 from pathlib import Path
 from typing import Any
 
 from fast_agent.core.logging.logger import get_logger
+from fast_agent.llm.trace import llm_trace_enabled
 
 _logger = get_logger(__name__)
 
-STREAM_CAPTURE_ENABLED = bool(os.environ.get("FAST_AGENT_LLM_TRACE"))
 STREAM_CAPTURE_DIR = Path("stream-debug")
 
 
 def stream_capture_filename(turn: int) -> Path | None:
-    if not STREAM_CAPTURE_ENABLED:
+    if not llm_trace_enabled():
         return None
     STREAM_CAPTURE_DIR.mkdir(parents=True, exist_ok=True)
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
diff --git a/src/fast_agent/llm/provider/google/llm_google_native.py b/src/fast_agent/llm/provider/google/llm_google_native.py
index f24105167..a7d3ab79d 100644
--- a/src/fast_agent/llm/provider/google/llm_google_native.py
+++ b/src/fast_agent/llm/provider/google/llm_google_native.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import secrets
+import time
 from collections.abc import Mapping
 from dataclasses import dataclass
 from typing import Any, cast
@@ -46,6 +47,7 @@
 # Define default model and potentially other Google-specific defaults
 DEFAULT_GOOGLE_MODEL = "gemini3"
 _GOOGLE_VERTEX_PARTNER_MODEL_PREFIXES = ("claude",)
+GOOGLE_DIAGNOSTICS_CHANNEL = "fast-agent-provider-diagnostics"
 
 
 # Define Google-specific parameter exclusions if necessary
@@ -112,6 +114,7 @@ def __init__(self, **kwargs) -> None:
         super().__init__(provider=Provider.GOOGLE, **kwargs)
         # Initialize the converter
         self._converter = GoogleConverter()
+        self._last_google_provider_diagnostics: dict[str, Any] | None = None
         self._init_reasoning(kwargs)
 
     @property
@@ -331,6 +334,13 @@ async def _stream_generate_content(
         client: genai.Client,
     ) -> types.GenerateContentResponse | None:
         """Stream Gemini responses and return the final aggregated completion."""
+        diagnostics: dict[str, Any] = {
+            "transport": "google-genai-stream",
+            "request_type": "models.generate_content_stream",
+            "streaming": True,
+            "model": model,
+            "phase_ms": {},
+        }
         capture_base = stream_capture_filename(self.chat_turn())
         save_stream_request(
             capture_base,
@@ -340,12 +350,17 @@ async def _stream_generate_content(
                 "config": config,
             },
         )
+        request_start = time.perf_counter()
         try:
             response_stream = await client.aio.models.generate_content_stream(
                 model=model,
                 contents=cast("types.ContentListUnion", contents),
                 config=config,
             )
+            diagnostics["phase_ms"]["send_request"] = round(
+                (time.perf_counter() - request_start) * 1000,
+                2,
+            )
         except AttributeError:
             # Older SDKs might not expose streaming; fall back to non-streaming.
             return None
@@ -362,6 +377,7 @@ async def _stream_generate_content(
             response_stream,
             model=model,
             capture_base=capture_base,
+            diagnostics=diagnostics,
         )
 
     @staticmethod
@@ -532,8 +548,16 @@ async def _consume_google_stream(
         *,
         model: str,
         capture_base=None,
+        diagnostics: dict[str, Any] | None = None,
     ) -> types.GenerateContentResponse | None:
         """Consume the async streaming iterator and aggregate the final response."""
+        stream_start = time.perf_counter()
+        first_event_ms: float | None = None
+        chunk_count = 0
+        text_chunk_count = 0
+        reasoning_chunk_count = 0
+        function_call_chunk_count = 0
+        usage_metadata_seen = False
         estimated_tokens = 0
         timeline: list[GoogleTimelineEntry] = []
         tracker = ToolCallTracker()
@@ -546,9 +570,13 @@ async def _consume_google_stream(
         try:
             # Cancellation is handled via asyncio.Task.cancel() which raises CancelledError
             async for chunk in response_stream:
+                chunk_count += 1
+                if first_event_ms is None:
+                    first_event_ms = round((time.perf_counter() - stream_start) * 1000, 2)
                 save_stream_chunk(capture_base, chunk)
                 last_chunk = chunk
                 if getattr(chunk, "usage_metadata", None):
+                    usage_metadata_seen = True
                     usage_metadata = chunk.usage_metadata
 
                 if not getattr(chunk, "candidates", None):
@@ -564,6 +592,7 @@ async def _consume_google_stream(
                         text = part.text or ""
                         if text:
                             if getattr(part, "thought", False):
+                                reasoning_chunk_count += 1
                                 self._notify_stream_listeners(
                                     StreamChunk(text=text, is_reasoning=True)
                                 )
@@ -573,6 +602,7 @@ async def _consume_google_stream(
                                     cast("bytes | None", part.thought_signature),
                                 )
                             else:
+                                text_chunk_count += 1
                                 self._append_google_text_timeline(timeline, text)
                                 estimated_tokens = self._emit_stream_text_delta(
                                     text=text,
@@ -581,6 +611,7 @@ async def _consume_google_stream(
                                 )
 
                     if getattr(part, "function_call", None):
+                        function_call_chunk_count += 1
                         function_call = part.function_call
                         name = getattr(function_call, "name", None) or "tool"
                         args = getattr(function_call, "args", None) or {}
@@ -675,6 +706,23 @@ async def _consume_google_stream(
                 + ", ".join(f"{tool.name}:{tool.tool_use_id}" for tool in incomplete_tools)
             )
 
+        if diagnostics is not None:
+            phase_ms = diagnostics.setdefault("phase_ms", {})
+            phase_ms["first_event"] = first_event_ms
+            phase_ms["stream_total"] = round((time.perf_counter() - stream_start) * 1000, 2)
+            phase_ms["total"] = round(
+                float(phase_ms.get("send_request") or 0) + phase_ms["stream_total"],
+                2,
+            )
+            diagnostics["stream"] = {
+                "chunk_count": chunk_count,
+                "text_chunks": text_chunk_count,
+                "reasoning_chunks": reasoning_chunk_count,
+                "function_call_chunks": function_call_chunk_count,
+                "usage_metadata_seen": usage_metadata_seen,
+            }
+            self._last_google_provider_diagnostics = diagnostics
+
         return self._build_google_final_response(
             last_chunk=last_chunk,
             usage_metadata=usage_metadata,
@@ -695,6 +743,7 @@ async def _google_completion(
         """
         Process a query using Google's generate_content API and available tools.
         """
+        self._last_google_provider_diagnostics = None
         request_params = self.get_request_params(request_params=request_params)
         responses: list[ContentBlock] = []
         if request_params.structured_schema and response_schema is None:
@@ -767,11 +816,21 @@ async def _google_completion(
                         client=client,
                     )
                 if api_response is None:
+                    request_start = time.perf_counter()
                     api_response = await client.aio.models.generate_content(
                         model=model_name,
                         contents=cast("types.ContentListUnion", conversation_history),
                         config=generate_content_config,
                     )
+                    self._last_google_provider_diagnostics = {
+                        "transport": "google-genai",
+                        "request_type": "models.generate_content",
+                        "streaming": False,
+                        "model": model_name,
+                        "phase_ms": {
+                            "total": round((time.perf_counter() - request_start) * 1000, 2),
+                        },
+                    }
                 self.logger.debug("Google generate_content response:", data=api_response)
 
                 # Track usage if response is valid and has usage data
@@ -900,6 +959,13 @@ async def _google_completion(
             channels = dict(assistant.channels or {})
             channels[REASONING] = reasoning_blocks
             assistant.channels = channels
+        diagnostics = getattr(self, "_last_google_provider_diagnostics", None)
+        if diagnostics:
+            channels = dict(assistant.channels or {})
+            channels[GOOGLE_DIAGNOSTICS_CHANNEL] = [
+                TextContent(type="text", text=json.dumps(diagnostics))
+            ]
+            assistant.channels = channels
         return assistant
 
     #        return responses  # Return the accumulated responses (fast-agent content types)
diff --git a/src/fast_agent/llm/provider/openai/_stream_capture.py b/src/fast_agent/llm/provider/openai/_stream_capture.py
index 4acc99707..3f46d892f 100644
--- a/src/fast_agent/llm/provider/openai/_stream_capture.py
+++ b/src/fast_agent/llm/provider/openai/_stream_capture.py
@@ -7,23 +7,22 @@
 from __future__ import annotations
 
 import json
-import os
 import warnings
 from datetime import datetime
 from pathlib import Path
 from typing import Any
 
 from fast_agent.core.logging.logger import get_logger
+from fast_agent.llm.trace import llm_trace_enabled
 
 _logger = get_logger(__name__)
 
-STREAM_CAPTURE_ENABLED = bool(os.environ.get("FAST_AGENT_LLM_TRACE"))
 STREAM_CAPTURE_DIR = Path("stream-debug")
 
 
 def stream_capture_filename(turn: int) -> Path | None:
     """Generate filename for stream capture. Returns None if capture is disabled."""
-    if not STREAM_CAPTURE_ENABLED:
+    if not llm_trace_enabled():
         return None
     STREAM_CAPTURE_DIR.mkdir(parents=True, exist_ok=True)
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
diff --git a/src/fast_agent/llm/provider/openai/codex_oauth.py b/src/fast_agent/llm/provider/openai/codex_oauth.py
index f010b59da..7b06083a8 100644
--- a/src/fast_agent/llm/provider/openai/codex_oauth.py
+++ b/src/fast_agent/llm/provider/openai/codex_oauth.py
@@ -56,6 +56,8 @@ def delete_password(self, service: str, username: str) -> None: ...
 CODEX_TOKEN_META_KEY = f"{CODEX_TOKEN_KEY}:meta"
 CODEX_TOKEN_CHUNK_PREFIX = f"{CODEX_TOKEN_KEY}:chunk"
 CODEX_KEYRING_MAX_PAYLOAD_BYTES = 512
+
+
 class CodexOAuthTokens(BaseModel):
     access_token: str
     refresh_token: str | None = None
@@ -69,6 +71,17 @@ def is_expired(self, margin_seconds: int = 60) -> bool:
         return time.time() >= (self.expires_at - margin_seconds)
 
 
+_AuthJsonFingerprint = tuple[int, int, int] | None
+_KeyringFingerprint = str | None
+_CodexTokenCacheKey = tuple[str, bool, _AuthJsonFingerprint, _KeyringFingerprint]
+_CodexTokenCache = tuple[
+    _CodexTokenCacheKey,
+    CodexOAuthTokens | None,
+    str | None,
+]
+_codex_token_cache: _CodexTokenCache | None = None
+
+
 @dataclass
 class _CallbackResult:
     authorization_code: str | None = None
@@ -240,6 +253,26 @@ def _prefer_codex_cli_auth_path() -> bool:
     return _resolve_codex_cli_auth_path() != _default_codex_cli_auth_path()
 
 
+def _auth_json_fingerprint(auth_path: Path) -> _AuthJsonFingerprint:
+    try:
+        stat = auth_path.stat()
+    except OSError:  # path could not be stat'ed (e.g. it does not exist)
+        return None
+    return (stat.st_mtime_ns, stat.st_size, stat.st_ino)  # (mtime in ns, size, inode)
+
+
+def _keyring_payload_fingerprint(payload: str | None) -> _KeyringFingerprint:
+    """Return the SHA-256 hex digest of ``payload``, or None when there is no payload."""
+    # Only this digest goes into the cache key, never the raw payload text.
+    return None if payload is None else hashlib.sha256(payload.encode("utf-8")).hexdigest()
+
+
+def clear_codex_token_cache() -> None:
+    """Forget the in-process Codex token cache so the next load re-reads storage."""
+    global _codex_token_cache
+    _codex_token_cache = None
+
+
 def _safe_delete(keyring_module: _KeyringProtocol, username: str) -> None:
     try:
         keyring_module.delete_password(CODEX_KEYRING_SERVICE, username)
@@ -458,21 +491,48 @@ def _save_codex_cli_tokens(tokens: CodexOAuthTokens) -> None:
 
 
 def _load_codex_tokens_with_source() -> tuple[CodexOAuthTokens | None, str | None]:
-    if _prefer_codex_cli_auth_path():
+    global _codex_token_cache
+
+    auth_path = _resolve_codex_cli_auth_path()
+    prefer_auth_json = _prefer_codex_cli_auth_path()
+    auth_json_fingerprint = _auth_json_fingerprint(auth_path)
+    if _codex_token_cache is not None:  # serve from cache while path/mode/stat are unchanged
+        cache_key, cached_tokens, cached_source = _codex_token_cache
+        cache_path, cache_prefer_auth_json, cache_auth_json_fingerprint, _ = cache_key
+        if (  # note: the key's keyring fingerprint (4th item) is not part of the match
+            cache_path == str(auth_path)
+            and cache_prefer_auth_json == prefer_auth_json
+            and cache_auth_json_fingerprint == auth_json_fingerprint
+        ):
+            return cached_tokens, cached_source
+
+    keyring_payload = None if prefer_auth_json else _get_keyring_password()
+    cache_key = (
+        str(auth_path),
+        prefer_auth_json,
+        auth_json_fingerprint,
+        _keyring_payload_fingerprint(keyring_payload),
+    )
+    if prefer_auth_json:
         tokens = _load_codex_cli_tokens()
         if tokens:
+            _codex_token_cache = (cache_key, tokens, "auth.json")
             return tokens, "auth.json"
 
-    payload = _get_keyring_password()
-    if payload:
+    if keyring_payload:  # NOTE(review): None if auth.json preferred, so no keyring fallback
         try:
-            return CodexOAuthTokens.model_validate_json(payload), "keyring"
+            tokens = CodexOAuthTokens.model_validate_json(keyring_payload)
+            _codex_token_cache = (cache_key, tokens, "keyring")
+            return tokens, "keyring"
         except Exception:
+            _codex_token_cache = (cache_key, None, None)  # the failed parse is cached too
             return None, None
 
     tokens = _load_codex_cli_tokens()
     if tokens:
+        _codex_token_cache = (cache_key, tokens, "auth.json")
         return tokens, "auth.json"
+    _codex_token_cache = (cache_key, None, None)  # "no tokens found" is cached as well
     return None, None
 
 
@@ -488,13 +548,34 @@ def load_codex_tokens() -> CodexOAuthTokens | None:
 
 
 def save_codex_tokens(tokens: CodexOAuthTokens) -> None:
-    if _prefer_codex_cli_auth_path():
+    global _codex_token_cache
+    # Persist to auth.json or the keyring, then prime the cache with the saved tokens.
+    auth_path = _resolve_codex_cli_auth_path()
+    prefer_auth_json = _prefer_codex_cli_auth_path()
+    if prefer_auth_json:
         _save_codex_cli_tokens(tokens)
+        _codex_token_cache = (
+            (str(auth_path), True, _auth_json_fingerprint(auth_path), None),
+            tokens,
+            "auth.json",
+        )
         return
-    _set_keyring_password(tokens.model_dump_json())
+    payload = tokens.model_dump_json()
+    _set_keyring_password(payload)
+    _codex_token_cache = (
+        (
+            str(auth_path),
+            False,
+            _auth_json_fingerprint(auth_path),
+            _keyring_payload_fingerprint(payload),
+        ),
+        tokens,
+        "keyring",
+    )
 
 
 def clear_codex_tokens() -> bool:
+    clear_codex_token_cache()
     if not _keyring_payload_present():
         return False
     try:
diff --git a/src/fast_agent/llm/provider/openai/codex_responses.py b/src/fast_agent/llm/provider/openai/codex_responses.py
index 1b8d6cb4e..c60b083e7 100644
--- a/src/fast_agent/llm/provider/openai/codex_responses.py
+++ b/src/fast_agent/llm/provider/openai/codex_responses.py
@@ -130,10 +130,12 @@ async def _create_websocket_connection(
             data={"url": url},
         )
         from fast_agent.llm.provider.openai.codex_oauth import (
+            clear_codex_token_cache,
             load_codex_tokens,
             refresh_codex_tokens,
             save_codex_tokens,
         )
+        clear_codex_token_cache()
         tokens = load_codex_tokens()
         if not tokens or not tokens.refresh_token:
             raise ProviderKeyError(
diff --git a/src/fast_agent/llm/trace.py b/src/fast_agent/llm/trace.py
new file mode 100644
index 000000000..340098594
--- /dev/null
+++ b/src/fast_agent/llm/trace.py
@@ -0,0 +1,28 @@
+"""Runtime LLM tracing state."""
+
+from __future__ import annotations
+
+import os
+
+_OVERRIDE: bool | None = None
+
+
+def llm_trace_enabled() -> bool:
+    """Return the override when set; otherwise whether FAST_AGENT_LLM_TRACE is non-empty."""
+    env_enabled = bool(os.environ.get("FAST_AGENT_LLM_TRACE"))
+    return env_enabled if _OVERRIDE is None else _OVERRIDE
+
+
+def set_llm_trace_enabled(enabled: bool) -> None:
+    """Record the override and mirror it into FAST_AGENT_LLM_TRACE ("1" when on, unset when off)."""
+    global _OVERRIDE
+    _OVERRIDE = enabled
+    os.environ.pop("FAST_AGENT_LLM_TRACE", None)
+    if enabled:
+        os.environ["FAST_AGENT_LLM_TRACE"] = "1"
+
+
+def toggle_llm_trace() -> bool:
+    """Invert the current trace state, apply it, and return the new state."""
+    set_llm_trace_enabled(new_state := not llm_trace_enabled())
+    return new_state
diff --git a/src/fast_agent/mcp/hf_auth.py b/src/fast_agent/mcp/hf_auth.py
index 715448cc3..b9c1ba769 100644
--- a/src/fast_agent/mcp/hf_auth.py
+++ b/src/fast_agent/mcp/hf_auth.py
@@ -1,13 +1,16 @@
-"""HuggingFace authentication utilities for MCP connections."""
+"""HuggingFace authentication utilities for hosted and remote connections."""
 
 import os
 from collections.abc import Callable
+from dataclasses import dataclass
+from typing import Literal
 from urllib.parse import urlparse
 
 from fast_agent.utils.huggingface_hub import get_huggingface_hub_token
 
 # Type alias for token provider functions
 TokenProvider = Callable[[], str | None]
+HFAuthHeader = Literal["Authorization", "X-HF-Authorization"]
 
 
 def _default_hub_token_provider() -> str | None:
@@ -58,6 +61,17 @@ def is_huggingface_url(url: str) -> bool:
         return False
 
 
+def is_hf_space_url(url: str) -> bool:
+    """Tell whether ``url`` passes ``is_huggingface_url`` and its host ends in ``.hf.space``."""
+    if not is_huggingface_url(url):
+        return False
+    try:
+        host = urlparse(url).hostname or ""
+    except Exception:
+        return False
+    return host.endswith(".hf.space")
+
+
 def get_hf_token_from_env(
     hub_token_provider: TokenProvider | None = None,
 ) -> str | None:
@@ -82,6 +96,69 @@ def get_hf_token_from_env(
     return provider()
 
 
+def _has_auth_header(headers: dict[str, str] | None) -> bool:
+    """Return True if any header name is Authorization or X-HF-Authorization (any case)."""
+    auth_names = ("authorization", "x-hf-authorization")
+    return bool(headers) and any(name.lower() in auth_names for name in headers)
+
+
+def _bearer(value: str) -> str:
+    return f"Bearer {value}"  # value is inserted as-is after the "Bearer " scheme prefix
+
+
+@dataclass(frozen=True)
+class HuggingFaceAuthPolicy:
+    """Policy for attaching Hugging Face bearer credentials to outbound requests.
+
+    The policies below intentionally keep ambient Hugging Face credentials separate
+    from explicit server authentication. Ambient HF tokens use X-HF-Authorization
+    for Spaces so they can be consumed by Space apps without taking over app-level
+    Authorization. Explicit auth, including --auth and OAuth challenges, uses the
+    standard Authorization header because it is authenticating to that endpoint.
+    """
+
+    hf_space_header: HFAuthHeader
+
+    def add_ambient_hf_token(
+        self,
+        url: str,
+        headers: dict[str, str] | None,
+        hub_token_provider: TokenProvider | None = None,
+    ) -> dict[str, str] | None:
+        """Add the ambient HF token for HF URLs that carry no auth header yet."""
+        if is_huggingface_url(url) and not _has_auth_header(headers):
+            ambient_token = get_hf_token_from_env(hub_token_provider)
+            if ambient_token is not None:
+                return self.add_bearer_token(url, headers, ambient_token)
+        return headers
+
+    def add_bearer_token(
+        self,
+        url: str,
+        headers: dict[str, str] | None,
+        token: str,
+    ) -> dict[str, str]:
+        """Return a copy of ``headers`` with the bearer token under the policy's header."""
+        merged = dict(headers) if headers else {}
+        merged[self.header_for_url(url)] = _bearer(token)
+        return merged
+
+    def header_for_url(self, url: str) -> HFAuthHeader:
+        """Pick ``hf_space_header`` for Space URLs and ``Authorization`` otherwise."""
+        return "Authorization" if not is_hf_space_url(url) else self.hf_space_header
+
+
+# Ambient CLI credentials (used by add_hf_auth_header): Space URLs get the token in
+# X-HF-Authorization, every other Hugging Face URL gets Authorization.
+HF_CLI_AMBIENT_AUTH_POLICY = HuggingFaceAuthPolicy(hf_space_header="X-HF-Authorization")
+# Explicit bearer auth (used by add_explicit_bearer_auth_header): always Authorization,
+# including for Space URLs.
+HF_EXPLICIT_BEARER_AUTH_POLICY = HuggingFaceAuthPolicy(hf_space_header="Authorization")
+# Request-scoped passthrough (used by add_forwarded_hf_auth_header): same header choice
+# as the ambient CLI policy.
+HF_REQUEST_PASSTHROUGH_AUTH_POLICY = HuggingFaceAuthPolicy(hf_space_header="X-HF-Authorization")
+
+
 def should_add_hf_auth(
     url: str,
     existing_headers: dict[str, str] | None,
@@ -108,9 +185,8 @@ def should_add_hf_auth(
         return False
 
     # Don't add auth if Authorization or X-HF-Authorization already present
-    if existing_headers:
-        if "Authorization" in existing_headers or "X-HF-Authorization" in existing_headers:
-            return False
+    if _has_auth_header(existing_headers):
+        return False
 
     return get_hf_token_from_env(hub_token_provider) is not None
 
@@ -132,30 +208,44 @@ def add_hf_auth_header(
     Returns:
         Updated headers dictionary with HF auth if appropriate, or original headers
     """
-    if not should_add_hf_auth(url, headers, hub_token_provider):
+    return HF_CLI_AMBIENT_AUTH_POLICY.add_ambient_hf_token(
+        url,
+        headers,
+        hub_token_provider,
+    )
+
+
+def add_explicit_bearer_auth_header(
+    url: str,
+    headers: dict[str, str] | None,
+    token: str,
+) -> dict[str, str]:
+    """Return a copy of ``headers`` with ``Authorization: Bearer <token>`` set.
+
+    This is the policy behind ``--auth`` and OAuth-managed A2A/MCP endpoints.
+    It uses Authorization even for ``*.hf.space`` because the credential is for
+    the target server itself rather than an ambient HF token for a Space app.
+    """
+    return HF_EXPLICIT_BEARER_AUTH_POLICY.add_bearer_token(url, headers, token)
+
+
+def add_forwarded_hf_auth_header(url: str, headers: dict[str, str] | None) -> dict[str, str] | None:
+    """Add the request-scoped bearer token to Hugging Face URLs.
+
+    This is intended for hosted agents that should call Hugging Face services as the
+    inbound user rather than as the Space/server process. ``headers`` is returned
+    unchanged for non-HF URLs, when auth headers exist, or when no token is set.
+    """
+    if not is_huggingface_url(url):
         return headers
 
-    hf_token = get_hf_token_from_env(hub_token_provider)
-    if hf_token is None:
+    if _has_auth_header(headers):
         return headers
 
-    # Create new headers dict or copy existing one
-    result_headers = dict(headers) if headers else {}
+    from fast_agent.mcp.auth.context import request_bearer_token
 
-    try:
-        parsed = urlparse(url)
-        hostname = parsed.hostname
-        if hostname and hostname.endswith(".hf.space"):
-            # For .hf.space domains, send BOTH headers:
-            # - Authorization: for the app's OAuth (HF infra doesn't consume this)
-            # - X-HF-Authorization: for HF infrastructure (inference credit tracking)
-            result_headers["Authorization"] = f"Bearer {hf_token}"
-            result_headers["X-HF-Authorization"] = f"Bearer {hf_token}"
-        else:
-            # For other HF domains, use standard Authorization header
-            result_headers["Authorization"] = f"Bearer {hf_token}"
-    except Exception:
-        # Fallback to standard Authorization header
-        result_headers["Authorization"] = f"Bearer {hf_token}"
+    token = request_bearer_token.get()
+    if not token:
+        return headers
 
-    return result_headers
+    return HF_REQUEST_PASSTHROUGH_AUTH_POLICY.add_bearer_token(url, headers, token)
diff --git a/src/fast_agent/mcp/mcp_aggregator.py b/src/fast_agent/mcp/mcp_aggregator.py
index 800ed9f77..d00f33cf0 100644
--- a/src/fast_agent/mcp/mcp_aggregator.py
+++ b/src/fast_agent/mcp/mcp_aggregator.py
@@ -53,6 +53,7 @@
     resolve_model_spec,
 )
 from fast_agent.event_progress import ProgressAction
+from fast_agent.mcp.auth.context import request_bearer_token
 from fast_agent.mcp.common import SEP, create_namespaced_name, is_namespaced_name
 from fast_agent.mcp.experimental_session_client import ExperimentalSessionClient
 from fast_agent.mcp.gen_client import gen_client
@@ -436,6 +437,20 @@ def _require_server_registry(self) -> ServerRegistryProtocol:
             raise RuntimeError("Context is missing server registry for MCP connections")
         return cast("ServerRegistryProtocol", server_registry)
 
+    def _should_use_request_scoped_connection(self, server_name: str) -> bool:
+        """Return True when a request bearer token is set and the server forwards HF auth.
+
+        Any error while looking up the server config is treated as "no".
+        """
+        if not request_bearer_token.get():
+            return False
+        try:
+            server_config = self._require_server_registry().get_server_config(server_name)
+        except Exception:
+            return False
+        auth = None if server_config is None else server_config.auth
+        return auth is not None and auth.forward == "huggingface"
+
     def _require_connection_manager(self) -> MCPConnectionManager:
         if self._persistent_connection_manager is None:
             raise RuntimeError("Persistent connection manager is not initialized")
@@ -1732,7 +1747,9 @@ async def try_execute(client: ClientSession):
 
         # Try initial execution
         try:
-            if self.connection_persistence:
+            if self.connection_persistence and not self._should_use_request_scoped_connection(
+                server_name
+            ):
                 manager = self._require_connection_manager()
                 server_connection = await manager.get_server(
                     server_name, client_session_factory=self._create_session_factory(server_name)
diff --git a/src/fast_agent/mcp/mcp_connection_manager.py b/src/fast_agent/mcp/mcp_connection_manager.py
index d8b2d33d5..82ffbdbb9 100644
--- a/src/fast_agent/mcp/mcp_connection_manager.py
+++ b/src/fast_agent/mcp/mcp_connection_manager.py
@@ -36,6 +36,7 @@
 from fast_agent.core.logging.logger import get_logger
 from fast_agent.event_progress import ProgressAction
 from fast_agent.home import build_child_environment
+from fast_agent.mcp.hf_auth import add_forwarded_hf_auth_header
 from fast_agent.mcp.interfaces import ClientSessionFactory
 from fast_agent.mcp.logger_textio import get_stderr_handler
 from fast_agent.mcp.mcp_agent_client_session import MCPAgentClientSession
@@ -112,6 +113,18 @@ def _prepare_headers_and_auth(
     auth_header_keys = {"authorization", "x-hf-authorization"}
     user_provided_auth_keys = {key for key in headers if key.lower() in auth_header_keys}
 
+    if (
+        server_config.auth is not None
+        and server_config.auth.forward == "huggingface"
+        and server_config.url
+        and not user_provided_auth_keys
+    ):
+        headers = add_forwarded_hf_auth_header(server_config.url, headers) or {}
+        user_provided_auth_keys = {key for key in headers if key.lower() in auth_header_keys}
+
+    if server_config.auth is not None and server_config.auth.forward == "huggingface":
+        return headers, None, user_provided_auth_keys
+
     # OAuth is only relevant for SSE/HTTP transports and should be skipped when the
     # user has already supplied explicit Authorization headers.
     if not trigger_oauth or server_config.transport not in ("sse", "http") or user_provided_auth_keys:
@@ -147,6 +160,8 @@ def _resolve_oauth_mode(
     if trigger_oauth is False:
         return "disabled"
     auth_config = server_config.auth
+    if auth_config is not None and auth_config.forward == "huggingface":
+        return "disabled"
     if auth_config is not None and auth_config.oauth is False:
         return "disabled"
     if trigger_oauth is True:
diff --git a/src/fast_agent/mcp/provider_management.py b/src/fast_agent/mcp/provider_management.py
index 748d4a7b6..f348b747c 100644
--- a/src/fast_agent/mcp/provider_management.py
+++ b/src/fast_agent/mcp/provider_management.py
@@ -39,6 +39,7 @@ def normalize_client_managed_url_server(
     url: str,
     headers: Mapping[str, str] | None,
     access_token: str | None,
+    forward_huggingface: bool = False,
 ) -> tuple[str, dict[str, str] | None]:
     final_headers = dict(headers) if headers else None
 
@@ -55,7 +56,8 @@ def normalize_client_managed_url_server(
     if transport == "http":
         _server_name, _transport, final_url = parse_server_url(url)
 
-    final_headers = add_hf_auth_header(final_url, final_headers)
+    if not forward_huggingface:
+        final_headers = add_hf_auth_header(final_url, final_headers)
     return final_url, final_headers or None
 
 
diff --git a/src/fast_agent/ui/command_payloads.py b/src/fast_agent/ui/command_payloads.py
index 388684c44..a12c229f9 100644
--- a/src/fast_agent/ui/command_payloads.py
+++ b/src/fast_agent/ui/command_payloads.py
@@ -13,6 +13,11 @@ class ShowUsageCommand(CommandBase):
     kind: Literal["show_usage"] = "show_usage"
 
 
+@dataclass(frozen=True, slots=True)
+class ToggleTraceCommand(CommandBase):
+    kind: Literal["toggle_trace"] = "toggle_trace"  # fixed tag; the command carries no other data
+
+
 @dataclass(frozen=True, slots=True)
 class ShowSystemCommand(CommandBase):
     kind: Literal["show_system"] = "show_system"
@@ -31,6 +36,14 @@ class ShowMcpStatusCommand(CommandBase):
 McpConnectMode = Literal["url", "npx", "uvx", "stdio"]
 
 
+@dataclass(frozen=True, slots=True)
+class A2ACommand(CommandBase):
+    action: str  # lower-cased first word after /a2a, e.g. "list" or "connect"
+    argument: str | None  # stripped text following the action, or None when absent
+    error: str | None = None  # message set by the parser for an unrecognised action
+    kind: Literal["a2a_command"] = "a2a_command"
+
+
 @dataclass(frozen=True, slots=True)
 class McpListCommand(CommandBase):
     kind: Literal["mcp_list"] = "mcp_list"
@@ -419,9 +432,11 @@ class UnknownCommand(CommandBase):
 
 CommandPayload = (
     ShowUsageCommand
+    | ToggleTraceCommand
     | ShowSystemCommand
     | ShowMarkdownCommand
     | ShowMcpStatusCommand
+    | A2ACommand
     | McpListCommand
     | McpConnectCommand
     | McpDisconnectCommand
diff --git a/src/fast_agent/ui/interactive/command_dispatch.py b/src/fast_agent/ui/interactive/command_dispatch.py
index 78422b59d..bf7d2e9ed 100644
--- a/src/fast_agent/ui/interactive/command_dispatch.py
+++ b/src/fast_agent/ui/interactive/command_dispatch.py
@@ -3,10 +3,14 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Callable, cast
+from typing import TYPE_CHECKING, Any, Callable, cast
 
 from rich import print as rich_print
 
+from fast_agent.a2a.config import A2AAgentConfig
+from fast_agent.a2a.connect import parse_a2a_connect_arguments
+from fast_agent.a2a.remote_agent import A2ARemoteAgent
+from fast_agent.agents.agent_types import AgentConfig, AgentType
 from fast_agent.command_actions import (
     PluginCommandActionContext,
     PluginCommandActionRegistry,
@@ -31,8 +35,10 @@
 from fast_agent.commands.shared_command_intents import should_default_export_agent
 from fast_agent.core.exceptions import AgentConfigError
 from fast_agent.core.logging.logger import get_logger
+from fast_agent.llm.trace import toggle_llm_trace
 from fast_agent.ui import enhanced_prompt
 from fast_agent.ui.command_payloads import (
+    A2ACommand,
     AgentCommand,
     AttachCommand,
     CardsCommand,
@@ -85,6 +91,7 @@
     SkillsCommand,
     SwitchAgentCommand,
     TitleSessionCommand,
+    ToggleTraceCommand,
     UnknownCommand,
 )
 from fast_agent.ui.history_display import display_history_show
@@ -171,6 +178,12 @@ async def _dispatch_local_ui_payload(
     match payload:
         case InterruptCommand():
             raise KeyboardInterrupt()
+        case ToggleTraceCommand():
+            enabled = toggle_llm_trace()
+            state = "enabled" if enabled else "disabled"
+            style = "red" if enabled else "yellow"
+            rich_print(f"[{style}]LLM trace {state}[/{style}]")
+            return result
         case SwitchAgentCommand(agent_name=new_agent):
             if new_agent in available_agents_set:
                 result.next_agent = new_agent
@@ -347,6 +360,156 @@ async def _dispatch_catalog_payload(
             return None
 
 
+def _default_a2a_agent_name(existing: set[str]) -> str:
+    """Return "a2a_remote", or the first free "a2a_remote_N" with N counting up from 2."""
+    candidate = "a2a_remote"
+    suffix = 1
+    while candidate in existing:
+        suffix += 1
+        candidate = f"a2a_remote_{suffix}"
+    return candidate
+
+
+def _print_a2a_help() -> None:
+    rich_print("[bold]A2A commands[/bold]")
+    for line in [
+        "/a2a list",
+        "/a2a status \\[agent]",  # "\\[" stops Rich from consuming [agent] as a markup tag
+        "/a2a card \\[agent]",
+        "/a2a transport \\[agent]",
+        "/a2a reset \\[agent]",
+        "/a2a connect <url> [--transport JSONRPC|HTTP+JSON] [--name NAME] [--card-path PATH] [--oauth|--no-oauth]",
+        "/a2a help",
+    ]:
+        rich_print(f"  {line}")
+
+
+async def _dispatch_a2a_payload(
+    owner: "InteractivePrompt",
+    payload: CommandPayload,
+    *,
+    prompt_provider: "AgentApp",
+    agent: str,
+    available_agents_set: set[str],
+) -> DispatchResult | None:
+    """Handle an ``A2ACommand``; return ``None`` for any other payload type.
+
+    Dynamic text is escaped before it is embedded in Rich markup (see ``_esc``).
+    """
+    if not isinstance(payload, A2ACommand):
+        return None
+    from rich.markup import escape
+
+    def _esc(value: object) -> str:
+        return escape(str(value))
+
+    result = DispatchResult(handled=True)
+    if payload.error:
+        rich_print(f"[red]{_esc(payload.error)}[/red]")
+        return result
+
+    match payload.action:
+        case "help" | "?" | "-h" | "--help" | "commands":
+            _print_a2a_help()
+            return result
+        case "list":
+            names = sorted(n for n in available_agents_set if owner.agent_types.get(n) == AgentType.A2A)
+            if not names:
+                rich_print("[yellow]No A2A agents are currently registered.[/yellow]")
+                return result
+            rich_print("[bold]A2A agents[/bold]")
+            for name in names:
+                rich_print(f"  • {_esc(name)}")
+            return result
+        case "status" | "card" | "reset" | "transport":
+            target = payload.argument or agent
+            remote_agent = owner._get_agent_or_warn(prompt_provider, target)
+            if remote_agent is None:
+                return result
+            if not isinstance(remote_agent, A2ARemoteAgent):
+                rich_print(f"[red]Agent '{_esc(target)}' is not an A2A agent.[/red]")
+                return result
+            if payload.action == "transport":
+                diag = remote_agent.diagnostics()
+                rich_print(f"[bold]A2A transport: {_esc(target)}[/bold]")
+                rich_print(f"  Requested: {_esc(diag.transport or 'auto')}")
+                rich_print(f"  Selected client: {_esc(diag.selected_transport_class or 'uninitialized')}")
+                card = remote_agent.remote_card
+                if card is not None:
+                    rich_print("  Advertised interfaces:")
+                    for interface in card.supported_interfaces:
+                        binding = f"{interface.protocol_binding} {interface.protocol_version}"
+                        rich_print(f"    • {_esc(binding)}: {_esc(interface.url)}")
+                return result
+            if payload.action == "reset":
+                remote_agent.reset_a2a_state()
+                rich_print(f"[green]Reset A2A state for {_esc(target)}.[/green]")
+                return result
+            if payload.action == "status":
+                diag = remote_agent.diagnostics()
+                rich_print(f"[bold]A2A status: {_esc(target)}[/bold]")
+                rich_print(f"  URL: {_esc(diag.url)}")
+                rich_print(f"  Transport: {_esc(diag.transport or 'auto')}")
+                rich_print(f"  Remote: {_esc(diag.remote_name or 'unresolved')}")
+                rich_print(f"  Context: {_esc(diag.context_id or '-')}")
+                rich_print(f"  Task: {_esc(diag.current_task_id or '-')}")
+                rich_print(f"  Last state: {_esc(diag.last_task_state or '-')}")
+                rich_print(f"  Client transport: {_esc(diag.selected_transport_class or '-')}")
+                return result
+            card = remote_agent.remote_card
+            if card is None:
+                rich_print(f"[yellow]Agent '{_esc(target)}' has not resolved a remote card yet.[/yellow]")
+                return result
+            rich_print(f"[bold]A2A card: {_esc(card.name)}[/bold]")
+            rich_print(f"  Description: {_esc(card.description)}")
+            rich_print(f"  Version: {_esc(card.version)}")
+            rich_print("  Interfaces:")
+            for interface in card.supported_interfaces:
+                binding = f"{interface.protocol_binding} {interface.protocol_version}"
+                rich_print(f"    • {_esc(binding)}: {_esc(interface.url)}")
+            return result
+        case "connect":
+            request, error = parse_a2a_connect_arguments(payload.argument)
+            if error or request is None:
+                rich_print(f"[red]{_esc(error)}[/red]")
+                return result
+            name = request.name or _default_a2a_agent_name(available_agents_set)
+            if name in available_agents_set:
+                rich_print(f"[red]Agent '{_esc(name)}' already exists. Choose --name NAME.[/red]")
+                return result
+            remote_agent = A2ARemoteAgent(
+                config=AgentConfig(name=name, agent_type=AgentType.A2A, use_history=True),
+                a2a_config=A2AAgentConfig(
+                    url=request.url,
+                    transport=request.transport,
+                    auth=request.auth,
+                    relative_card_path=request.relative_card_path,
+                ),
+            )
+            try:
+                await remote_agent.initialize()
+            except Exception as exc:
+                await remote_agent.shutdown()
+                rich_print(f"[red]Unable to connect to A2A agent: {_esc(exc)}[/red]")
+                return result
+            agents = cast("dict[str, Any]", prompt_provider.registered_agents())
+            agents[name] = remote_agent
+            prompt_provider._apply_agent_registry()
+            owner.agent_types[name] = AgentType.A2A
+            result.next_agent = name
+            result.available_agents = prompt_provider.visible_agent_names(force_include=name)
+            result.available_agents_set = set(result.available_agents)
+            rich_print(f"[green]Connected A2A agent '{_esc(name)}'.[/green]")
+            rich_print(f"  URL: {_esc(request.url)}")
+            rich_print(f"  Transport: {_esc(request.transport or 'auto')}")
+            if remote_agent.remote_card is not None:
+                rich_print(f"  Remote: {_esc(remote_agent.remote_card.name)}")
+            return result
+        case _:
+            rich_print(f"[red]Unknown /a2a action: {_esc(payload.action)}[/red]")
+            return result
+
+
 async def _dispatch_display_payload(
     payload: CommandPayload,
     *,
@@ -984,6 +1147,16 @@ async def dispatch_command_payload(
     if catalog_result is not None:
         return catalog_result
 
+    a2a_result = await _dispatch_a2a_payload(
+        owner,
+        payload,
+        prompt_provider=prompt_provider,
+        agent=agent,
+        available_agents_set=available_agents_set,
+    )
+    if a2a_result is not None:
+        return a2a_result
+
     display_result = await _dispatch_display_payload(
         payload,
         prompt_provider=prompt_provider,
diff --git a/src/fast_agent/ui/interactive_prompt.py b/src/fast_agent/ui/interactive_prompt.py
index 05bc30519..5b6a47988 100644
--- a/src/fast_agent/ui/interactive_prompt.py
+++ b/src/fast_agent/ui/interactive_prompt.py
@@ -941,6 +941,7 @@ async def _send_regular_message(
         prompt_payload: str | PromptMessageExtended,
         prompt_provider: "AgentApp",
         agent_name: str,
+        display: "ConsoleDisplay",
         runtime_state: PromptLoopRuntimeState,
     ) -> PromptLoopResult | None:
         emit_prompt_mark("C")
@@ -1123,6 +1124,7 @@ async def prompt_loop(
                 prompt_payload=prompt_payload,
                 prompt_provider=prompt_provider,
                 agent_name=agent_state.current_agent,
+                display=display,
                 runtime_state=runtime_state,
             )
             if send_result is None:
diff --git a/src/fast_agent/ui/prompt/completion_sources.py b/src/fast_agent/ui/prompt/completion_sources.py
index 13abbaaa6..caf22916e 100644
--- a/src/fast_agent/ui/prompt/completion_sources.py
+++ b/src/fast_agent/ui/prompt/completion_sources.py
@@ -87,7 +87,7 @@ def _complete_attached_mcp_servers(completer: "AgentCompleter", partial: str) ->
 def _mcp_connect_completions(completer: "AgentCompleter", remainder: str) -> list[Completion]:
     connect_flags = {
         "--name": "set attached server name",
-        "--auth": "set bearer token for URL servers",
+        "--auth": "set bearer token for remote MCP/A2A",
         "--timeout": "set startup timeout in seconds",
         "--oauth": "enable oauth flow",
         "--no-oauth": "disable oauth flow",
diff --git a/src/fast_agent/ui/prompt/input_toolbar.py b/src/fast_agent/ui/prompt/input_toolbar.py
index 1c51c8b7d..5fae6d0e8 100644
--- a/src/fast_agent/ui/prompt/input_toolbar.py
+++ b/src/fast_agent/ui/prompt/input_toolbar.py
@@ -9,10 +9,13 @@
 
 from prompt_toolkit.formatted_text import HTML
 
+from fast_agent.a2a.remote_agent import A2ARemoteAgent
+from fast_agent.agents.agent_types import AgentType
 from fast_agent.agents.workflow.parallel_agent import ParallelAgent
 from fast_agent.llm.model_display_name import resolve_model_display_name
 from fast_agent.llm.model_info import ModelInfo
 from fast_agent.llm.provider_types import Provider
+from fast_agent.llm.trace import llm_trace_enabled
 from fast_agent.ui import notification_tracker
 from fast_agent.ui.attachment_indicator import (
     DraftAttachmentSummary,
@@ -303,6 +306,14 @@ def _build_toolbar_agent_state(
         return ToolbarAgentState()
 
     turn_count = _turn_count_for_agent(agent)
+    if _is_a2a_agent(agent):
+        return ToolbarAgentState(
+            agent=agent,
+            model_name="A2A",
+            model_display=_resolve_a2a_display(agent),
+            turn_count=turn_count,
+        )
+
     context_pct, usage_accumulator = _usage_context_for_agent(agent)
     model_name = _resolve_model_name(agent, llm)
     model_display = _resolve_model_display(agent, model_name, llm=llm)
@@ -325,6 +336,18 @@ def _build_toolbar_agent_state(
     )
 
 
+def _is_a2a_agent(agent: AgentProtocol) -> bool:
+    agent_type = getattr(agent, "agent_type", None)  # a missing attribute yields None -> False
+    normalized = getattr(agent_type, "value", agent_type)  # use .value when it is present
+    return isinstance(normalized, str) and normalized.lower() == AgentType.A2A.value
+
+
+def _resolve_a2a_display(agent: AgentProtocol) -> str:
+    """Return the truncated remote card name, or "A2A" when no card is available."""
+    card = agent.remote_card if isinstance(agent, A2ARemoteAgent) else None
+    return "A2A" if card is None else _truncate_model_display(card.name)
+
+
 def _build_toolbar_agent_state_cache_key(
     agent: AgentProtocol | None,
     *,
@@ -593,6 +616,8 @@ def _build_middle_segment(
     )
     if shortcut_text:
         middle_segments.append(shortcut_text)
+    if llm_trace_enabled():
+        middle_segments.append("<style fg='ansired' bg='ansiblack'>*</style>")
     return " | ".join(middle_segments)
 
 
diff --git a/src/fast_agent/ui/prompt/parser.py b/src/fast_agent/ui/prompt/parser.py
index 8e3bc3bf5..a3f254c00 100644
--- a/src/fast_agent/ui/prompt/parser.py
+++ b/src/fast_agent/ui/prompt/parser.py
@@ -19,6 +19,7 @@
 )
 from fast_agent.mcp.connect_targets import parse_connect_command_text
 from fast_agent.ui.command_payloads import (
+    A2ACommand,
     AgentCommand,
     AttachCommand,
     CardsCommand,
@@ -67,6 +68,7 @@
     SkillsCommand,
     SwitchAgentCommand,
     TitleSessionCommand,
+    ToggleTraceCommand,
     UnknownCommand,
 )
 from fast_agent.utils.commandline import split_commandline
@@ -606,6 +608,29 @@ def _parse_slash_alias_command(
     return None
 
 
+def _parse_a2a_command(remainder: str) -> CommandPayload:
+    """Parse the text after ``/a2a`` into an ``A2ACommand``.
+
+    An empty or whitespace-only remainder selects the ``status`` action. Unknown
+    actions are still returned, with ``error`` set to a user-facing message.
+    """
+    tokens = remainder.split(maxsplit=1)
+    if not tokens:
+        # Covers "" and whitespace-only input, where tokens[0] would raise IndexError.
+        return A2ACommand(action="status", argument=None)
+    action = tokens[0].lower()
+    argument = tokens[1].strip() if len(tokens) > 1 else None
+    known_actions = {"list", "status", "card", "reset", "connect", "transport"}
+    help_aliases = {"help", "?", "-h", "--help", "commands"}
+    if action not in known_actions | help_aliases:
+        return A2ACommand(
+            action=action,
+            argument=argument,
+            error=f"Unknown /a2a action: {action}",
+        )
+    return A2ACommand(action=action, argument=argument)
+
+
 def _parse_slash_command(cmd_line: str) -> str | CommandPayload:
     cmd_parts = cmd_line[1:].strip().split(maxsplit=1)
     cmd = cmd_parts[0].lower()
@@ -627,6 +652,7 @@ def _parse_slash_command(cmd_line: str) -> str | CommandPayload:
         return simple_factory()
 
     command_parsers: dict[str, Callable[[str], CommandPayload]] = {
+        "a2a": _parse_a2a_command,
         "history": _parse_history_command,
         "session": _parse_session_command,
         "card": _parse_card_command,
@@ -653,6 +679,9 @@ def parse_special_input(text: str) -> str | CommandPayload:
     stripped = text.lstrip()
     cmd_line = stripped.splitlines()[0] if stripped.startswith("/") else text
 
+    if text.strip() == "***TRACE":
+        return ToggleTraceCommand()
+
     if cmd_line and cmd_line.startswith("/"):
         if cmd_line == "/":
             return ""
diff --git a/src/fast_agent/ui/prompt/toolbar.py b/src/fast_agent/ui/prompt/toolbar.py
index 0e6270b62..0599037c2 100644
--- a/src/fast_agent/ui/prompt/toolbar.py
+++ b/src/fast_agent/ui/prompt/toolbar.py
@@ -159,9 +159,28 @@ def _is_smart_agent(agent: object | None) -> bool:
     return normalized == AgentType.SMART
 
 
+def _is_a2a_agent(agent: object | None) -> bool:
+    """Return True when the provided agent instance is a remote A2A agent."""
+    if agent is None:
+        return False
+    agent_type = getattr(agent, "agent_type", None)  # None when the attribute is absent
+    normalized = getattr(agent_type, "value", agent_type)  # prefer .value, else the raw object
+    if isinstance(normalized, str):
+        return normalized.lower() == AgentType.A2A.value  # lowercased before comparing
+    return normalized == AgentType.A2A
+
+
 def _format_toolbar_agent_identity(
     agent_name: str, toolbar_color: str, agent: object | None
 ) -> str:
-    """Render toolbar agent identity, suffixing [S] for smart agents."""
-    label = f"{agent_name}[S]" if _is_smart_agent(agent) else agent_name
-    return f"<style fg='{toolbar_color}' bg='ansiblack'> {label} </style>"
+    """Render toolbar agent identity, suffixing [A2A] or [S] for special agent kinds."""
+    if _is_a2a_agent(agent):  # checked first, so the A2A styling wins over the smart suffix
+        label = f"{agent_name}[A2A]"
+        color = "ansimagenta"  # fixed colour; toolbar_color is ignored for A2A agents
+    elif _is_smart_agent(agent):
+        label = f"{agent_name}[S]"
+        color = toolbar_color
+    else:
+        label = agent_name  # plain agents get no suffix
+        color = toolbar_color
+    return f"<style fg='{color}' bg='ansiblack'> {label} </style>"
diff --git a/tests/integration/a2a/conftest.py b/tests/integration/a2a/conftest.py
new file mode 100644
index 000000000..15bad2e85
--- /dev/null
+++ b/tests/integration/a2a/conftest.py
@@ -0,0 +1,335 @@
+from __future__ import annotations
+
+import asyncio
+import uuid
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+import pytest_asyncio
+import uvicorn
+from a2a.server.agent_execution.agent_executor import AgentExecutor
+from a2a.server.request_handlers import DefaultRequestHandler
+from a2a.server.routes import (
+    create_agent_card_routes,
+    create_jsonrpc_routes,
+    create_rest_routes,
+)
+from a2a.server.tasks.inmemory_task_store import InMemoryTaskStore
+from a2a.server.tasks.task_updater import TaskUpdater
+from a2a.types import (
+    AgentCapabilities,
+    AgentCard,
+    AgentInterface,
+    AgentProvider,
+    AgentSkill,
+    Message,
+    Part,
+    Role,
+    Task,
+    TaskState,
+    TaskStatus,
+)
+from fastapi import FastAPI
+from google.protobuf.json_format import ParseDict
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator
+
+    from a2a.server.agent_execution.context import RequestContext
+    from a2a.server.events.event_queue import EventQueue
+
+def _data_part(value: dict[str, object]) -> Part:  # Build a Part carrying `value` as data.
+    part = Part()
+    ParseDict(value, part.data)  # fills part.data in place from the dict
+    return part
+
+
+LONG_STREAM_CHUNKS = [  # One artifact update is emitted per entry for "long stream" queries.
+    "Starting the remote analysis task.\n\n",
+    "Step 1 — Reading the request and identifying the goal.\n",
+    "Step 2 — Checking the available A2A task context.\n",
+    "Step 3 — Building a concise response plan.\n",
+    "Step 4 — Verifying the streamed artifact updates are ordered.\n",
+    "Step 5 — Preparing the final summary.\n\n",
+    "Remote analysis complete.",
+]
+
+
+FAKE_A2A_HELP = """Fake A2A server commands:
+- hello: echo a normal response
+- please stream: emit two short streaming artifact updates
+- please long stream: emit a longer multi-step streaming artifact
+- respond with files: return text, URL, data, and raw byte parts
+- artifact append: replace and append updates on the same artifact
+- need input: enter an INPUT_REQUIRED task; reply with a value such as blue
+- help: show this menu"""  # Menu text sent back for help queries.
+
+
+def _is_help_query(query: str) -> bool:  # True for help keywords or a "what can you do" phrase.
+    normalized = query.strip().lower()  # ignore surrounding whitespace and letter case
+    return normalized in {"help", "?", "commands", "menu"} or "what can you do" in normalized
+
+
+def _agent_message(*, text: str, context_id: str | None) -> Message:  # Agent-role text message.
+    message = Message(
+        role=Role.ROLE_AGENT,
+        message_id=str(uuid.uuid4()),  # fresh random id for every message
+        parts=[Part(text=text)],
+    )
+    if context_id:  # context_id is only assigned when a non-empty value is given
+        message.context_id = context_id
+    return message
+
+
+@dataclass(frozen=True)
+class A2ATestServer:  # Immutable handle yielded by the a2a_test_server fixture.
+    base_url: str  # "http://host:port" root of the running server
+    card: AgentCard  # the card the server was built with
+    executor: EchoAgentExecutor  # gives tests access to the recorded queries
+
+
+class EchoAgentExecutor(AgentExecutor):  # Deterministic executor: records input, replies by keyword.
+    def __init__(self) -> None:
+        self.seen_queries: list[str] = []  # user text of every executed request, in order
+        self.seen_part_kinds: list[list[str]] = []  # per request, the content kind of each part
+        self.pending_input_tasks: set[str] = set()  # task ids currently awaiting user input
+
+    async def cancel(self, context: RequestContext, event_queue: EventQueue) -> None:
+        updater = TaskUpdater(
+            event_queue=event_queue,
+            task_id=context.task_id or "",
+            context_id=context.context_id or "",
+        )
+        await updater.cancel()
+
+    async def execute(self, context: RequestContext, event_queue: EventQueue) -> None:
+        if not context.message:
+            return
+        query = context.get_user_input()
+        self.seen_queries.append(query)
+        self.seen_part_kinds.append(
+            [part.WhichOneof("content") or "unknown" for part in context.message.parts]
+        )
+        # Help outside a pending-input task is answered with a plain message, no task.
+        if _is_help_query(query) and context.task_id not in self.pending_input_tasks:
+            await event_queue.enqueue_event(
+                _agent_message(text=FAKE_A2A_HELP, context_id=context.context_id)
+            )
+            return
+        # Markers are matched case-insensitively, here and in every branch below.
+        normalized_query = query.lower()
+        taskless_query = not any(
+            marker in normalized_query
+            for marker in [
+                "long stream",
+                "stream",
+                "respond with files",
+                "artifact append",
+                "need input",
+            ]
+        )
+        if context.task_id not in self.pending_input_tasks and taskless_query:
+            await asyncio.sleep(0.01)
+            summary = ",".join(self.seen_part_kinds[-1])
+            await event_queue.enqueue_event(
+                _agent_message(text=f"echo: {query} [{summary}]", context_id=context.context_id)
+            )
+            return
+        # Everything past this point emits task events, which need both ids.
+        if not context.task_id or not context.context_id:
+            return
+
+        await event_queue.enqueue_event(
+            Task(
+                id=context.task_id,
+                context_id=context.context_id,
+                status=TaskStatus(state=TaskState.TASK_STATE_SUBMITTED),
+                history=[context.message],
+            )
+        )
+
+        updater = TaskUpdater(
+            event_queue=event_queue,
+            task_id=context.task_id,
+            context_id=context.context_id,
+        )
+        await updater.start_work(
+            message=updater.new_agent_message(parts=[Part(text="working")])
+        )
+        # Help while this task awaits input: show the menu and stay INPUT_REQUIRED.
+        if _is_help_query(query):
+            if context.task_id in self.pending_input_tasks:
+                await updater.update_status(
+                    TaskState.TASK_STATE_INPUT_REQUIRED,
+                    message=updater.new_agent_message(
+                        parts=[
+                            Part(
+                                text=(
+                                    f"{FAKE_A2A_HELP}\n\n"
+                                    "Current task is still waiting for input."
+                                )
+                            )
+                        ]
+                    ),
+                )
+                return
+        # Any other message on a pending task is treated as the requested input.
+        if context.task_id in self.pending_input_tasks:
+            self.pending_input_tasks.remove(context.task_id)
+            await updater.add_artifact(
+                parts=[Part(text=f"input received: {query}")],
+                name="input-response",
+                last_chunk=True,
+            )
+            await updater.complete()
+            return
+        # "long stream" must be tested before "stream", which it contains.
+        if "long stream" in normalized_query:
+            for index, chunk in enumerate(LONG_STREAM_CHUNKS, start=1):
+                await updater.add_artifact(
+                    parts=[Part(text=chunk)],
+                    name="long-stream",
+                    last_chunk=index == len(LONG_STREAM_CHUNKS),
+                )
+                await asyncio.sleep(0.01)
+            await updater.complete()
+            return
+
+        if "stream" in normalized_query:
+            await updater.add_artifact(
+                parts=[Part(text="stream chunk one")],
+                name="stream",
+                last_chunk=False,
+            )
+            await asyncio.sleep(0.01)
+            await updater.add_artifact(
+                parts=[Part(text="stream chunk two")],
+                name="stream",
+                last_chunk=True,
+            )
+            await updater.complete()
+            return
+
+        if "respond with files" in normalized_query:
+            await updater.add_artifact(
+                parts=[
+                    Part(text="file response"),
+                    Part(
+                        url="https://example.com/report.pdf",
+                        media_type="application/pdf",
+                        filename="report.pdf",
+                    ),
+                    _data_part({"ok": True, "count": 2}),
+                    Part(raw=b"abc", media_type="text/plain", filename="note.txt"),
+                ],
+                name="files",
+                last_chunk=True,
+            )
+            await updater.complete()
+            return
+
+        if "artifact append" in normalized_query:
+            artifact_id = "append-contract"  # all four updates target this one artifact
+            await updater.add_artifact(
+                parts=[Part(text="draft")],
+                name="append-contract",
+                artifact_id=artifact_id,
+                append=False,
+                last_chunk=False,
+            )
+            await updater.add_artifact(
+                parts=[Part(text="final")],
+                name="append-contract",
+                artifact_id=artifact_id,
+                append=False,
+                last_chunk=False,
+            )
+            await updater.add_artifact(
+                parts=[Part(text="\nrepeat")],
+                name="append-contract",
+                artifact_id=artifact_id,
+                append=True,
+                last_chunk=False,
+            )
+            await updater.add_artifact(
+                parts=[Part(text="\nrepeat")],
+                name="append-contract",
+                artifact_id=artifact_id,
+                append=True,
+                last_chunk=True,
+            )
+            await updater.complete()
+            return
+
+        if "need input" in normalized_query:
+            self.pending_input_tasks.add(context.task_id)  # the next message on this task is the answer
+            await updater.update_status(
+                TaskState.TASK_STATE_INPUT_REQUIRED,
+                message=updater.new_agent_message(
+                    parts=[Part(text="Please provide the missing value.")]
+                ),
+            )
+            return
+
+
+
+@pytest_asyncio.fixture
+async def a2a_test_server(unused_tcp_port: int, wait_for_port) -> AsyncIterator[A2ATestServer]:
+    host = "127.0.0.1"
+    port = unused_tcp_port
+    base_url = f"http://{host}:{port}"  # also embedded in the card's interface URLs below
+    card = AgentCard(
+        name="fast-agent test A2A server",
+        description="Deterministic A2A test server.",
+        provider=AgentProvider(organization="fast-agent", url="https://fast-agent.ai"),
+        version="1.0.0",
+        capabilities=AgentCapabilities(streaming=True, push_notifications=False),
+        default_input_modes=["text/plain"],
+        default_output_modes=["text/plain"],
+        skills=[
+            AgentSkill(
+                id="echo",
+                name="Echo",
+                description="Echo user input.",
+                tags=["test"],
+                examples=["hello"],
+                input_modes=["text/plain"],
+                output_modes=["text/plain"],
+            )
+        ],
+        supported_interfaces=[
+            AgentInterface(
+                protocol_binding="JSONRPC",
+                protocol_version="1.0",
+                url=f"{base_url}/a2a/jsonrpc",
+            ),
+            AgentInterface(
+                protocol_binding="HTTP+JSON",
+                protocol_version="1.0",
+                url=f"{base_url}/a2a/rest",
+            ),
+        ],
+    )
+    executor = EchoAgentExecutor()
+    request_handler = DefaultRequestHandler(
+        agent_executor=executor,
+        task_store=InMemoryTaskStore(),
+        agent_card=card,
+    )
+
+    app = FastAPI()  # card, JSON-RPC and REST routes are mounted at the paths the card advertises
+    app.routes.extend(create_agent_card_routes(agent_card=card))
+    app.routes.extend(
+        create_jsonrpc_routes(request_handler=request_handler, rpc_url="/a2a/jsonrpc")
+    )
+    app.routes.extend(create_rest_routes(request_handler=request_handler, path_prefix="/a2a/rest"))
+
+    server = uvicorn.Server(uvicorn.Config(app, host=host, port=port, log_level="warning"))
+    task = asyncio.create_task(server.serve())  # serve in the background on the test's event loop
+    try:
+        # Waiting inside the try means a startup timeout still shuts the server task down.
+        await wait_for_port(host, port, timeout=5.0)
+        yield A2ATestServer(base_url=base_url, card=card, executor=executor)
+    finally:
+        server.should_exit = True
+        await asyncio.wait_for(task, timeout=5.0)
diff --git a/tests/integration/a2a/fake_server.py b/tests/integration/a2a/fake_server.py
new file mode 100644
index 000000000..1a03056ae
--- /dev/null
+++ b/tests/integration/a2a/fake_server.py
@@ -0,0 +1,279 @@
+"""Deterministic A2A server for fast-agent CLI/TUI smoke tests.
+
+Run:
+    uv run python tests/integration/a2a/fake_server.py --port 41242
+
+Useful prompts:
+    help
+    hello
+    please stream
+    please long stream
+    respond with files
+    need input
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import logging
+import uuid
+from typing import TYPE_CHECKING
+
+import uvicorn
+from a2a.server.agent_execution.agent_executor import AgentExecutor
+from a2a.server.request_handlers import DefaultRequestHandler
+from a2a.server.routes import create_agent_card_routes, create_jsonrpc_routes, create_rest_routes
+from a2a.server.tasks.inmemory_task_store import InMemoryTaskStore
+from a2a.server.tasks.task_updater import TaskUpdater
+from a2a.types import (
+    AgentCapabilities,
+    AgentCard,
+    AgentInterface,
+    AgentProvider,
+    AgentSkill,
+    Message,
+    Part,
+    Role,
+    Task,
+    TaskState,
+    TaskStatus,
+)
+from fastapi import FastAPI
+from google.protobuf.json_format import ParseDict
+
+if TYPE_CHECKING:
+    from a2a.server.agent_execution.context import RequestContext
+    from a2a.server.events.event_queue import EventQueue
+
+logger = logging.getLogger(__name__)
+
+LONG_STREAM_CHUNKS = [  # One artifact update is emitted per entry for "long stream" prompts.
+    "Starting the remote analysis task.\n\n",
+    "Step 1 — Reading the request and identifying the goal.\n",
+    "Step 2 — Checking the available A2A task context.\n",
+    "Step 3 — Building a concise response plan.\n",
+    "Step 4 — Verifying the streamed artifact updates are ordered.\n",
+    "Step 5 — Preparing the final summary.\n\n",
+    "Remote analysis complete.",
+]
+
+FAKE_A2A_HELP = """Fake A2A server commands:
+- hello: echo a normal response
+- please stream: emit two short streaming artifact updates
+- please long stream: emit a longer multi-step streaming artifact
+- respond with files: return text, URL, data, and raw byte parts
+- need input: enter an INPUT_REQUIRED task; reply with a value such as blue
+- help: show this menu"""  # Menu text sent back for help queries.
+
+
+def _is_help_query(query: str) -> bool:  # True for help keywords or a "what can you do" phrase.
+    normalized = query.strip().lower()  # ignore surrounding whitespace and letter case
+    return normalized in {"help", "?", "commands", "menu"} or "what can you do" in normalized
+
+
+def _agent_message(*, text: str, context_id: str | None) -> Message:  # Agent-role text message.
+    message = Message(
+        role=Role.ROLE_AGENT,
+        message_id=str(uuid.uuid4()),  # fresh random id for every message
+        parts=[Part(text=text)],
+    )
+    if context_id:  # context_id is only assigned when a non-empty value is given
+        message.context_id = context_id
+    return message
+
+
+class FakeAgentExecutor(AgentExecutor):  # Keyword-driven executor behind the standalone fake server.
+    def __init__(self) -> None:
+        self.pending_input_tasks: set[str] = set()  # task ids currently awaiting user input
+
+    async def cancel(self, context: RequestContext, event_queue: EventQueue) -> None:
+        updater = TaskUpdater(
+            event_queue=event_queue,
+            task_id=context.task_id or "",
+            context_id=context.context_id or "",
+        )
+        await updater.cancel()
+
+    async def execute(self, context: RequestContext, event_queue: EventQueue) -> None:
+        if not context.message:
+            return
+        # Help outside a pending-input task is answered with a plain message, no task.
+        query = context.get_user_input()
+        if _is_help_query(query) and context.task_id not in self.pending_input_tasks:
+            await event_queue.enqueue_event(
+                _agent_message(text=FAKE_A2A_HELP, context_id=context.context_id)
+            )
+            return
+        # Queries with none of these markers are echoed back without creating a task.
+        taskless_query = not any(
+            marker in query.lower()
+            for marker in ["long stream", "stream", "files", "need input"]
+        )
+        if context.task_id not in self.pending_input_tasks and taskless_query:
+            kinds = ",".join(
+                part.WhichOneof("content") or "unknown" for part in context.message.parts
+            )
+            await event_queue.enqueue_event(
+                _agent_message(
+                    text=f"fake echo: {query} [{kinds}]",
+                    context_id=context.context_id,
+                )
+            )
+            return
+        # Everything past this point emits task events, which need both ids.
+        if not context.task_id or not context.context_id:
+            return
+
+        await event_queue.enqueue_event(
+            Task(
+                id=context.task_id,
+                context_id=context.context_id,
+                status=TaskStatus(state=TaskState.TASK_STATE_SUBMITTED),
+                history=[context.message],
+            )
+        )
+        updater = TaskUpdater(
+            event_queue=event_queue,
+            task_id=context.task_id,
+            context_id=context.context_id,
+        )
+        await updater.start_work(message=updater.new_agent_message(parts=[Part(text="working")]))
+        # Help while this task awaits input: show the menu and stay INPUT_REQUIRED.
+        if _is_help_query(query):
+            if context.task_id in self.pending_input_tasks:
+                await updater.update_status(
+                    TaskState.TASK_STATE_INPUT_REQUIRED,
+                    message=updater.new_agent_message(
+                        parts=[
+                            Part(
+                                text=(
+                                    f"{FAKE_A2A_HELP}\n\n"
+                                    "Current task is still waiting for input."
+                                )
+                            )
+                        ]
+                    ),
+                )
+                return
+        # Any other message on a pending task is treated as the requested input.
+        if context.task_id in self.pending_input_tasks:
+            self.pending_input_tasks.remove(context.task_id)
+            await updater.add_artifact(
+                parts=[Part(text=f"input received: {query}")],
+                name="input-response",
+                last_chunk=True,
+            )
+            await updater.complete()
+            return
+        # "long stream" must be tested before "stream", which it contains.
+        if "long stream" in query.lower():
+            for index, chunk in enumerate(LONG_STREAM_CHUNKS, start=1):
+                await updater.add_artifact(
+                    parts=[Part(text=chunk)],
+                    name="long-stream",
+                    last_chunk=index == len(LONG_STREAM_CHUNKS),
+                )
+                await asyncio.sleep(0.35)  # pause between chunks
+            await updater.complete()
+            return
+
+        if "stream" in query.lower():
+            await updater.add_artifact(parts=[Part(text="stream chunk one")], name="stream")
+            await asyncio.sleep(0.4)  # pause between the two chunks
+            await updater.add_artifact(
+                parts=[Part(text="stream chunk two")], name="stream", last_chunk=True
+            )
+            await updater.complete()
+            return
+
+        if "files" in query.lower():
+            data_part = Part()
+            ParseDict({"ok": True, "source": "fake-a2a-server"}, data_part.data)  # fills .data in place
+            await updater.add_artifact(
+                parts=[
+                    Part(text="file response"),
+                    Part(
+                        url="https://example.com/report.pdf",
+                        media_type="application/pdf",
+                        filename="report.pdf",
+                    ),
+                    data_part,
+                    Part(raw=b"abc", media_type="text/plain", filename="note.txt"),
+                ],
+                name="files",
+                last_chunk=True,
+            )
+            await updater.complete()
+            return
+
+        if "need input" in query.lower():
+            self.pending_input_tasks.add(context.task_id)  # the next message on this task is the answer
+            await updater.update_status(
+                TaskState.TASK_STATE_INPUT_REQUIRED,
+                message=updater.new_agent_message(
+                    parts=[Part(text="Please provide the missing value.")]
+                ),
+            )
+            return
+
+
+
+def build_app(host: str, port: int) -> FastAPI:  # Assemble the fake server's FastAPI app.
+    base_url = f"http://{host}:{port}"  # embedded in the card's interface URLs below
+    card = AgentCard(
+        name="fast-agent fake A2A server",
+        description="Deterministic server for fast-agent A2A demos and tests.",
+        provider=AgentProvider(organization="fast-agent", url="https://fast-agent.ai"),
+        version="1.0.0",
+        capabilities=AgentCapabilities(streaming=True, push_notifications=False),
+        default_input_modes=["text/plain", "application/octet-stream", "image/*"],
+        default_output_modes=["text/plain", "application/json", "application/octet-stream"],
+        skills=[
+            AgentSkill(
+                id="fake_echo_stream_files",
+                name="Fake echo/stream/files",
+                description="Echoes text, streams short/long chunks, and returns URL/data/raw parts.",
+                tags=["test", "streaming", "files"],
+                examples=["hello", "please stream", "please long stream", "respond with files"],
+                input_modes=["text/plain", "application/octet-stream", "image/*"],
+                output_modes=["text/plain", "application/json", "application/octet-stream"],
+            )
+        ],
+        supported_interfaces=[
+            AgentInterface(
+                protocol_binding="JSONRPC",
+                protocol_version="1.0",
+                url=f"{base_url}/a2a/jsonrpc",
+            ),
+            AgentInterface(
+                protocol_binding="HTTP+JSON",
+                protocol_version="1.0",
+                url=f"{base_url}/a2a/rest",
+            ),
+        ],
+    )
+    request_handler = DefaultRequestHandler(
+        agent_executor=FakeAgentExecutor(),
+        task_store=InMemoryTaskStore(),  # a new in-memory store for each app built
+        agent_card=card,
+    )
+    app = FastAPI()  # card, JSON-RPC and REST routes are mounted at the paths the card advertises
+    app.routes.extend(create_agent_card_routes(agent_card=card))
+    app.routes.extend(create_jsonrpc_routes(request_handler=request_handler, rpc_url="/a2a/jsonrpc"))
+    app.routes.extend(create_rest_routes(request_handler=request_handler, path_prefix="/a2a/rest"))
+    return app
+
+
+def main() -> None:  # CLI entry point: parse --host/--port and serve until stopped.
+    parser = argparse.ArgumentParser(description="fast-agent fake A2A server")
+    parser.add_argument("--host", default="127.0.0.1")
+    parser.add_argument("--port", type=int, default=41242)
+    args = parser.parse_args()
+    logging.basicConfig(level=logging.INFO)
+    logger.info("Agent card: http://%s:%s/.well-known/agent-card.json", args.host, args.port)
+    uvicorn.run(build_app(args.host, args.port), host=args.host, port=args.port, log_level="info")
+
+
+if __name__ == "__main__":  # run the server only when executed as a script
+    main()
diff --git a/tests/integration/a2a/test_fast_agent_a2a_server.py b/tests/integration/a2a/test_fast_agent_a2a_server.py
new file mode 100644
index 000000000..2a6ee198e
--- /dev/null
+++ b/tests/integration/a2a/test_fast_agent_a2a_server.py
@@ -0,0 +1,1418 @@
+from __future__ import annotations
+
+import asyncio
+import contextlib
+from dataclasses import dataclass, field
+from types import SimpleNamespace
+from typing import TYPE_CHECKING, Any, cast
+
+import httpx
+import pytest
+import pytest_asyncio
+import uvicorn
+from a2a.client import ClientConfig, create_client
+from a2a.types import (
+    CancelTaskRequest,
+    GetTaskRequest,
+    ListTasksRequest,
+    Message,
+    Part,
+    Role,
+    SendMessageRequest,
+    TaskState,
+)
+from fastapi.testclient import TestClient
+from google.protobuf.json_format import MessageToDict
+from mcp.types import (
+    BlobResourceContents,
+    EmbeddedResource,
+    ImageContent,
+    TextContent,
+    TextResourceContents,
+)
+from pydantic import AnyUrl
+
+from fast_agent.a2a.config import A2AAgentConfig
+from fast_agent.a2a.remote_agent import A2ARemoteAgent
+from fast_agent.a2a.server import (
+    AgentA2AServer,
+    _bearer_token_from_call_context,
+    _parts_from_prompt_message,
+    _prompt_from_a2a_message,
+)
+from fast_agent.agents.agent_types import AgentConfig, AgentType
+from fast_agent.core.agent_app import AgentApp
+from fast_agent.core.fastagent import AgentInstance
+from fast_agent.llm.stream_types import StreamChunk
+from fast_agent.mcp.auth.context import request_bearer_token
+from fast_agent.types import LlmStopReason, PromptMessageExtended
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator
+
+    from fast_agent.interfaces import AgentProtocol
+
+
+@dataclass
+class RecordingAgent:  # In-memory agent double: records prompts, replies deterministically.
+    name: str = "worker"
+    agent_type: AgentType = AgentType.BASIC
+    message_history: list[PromptMessageExtended] = field(default_factory=list)  # prompts + replies
+    received: list[PromptMessageExtended] = field(default_factory=list)  # prompts only
+    config: AgentConfig = field(init=False)  # built in __post_init__ from name and agent_type
+
+    def __post_init__(self) -> None:
+        self.config = AgentConfig(
+            name=self.name,
+            agent_type=self.agent_type,
+            default=True,
+            use_history=True,
+        )
+
+    async def initialize(self) -> None:  # no-op
+        return None
+
+    async def shutdown(self) -> None:  # no-op
+        return None
+
+    async def set_model(self, model: str | None) -> None:
+        del model  # argument is accepted and discarded
+
+    def clear(self, *, clear_prompts: bool = False) -> None:
+        del clear_prompts  # flag is ignored; the history is always emptied
+        self.message_history.clear()
+
+    def pop_last_message(self) -> PromptMessageExtended | None:
+        return self.message_history.pop() if self.message_history else None
+
+    async def __call__(self, message: Any) -> str:
+        return await self.send(message)
+
+    async def send(self, message: Any, request_params: Any = None) -> str:
+        response = await self.generate(message, request_params=request_params)
+        return response.all_text()
+
+    async def generate(self, messages: Any, request_params: Any = None) -> PromptMessageExtended:
+        del request_params
+        if isinstance(messages, PromptMessageExtended):
+            prompt = messages
+        else:  # anything else is stringified into a single user text message
+            prompt = PromptMessageExtended(
+                role="user",
+                content=[TextContent(type="text", text=str(messages))],
+            )
+        self.received.append(prompt)
+        self.message_history.append(prompt)  # appended first, so the count below includes it
+        response = PromptMessageExtended(
+            role="assistant",
+            content=[
+                TextContent(
+                    type="text",
+                    text=f"server saw {len(self.message_history)}: {prompt.all_text()}",
+                )
+            ],
+        )
+        self.message_history.append(response)
+        return response
+
+    async def structured(self, messages: Any, model: type, request_params: Any = None) -> tuple:
+        del model  # no structured parsing: the first tuple element is always None
+        return None, await self.generate(messages, request_params=request_params)
+
+
+class StreamingRecordingAgent(RecordingAgent):  # Pushes three text chunks to listeners per call.
+    def __init__(self, name: str = "worker") -> None:
+        super().__init__(name=name)
+        self._stream_listeners: list[Any] = []  # callables invoked with each StreamChunk
+
+    def add_stream_listener(self, listener: Any) -> Any:  # returns a callable that unregisters it
+        self._stream_listeners.append(listener)
+
+        def remove_listener() -> None:
+            if listener in self._stream_listeners:  # safe to call more than once
+                self._stream_listeners.remove(listener)
+
+        return remove_listener
+
+    def add_tool_stream_listener(self, listener: Any) -> Any:
+        del listener  # tool streaming is not simulated; the listener is discarded
+
+        def remove_listener() -> None:
+            return None
+
+        return remove_listener
+
+    async def generate(self, messages: Any, request_params: Any = None) -> PromptMessageExtended:
+        del request_params
+        if isinstance(messages, PromptMessageExtended):
+            prompt = messages
+        else:  # anything else is stringified into a single user text message
+            prompt = PromptMessageExtended(
+                role="user",
+                content=[TextContent(type="text", text=str(messages))],
+            )
+        self.received.append(prompt)
+        self.message_history.append(prompt)
+        for text in ("stream ", "from ", "server"):  # concatenates to the final response text
+            for listener in list(self._stream_listeners):  # iterate over a copy of the list
+                listener(StreamChunk(text=text))
+            await asyncio.sleep(0.05)
+        response = PromptMessageExtended(
+            role="assistant",
+            content=[TextContent(type="text", text="stream from server")],
+        )
+        self.message_history.append(response)
+        return response
+
+
+class InputRequiredRecordingAgent(RecordingAgent):  # Alternates: ask for input, then accept it.
+    waiting_for_input: bool = False  # class-level default; assigned on the instance in generate()
+
+    async def generate(self, messages: Any, request_params: Any = None) -> PromptMessageExtended:
+        del request_params
+        if isinstance(messages, PromptMessageExtended):
+            prompt = messages
+        else:  # anything else is stringified into a single user text message
+            prompt = PromptMessageExtended(
+                role="user",
+                content=[TextContent(type="text", text=str(messages))],
+            )
+        self.received.append(prompt)
+        self.message_history.append(prompt)
+        if not self.waiting_for_input:  # first call of a pair: reply with a PAUSE stop reason
+            self.waiting_for_input = True
+            response = PromptMessageExtended(
+                role="assistant",
+                content=[TextContent(type="text", text="Please provide the missing value.")],
+                stop_reason=LlmStopReason.PAUSE,
+            )
+            self.message_history.append(response)
+            return response
+        # Second call of a pair: echo the supplied text and end the turn.
+        self.waiting_for_input = False
+        response = PromptMessageExtended(
+            role="assistant",
+            content=[
+                TextContent(
+                    type="text",
+                    text=f"input received: {prompt.all_text()}",
+                )
+            ],
+            stop_reason=LlmStopReason.END_TURN,
+        )
+        self.message_history.append(response)
+        return response
+
+
+class NoHistoryRecordingAgent(RecordingAgent):  # Configured with use_history=False by default.
+    def __post_init__(self) -> None:
+        self.config = AgentConfig(
+            name=self.name,
+            agent_type=self.agent_type,
+            default=True,
+            use_history=False,
+        )
+
+    async def generate(self, messages: Any, request_params: Any = None) -> PromptMessageExtended:
+        use_history = request_params.use_history if request_params is not None else self.config.use_history
+        if isinstance(messages, PromptMessageExtended):
+            prompt = messages
+        else:  # anything else is stringified into a single user text message
+            prompt = PromptMessageExtended(
+                role="user",
+                content=[TextContent(type="text", text=str(messages))],
+            )
+        self.received.append(prompt)  # recorded whether or not history is kept
+        history_len = len(self.message_history)  # measured before this turn is stored
+        response = PromptMessageExtended(
+            role="assistant",
+            content=[
+                TextContent(
+                    type="text",
+                    text=f"server history {history_len}: {prompt.all_text()}",
+                )
+            ],
+        )
+        if use_history:  # request_params, when given, override the config default
+            self.message_history.append(prompt)
+            self.message_history.append(response)
+        return response
+
+
+class NamedResponseAgent(RecordingAgent):  # Reply text is prefixed with this agent's name.
+    async def generate(self, messages: Any, request_params: Any = None) -> PromptMessageExtended:
+        del request_params
+        if isinstance(messages, PromptMessageExtended):
+            prompt = messages
+        else:  # anything else is stringified into a single user text message
+            prompt = PromptMessageExtended(
+                role="user",
+                content=[TextContent(type="text", text=str(messages))],
+            )
+        self.received.append(prompt)
+        self.message_history.append(prompt)
+        response = PromptMessageExtended(
+            role="assistant",
+            content=[TextContent(type="text", text=f"{self.name} handled: {prompt.all_text()}")],
+        )
+        self.message_history.append(response)
+        return response
+
+
+class CancellableRecordingAgent(RecordingAgent):
+    """Agent that sleeps for 30 seconds and flags when it starts and when it is cancelled."""
+
+    def __init__(self, name: str = "worker") -> None:
+        super().__init__(name=name)
+        self.started, self.cancelled = asyncio.Event(), asyncio.Event()
+
+    async def generate(self, messages: Any, request_params: Any = None) -> PromptMessageExtended:
+        """Set ``started``, then sleep; set ``cancelled`` and re-raise if cancelled meanwhile."""
+        del messages, request_params
+        self.started.set()
+        try:
+            await asyncio.sleep(30)
+        except asyncio.CancelledError:
+            self.cancelled.set()
+            raise
+        fallback = TextContent(type="text", text="not cancelled")
+        return PromptMessageExtended(role="assistant", content=[fallback])
+
+
+class TokenEchoAgent(RecordingAgent):
+    """Agent that replies with the value held by ``request_bearer_token``."""
+
+    async def generate(self, messages: Any, request_params: Any = None) -> PromptMessageExtended:
+        """Return the current token, or ``"missing"`` when the token is falsy."""
+        del messages, request_params
+        token = request_bearer_token.get()
+        if not token:
+            # Falsy values are reported with a fixed placeholder.
+            token = "missing"
+        echoed = TextContent(type="text", text=token)
+        return PromptMessageExtended(role="assistant", content=[echoed])
+
+
+@dataclass(frozen=True)
+class RunningFastAgentA2AServer:  # handle yielded by the A2A server fixtures in this file
+    base_url: str  # "http://<host>:<port>" of the running server
+    server: AgentA2AServer  # the server object under test
+    created_agents: list[RecordingAgent]  # agents built by the fixture's create_instance
+
+
+def _instance(agent: RecordingAgent) -> AgentInstance:
+    """Wrap a single recording agent in an ``AgentInstance`` keyed by the agent's name."""
+    # The cast only informs the type checker; the same object is used at runtime.
+    as_protocol = cast("AgentProtocol", agent)
+    by_name = {agent.name: as_protocol}
+    return AgentInstance(app=AgentApp({agent.name: as_protocol}), agents=by_name)
+
+
+def _multi_agent_instance(*agents: RecordingAgent) -> AgentInstance:
+    """Bundle several recording agents into one ``AgentInstance`` keyed by agent name."""
+    by_name: "dict[str, AgentProtocol]" = {}
+    for member in agents:
+        by_name[member.name] = cast("AgentProtocol", member)
+    return AgentInstance(app=AgentApp(by_name), agents=by_name)
+
+
+@pytest_asyncio.fixture
+async def fast_agent_a2a_server(
+    unused_tcp_port: int,
+    wait_for_port,
+) -> AsyncIterator[RunningFastAgentA2AServer]:
+    """Serve ``RecordingAgent`` workers over A2A on 127.0.0.1 for the duration of a test.
+
+    Yields the base URL, the server object and the agents built by ``create_instance``.
+    """
+    host, port = "127.0.0.1", unused_tcp_port
+    created_agents: list[RecordingAgent] = []
+    disposed: list[AgentInstance] = []
+
+    async def create_instance() -> AgentInstance:
+        agent = RecordingAgent(name="worker")
+        created_agents.append(agent)
+        return _instance(agent)
+
+    async def dispose_instance(instance: AgentInstance) -> None:
+        disposed.append(instance)
+        await instance.shutdown()
+
+    server = AgentA2AServer(
+        primary_instance=_instance(RecordingAgent(name="worker")),
+        create_instance=create_instance,
+        dispose_instance=dispose_instance,
+        server_name="fast-agent test server",
+        host=host,
+        port=port,
+    )
+    config = uvicorn.Config(server.asgi_app(), host=host, port=port, log_level="warning")
+    uvicorn_server = uvicorn.Server(config)
+    task = asyncio.create_task(uvicorn_server.serve())
+    try:
+        # Inside the try so that a startup timeout still stops the background server task.
+        await wait_for_port(host, port, timeout=5.0)
+        yield RunningFastAgentA2AServer(
+            base_url=f"http://{host}:{port}", server=server, created_agents=created_agents
+        )
+    finally:
+        uvicorn_server.should_exit = True
+        await asyncio.wait_for(task, timeout=5.0)
+        await server.executor.shutdown()
+    # Only reached on normal teardown, so it cannot mask an earlier startup failure.
+    assert disposed
+
+
+@pytest_asyncio.fixture
+async def streaming_fast_agent_a2a_server(
+    unused_tcp_port: int,
+    wait_for_port,
+) -> AsyncIterator[RunningFastAgentA2AServer]:
+    """Serve ``StreamingRecordingAgent`` workers over A2A on 127.0.0.1 during a test.
+
+    Yields the base URL, the server object and the agents built by ``create_instance``.
+    """
+    host, port = "127.0.0.1", unused_tcp_port
+    created_agents: list[RecordingAgent] = []
+    disposed: list[AgentInstance] = []
+
+    async def create_instance() -> AgentInstance:
+        agent = StreamingRecordingAgent(name="worker")
+        created_agents.append(agent)
+        return _instance(agent)
+
+    async def dispose_instance(instance: AgentInstance) -> None:
+        disposed.append(instance)
+        await instance.shutdown()
+
+    server = AgentA2AServer(
+        primary_instance=_instance(StreamingRecordingAgent(name="worker")),
+        create_instance=create_instance,
+        dispose_instance=dispose_instance,
+        server_name="fast-agent streaming test server",
+        host=host,
+        port=port,
+    )
+    config = uvicorn.Config(server.asgi_app(), host=host, port=port, log_level="warning")
+    uvicorn_server = uvicorn.Server(config)
+    task = asyncio.create_task(uvicorn_server.serve())
+    try:
+        # Inside the try so that a startup timeout still stops the background server task.
+        await wait_for_port(host, port, timeout=5.0)
+        yield RunningFastAgentA2AServer(
+            base_url=f"http://{host}:{port}", server=server, created_agents=created_agents
+        )
+    finally:
+        uvicorn_server.should_exit = True
+        await asyncio.wait_for(task, timeout=5.0)
+        await server.executor.shutdown()
+    # Only reached on normal teardown, so it cannot mask an earlier startup failure.
+    assert disposed
+
+
+@pytest_asyncio.fixture
+async def input_required_fast_agent_a2a_server(
+    unused_tcp_port: int,
+    wait_for_port,
+) -> AsyncIterator[RunningFastAgentA2AServer]:
+    """Serve ``InputRequiredRecordingAgent`` workers over A2A on 127.0.0.1 during a test.
+
+    Yields the base URL, the server object and the agents built by ``create_instance``.
+    """
+    host, port = "127.0.0.1", unused_tcp_port
+    created_agents: list[RecordingAgent] = []
+    disposed: list[AgentInstance] = []
+
+    async def create_instance() -> AgentInstance:
+        agent = InputRequiredRecordingAgent(name="worker")
+        created_agents.append(agent)
+        return _instance(agent)
+
+    async def dispose_instance(instance: AgentInstance) -> None:
+        disposed.append(instance)
+        await instance.shutdown()
+
+    server = AgentA2AServer(
+        primary_instance=_instance(InputRequiredRecordingAgent(name="worker")),
+        create_instance=create_instance,
+        dispose_instance=dispose_instance,
+        server_name="fast-agent input required test server",
+        host=host,
+        port=port,
+    )
+    config = uvicorn.Config(server.asgi_app(), host=host, port=port, log_level="warning")
+    uvicorn_server = uvicorn.Server(config)
+    task = asyncio.create_task(uvicorn_server.serve())
+    try:
+        # Inside the try so that a startup timeout still stops the background server task.
+        await wait_for_port(host, port, timeout=5.0)
+        yield RunningFastAgentA2AServer(
+            base_url=f"http://{host}:{port}", server=server, created_agents=created_agents
+        )
+    finally:
+        uvicorn_server.should_exit = True
+        await asyncio.wait_for(task, timeout=5.0)
+        await server.executor.shutdown()
+    # Only reached on normal teardown, so it cannot mask an earlier startup failure.
+    assert disposed
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_fast_agent_a2a_server_serves_jsonrpc_agent_with_context_sessions(
+    fast_agent_a2a_server: RunningFastAgentA2AServer,
+) -> None:
+    """Send two history-enabled turns over JSON-RPC, then inspect replies and agent card.
+
+    Both turns are expected to be answered by a single created agent, and the card is
+    expected to list two protocol bindings, one ``worker`` skill and four media types.
+    """
+    remote = A2ARemoteAgent(
+        config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=True),
+        a2a_config=A2AAgentConfig(url=fast_agent_a2a_server.base_url, transport="JSONRPC"),
+    )
+
+    # One user message per turn.
+    first_prompt = PromptMessageExtended(
+        role="user",
+        content=[TextContent(type="text", text="first")],
+    )
+    second_prompt = PromptMessageExtended(
+        role="user",
+        content=[TextContent(type="text", text="second")],
+    )
+
+    await remote.initialize()
+    try:
+        first = await remote.generate_impl([first_prompt])
+        second = await remote.generate_impl([second_prompt])
+    finally:
+        await remote.shutdown()
+
+    # Reply texts, then the number of agents the fixture had to create.
+    assert first.all_text() == "server saw 1: first"
+    assert second.all_text() == "server saw 3: second"
+    assert len(fast_agent_a2a_server.created_agents) == 1
+
+    # Card name and advertised protocol bindings.
+    card = fast_agent_a2a_server.server.agent_card
+    assert card.name == "fast-agent test server"
+    bindings = {interface.protocol_binding for interface in card.supported_interfaces}
+    assert bindings == {"JSONRPC", "HTTP+JSON"}
+
+    # Exactly one skill, checked for its name, description and tags.
+    skills = {skill.id: skill for skill in card.skills}
+    assert set(skills) == {"worker"}
+    worker = skills["worker"]
+    assert worker.name == "worker"
+    assert worker.description == "Send a message to the worker fast-agent agent."
+    assert list(worker.tags) == ["fast-agent", "basic"]
+
+    # The same media types are expected at card level and skill level, in and out.
+    expected_modes = [
+        "text/plain",
+        "application/json",
+        "application/octet-stream",
+        "image/*",
+    ]
+
+    advertised = (
+        card.default_input_modes,
+        card.default_output_modes,
+        worker.input_modes,
+        worker.output_modes,
+    )
+    for modes in advertised:
+        assert list(modes) == expected_modes
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_without_history_uses_fresh_server_contexts(
+    fast_agent_a2a_server: RunningFastAgentA2AServer,
+) -> None:
+    """Check that a history-less remote agent gets a new context id for every turn.
+
+    Each reply is expected to report a count of 1, and the fixture is expected to
+    have created one agent per turn.
+    """
+    remote = A2ARemoteAgent(
+        config=AgentConfig(name="remote_no_history", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=fast_agent_a2a_server.base_url, transport="JSONRPC"),
+    )
+    first_prompt = PromptMessageExtended(
+        role="user",
+        content=[TextContent(type="text", text="first")],
+    )
+    second_prompt = PromptMessageExtended(
+        role="user",
+        content=[TextContent(type="text", text="second")],
+    )
+    await remote.initialize()
+    try:
+        first = await remote.generate_impl([first_prompt])
+        # Captured between the turns so it can be compared with the id after turn two.
+        first_context_id = remote.context_id
+        assert first_context_id
+        second = await remote.generate_impl([second_prompt])
+    finally:
+        await remote.shutdown()
+
+    assert first.all_text() == "server saw 1: first"
+    assert second.all_text() == "server saw 1: second"
+    assert remote.context_id != first_context_id
+    assert len(fast_agent_a2a_server.created_agents) == 2
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_fast_agent_a2a_server_shared_instance_scope_reuses_primary_instance(
+    unused_tcp_port: int,
+    wait_for_port,
+) -> None:
+    """Run two turns against a ``shared``-scope server and expect no new instances.
+
+    The second reply is expected to report 3 messages, and ``create_instance`` must
+    never be called.
+    """
+    host, port = "127.0.0.1", unused_tcp_port
+    created_agents: list[RecordingAgent] = []
+
+    async def create_instance() -> AgentInstance:
+        worker = RecordingAgent(name="worker")
+        created_agents.append(worker)
+        return _instance(worker)
+
+    async def dispose_instance(instance: AgentInstance) -> None:
+        await instance.shutdown()
+
+    server = AgentA2AServer(
+        primary_instance=_instance(RecordingAgent(name="worker")),
+        create_instance=create_instance,
+        dispose_instance=dispose_instance,
+        server_name="fast-agent shared scope test server",
+        host=host,
+        port=port,
+        instance_scope="shared",
+    )
+    config = uvicorn.Config(server.asgi_app(), host=host, port=port, log_level="warning")
+    uvicorn_server = uvicorn.Server(config)
+    # Serve in a background task and wait (up to 5 s) for the port.
+    serve_task = asyncio.create_task(uvicorn_server.serve())
+    await wait_for_port(host, port, timeout=5.0)
+
+    remote = A2ARemoteAgent(
+        config=AgentConfig(name="remote_shared", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=f"http://{host}:{port}", transport="JSONRPC"),
+    )
+    first_prompt = PromptMessageExtended(
+        role="user",
+        content=[TextContent(type="text", text="first")],
+    )
+    second_prompt = PromptMessageExtended(
+        role="user",
+        content=[TextContent(type="text", text="second")],
+    )
+    await remote.initialize()
+    try:
+        first = await remote.generate_impl([first_prompt])
+        second = await remote.generate_impl([second_prompt])
+    finally:
+        # Stop the client first, then the HTTP server, then the A2A executor.
+        await remote.shutdown()
+        uvicorn_server.should_exit = True
+        await asyncio.wait_for(serve_task, timeout=5.0)
+        await server.executor.shutdown()
+
+    assert first.all_text() == "server saw 1: first"
+    assert second.all_text() == "server saw 3: second"
+    # An empty list means create_instance was never invoked.
+    assert not created_agents
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_fast_agent_a2a_server_request_instance_scope_disposes_each_turn(
+    unused_tcp_port: int,
+    wait_for_port,
+) -> None:
+    """Run two history-enabled turns against a ``request``-scope server.
+
+    Both replies are expected to report a count of 1, and two instances are expected
+    to be created and disposed.
+    """
+    host, port = "127.0.0.1", unused_tcp_port
+    created_agents: list[RecordingAgent] = []
+    disposed: list[AgentInstance] = []
+
+    async def create_instance() -> AgentInstance:
+        worker = RecordingAgent(name="worker")
+        created_agents.append(worker)
+        return _instance(worker)
+
+    async def dispose_instance(instance: AgentInstance) -> None:
+        disposed.append(instance)
+        await instance.shutdown()
+
+    server = AgentA2AServer(
+        primary_instance=_instance(RecordingAgent(name="worker")),
+        create_instance=create_instance,
+        dispose_instance=dispose_instance,
+        server_name="fast-agent request scope test server",
+        host=host,
+        port=port,
+        instance_scope="request",
+    )
+    config = uvicorn.Config(server.asgi_app(), host=host, port=port, log_level="warning")
+    uvicorn_server = uvicorn.Server(config)
+    # Serve in a background task and wait (up to 5 s) for the port.
+    serve_task = asyncio.create_task(uvicorn_server.serve())
+    await wait_for_port(host, port, timeout=5.0)
+
+    remote = A2ARemoteAgent(
+        config=AgentConfig(name="remote_request", agent_type=AgentType.A2A, use_history=True),
+        a2a_config=A2AAgentConfig(url=f"http://{host}:{port}", transport="JSONRPC"),
+    )
+    first_prompt = PromptMessageExtended(
+        role="user",
+        content=[TextContent(type="text", text="first")],
+    )
+    second_prompt = PromptMessageExtended(
+        role="user",
+        content=[TextContent(type="text", text="second")],
+    )
+    await remote.initialize()
+    try:
+        first = await remote.generate_impl([first_prompt])
+        second = await remote.generate_impl([second_prompt])
+    finally:
+        # Stop the client first, then the HTTP server, then the A2A executor.
+        await remote.shutdown()
+        uvicorn_server.should_exit = True
+        await asyncio.wait_for(serve_task, timeout=5.0)
+        await server.executor.shutdown()
+
+    assert first.all_text() == "server saw 1: first"
+    assert second.all_text() == "server saw 1: second"
+    # Two instances were created and two were handed to dispose_instance.
+    assert len(created_agents) == 2
+    assert len(disposed) == 2
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_fast_agent_a2a_server_routes_to_agent_skill_named_in_metadata(
+    unused_tcp_port: int,
+    wait_for_port,
+) -> None:
+    """Route a message to the ``specialist`` agent by naming it in message metadata.
+
+    The server hosts a default ``primary`` agent and a non-default ``specialist``;
+    only the specialist is expected to receive the prompt and produce the reply.
+    """
+    host, port = "127.0.0.1", unused_tcp_port
+    created_agents: list[tuple[NamedResponseAgent, NamedResponseAgent]] = []
+    disposed: list[AgentInstance] = []
+
+    def agent_pair() -> tuple[NamedResponseAgent, NamedResponseAgent]:
+        default_agent = NamedResponseAgent(name="primary")
+        default_agent.config.default = True
+        routed_agent = NamedResponseAgent(name="specialist")
+        routed_agent.config.default = False
+        routed_agent.config.description = "Handle specialist work."
+        return default_agent, routed_agent
+
+    async def create_instance() -> AgentInstance:
+        pair = agent_pair()
+        created_agents.append(pair)
+        return _multi_agent_instance(*pair)
+
+    async def dispose_instance(instance: AgentInstance) -> None:
+        disposed.append(instance)
+        await instance.shutdown()
+
+    server = AgentA2AServer(
+        primary_instance=_multi_agent_instance(*agent_pair()),
+        create_instance=create_instance,
+        dispose_instance=dispose_instance,
+        server_name="fast-agent routing test server",
+        host=host,
+        port=port,
+    )
+    config = uvicorn.Config(server.asgi_app(), host=host, port=port, log_level="warning")
+    uvicorn_server = uvicorn.Server(config)
+    serve_task = asyncio.create_task(uvicorn_server.serve())
+    await wait_for_port(host, port, timeout=5.0)
+
+    http_client = httpx.AsyncClient()
+    client_config = ClientConfig(
+        httpx_client=http_client, supported_protocol_bindings=["JSONRPC"]
+    )
+    client = await create_client(f"http://{host}:{port}", client_config=client_config)
+    response_text: str | None = None
+    try:
+        routed_message = Message(
+            role=Role.ROLE_USER,
+            message_id="target-specialist",
+            parts=[Part(text="route this")],
+            # The target agent is named through the "agent" metadata entry.
+            metadata={"agent": "specialist"},
+        )
+        async for event in client.send_message(SendMessageRequest(message=routed_message)):
+            if not event.HasField("artifact_update"):
+                continue
+            # Keep the first part's text from the latest update whose first part is text.
+            artifact_parts = event.artifact_update.artifact.parts
+            if artifact_parts and artifact_parts[0].HasField("text"):
+                response_text = artifact_parts[0].text
+    finally:
+        await client.close()
+        await http_client.aclose()
+        uvicorn_server.should_exit = True
+        await asyncio.wait_for(serve_task, timeout=5.0)
+        await server.executor.shutdown()
+
+    skills = {skill.id: skill for skill in server.agent_card.skills}
+    assert set(skills) == {"primary", "specialist"}
+    assert skills["specialist"].description == "Handle specialist work."
+    assert response_text == "specialist handled: route this"
+    # Inspect the first pair built by create_instance (the bootstrap pair is not recorded).
+    assert created_agents
+    primary, specialist = created_agents[0]
+    assert not primary.received
+    assert len(specialist.received) == 1
+    assert disposed
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_fast_agent_a2a_server_context_does_not_force_agent_history(
+    unused_tcp_port: int,
+    wait_for_port,
+) -> None:
+    """An agent configured without history must not accumulate it across A2A turns.
+
+    Both replies are expected to report a history length of 0, and two agents are
+    expected to be created for the two turns.
+    """
+    host, port = "127.0.0.1", unused_tcp_port
+    created_agents: list[NoHistoryRecordingAgent] = []
+    disposed: list[AgentInstance] = []
+
+    async def create_instance() -> AgentInstance:
+        worker = NoHistoryRecordingAgent(name="worker")
+        created_agents.append(worker)
+        return _instance(worker)
+
+    async def dispose_instance(instance: AgentInstance) -> None:
+        disposed.append(instance)
+        await instance.shutdown()
+
+    server = AgentA2AServer(
+        primary_instance=_instance(NoHistoryRecordingAgent(name="worker")),
+        create_instance=create_instance,
+        dispose_instance=dispose_instance,
+        server_name="fast-agent no-history test server",
+        host=host,
+        port=port,
+    )
+    config = uvicorn.Config(server.asgi_app(), host=host, port=port, log_level="warning")
+    uvicorn_server = uvicorn.Server(config)
+    # Serve in a background task and wait (up to 5 s) for the port.
+    serve_task = asyncio.create_task(uvicorn_server.serve())
+    await wait_for_port(host, port, timeout=5.0)
+
+    remote = A2ARemoteAgent(
+        config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=f"http://{host}:{port}", transport="JSONRPC"),
+    )
+    first_prompt = PromptMessageExtended(
+        role="user",
+        content=[TextContent(type="text", text="first")],
+    )
+    second_prompt = PromptMessageExtended(
+        role="user",
+        content=[TextContent(type="text", text="second")],
+    )
+    await remote.initialize()
+    try:
+        first = await remote.generate_impl([first_prompt])
+        second = await remote.generate_impl([second_prompt])
+    finally:
+        # Stop the client first, then the HTTP server, then the A2A executor.
+        await remote.shutdown()
+        uvicorn_server.should_exit = True
+        await asyncio.wait_for(serve_task, timeout=5.0)
+        await server.executor.shutdown()
+
+    assert first.all_text() == "server history 0: first"
+    assert second.all_text() == "server history 0: second"
+    # Two agents were created, and at least one instance was disposed.
+    assert len(created_agents) == 2
+    assert disposed
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_fast_agent_a2a_server_cancel_task_cancels_running_agent(
+    unused_tcp_port: int,
+    wait_for_port,
+) -> None:
+    """Cancel a task while its agent is still running and verify the cancelled state.
+
+    A streaming request runs in the background; once the agent has started and the
+    task event has reached the client, the task is cancelled, fetched and listed.
+    """
+    host, port = "127.0.0.1", unused_tcp_port
+    created_agents: list[CancellableRecordingAgent] = []
+    disposed: list[AgentInstance] = []
+
+    async def create_instance() -> AgentInstance:
+        agent = CancellableRecordingAgent(name="worker")
+        created_agents.append(agent)
+        return _instance(agent)
+
+    async def dispose_instance(instance: AgentInstance) -> None:
+        disposed.append(instance)
+        await instance.shutdown()
+
+    server = AgentA2AServer(
+        primary_instance=_instance(CancellableRecordingAgent(name="worker")),
+        create_instance=create_instance,
+        dispose_instance=dispose_instance,
+        server_name="fast-agent cancellation test server",
+        host=host,
+        port=port,
+    )
+    config = uvicorn.Config(server.asgi_app(), host=host, port=port, log_level="warning")
+    uvicorn_server = uvicorn.Server(config)
+    server_task = asyncio.create_task(uvicorn_server.serve())
+    await wait_for_port(host, port, timeout=5.0)
+
+    http_client = httpx.AsyncClient()
+    client_config = ClientConfig(httpx_client=http_client, supported_protocol_bindings=["JSONRPC"])
+    client = await create_client(f"http://{host}:{port}", client_config=client_config)
+    events: list[Any] = []
+    stream_error: BaseException | None = None
+
+    async def consume_stream() -> None:
+        # Collect every streamed event; remember (rather than raise) any failure.
+        nonlocal stream_error
+        try:
+            message = Message(role=Role.ROLE_USER, message_id="cancel-me", parts=[Part(text="please wait")])
+            async for event in client.send_message(SendMessageRequest(message=message)):
+                events.append(event)
+        except BaseException as exc:
+            stream_error = exc
+
+    stream_task = asyncio.create_task(consume_stream())
+    try:
+        loop = asyncio.get_running_loop()
+        deadline = loop.time() + 5
+        while not created_agents and loop.time() < deadline:
+            await asyncio.sleep(0.01)
+        assert created_agents
+        await asyncio.wait_for(created_agents[0].started.wait(), timeout=5)
+        # The agent can start before the task event has travelled back over the
+        # stream, so poll for it instead of assuming it is already in ``events``.
+        deadline = loop.time() + 5
+        while not any(event.HasField("task") for event in events) and loop.time() < deadline:
+            await asyncio.sleep(0.01)
+        task_id = next((event.task.id for event in events if event.HasField("task")), None)
+        assert task_id is not None, "no task event was received before the deadline"
+
+        cancelled = await client.cancel_task(CancelTaskRequest(id=task_id))
+
+        assert cancelled.status.state == TaskState.TASK_STATE_CANCELED
+        await asyncio.wait_for(created_agents[0].cancelled.wait(), timeout=5)
+        fetched = await client.get_task(GetTaskRequest(id=task_id))
+        listed = await client.list_tasks(ListTasksRequest())
+    finally:
+        # Stop the stream consumer first, then the clients, the HTTP server and the executor.
+        stream_task.cancel()
+        with contextlib.suppress(asyncio.CancelledError):
+            await stream_task
+        await client.close()
+        await http_client.aclose()
+        uvicorn_server.should_exit = True
+        await asyncio.wait_for(server_task, timeout=5.0)
+        await server.executor.shutdown()
+
+    assert stream_error is None or isinstance(stream_error, asyncio.CancelledError)
+    assert fetched.status.state == TaskState.TASK_STATE_CANCELED
+    assert any(task.id == task_id and task.status.state == TaskState.TASK_STATE_CANCELED for task in listed.tasks)
+    assert disposed
+
+
+@pytest.mark.integration
+def test_fast_agent_a2a_server_does_not_advertise_wildcard_bind_host() -> None:
+    """A server bound to ``0.0.0.0`` must not publish that address in its agent card.
+
+    The static card is expected to use ``localhost``; the card served over HTTP is
+    expected to use the host the request was addressed to.
+    """
+
+    async def create_instance() -> AgentInstance:
+        return _instance(RecordingAgent(name="worker"))
+
+    async def dispose_instance(instance: AgentInstance) -> None:
+        del instance
+
+    server = AgentA2AServer(
+        primary_instance=_instance(RecordingAgent(name="worker")),
+        create_instance=create_instance,
+        dispose_instance=dispose_instance,
+        server_name="fast-agent wildcard test server",
+        host="0.0.0.0",
+        port=41241,
+    )
+
+    static_urls = set()
+    for interface in server.agent_card.supported_interfaces:
+        static_urls.add(interface.url)
+    assert static_urls == {"http://localhost:41241/a2a/jsonrpc", "http://localhost:41241/a2a/rest"}
+
+    http = TestClient(server.asgi_app(), base_url="http://agent.example:41241")
+    card_response = http.get("/.well-known/agent-card.json")
+    card_response.raise_for_status()
+
+    served_urls = {entry["url"] for entry in card_response.json()["supportedInterfaces"]}
+    assert served_urls == {"http://agent.example:41241/a2a/jsonrpc", "http://agent.example:41241/a2a/rest"}
+
+
+@pytest.mark.integration
+def test_fast_agent_a2a_server_uses_public_url_env_for_dynamic_card(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """With ``FAST_AGENT_PUBLIC_URL`` set, the served card must use that URL as its base."""
+    bootstrap_agent = RecordingAgent(name="worker")
+
+    async def create_instance() -> AgentInstance:
+        return _instance(RecordingAgent(name="worker"))
+
+    async def dispose_instance(instance: AgentInstance) -> None:
+        del instance
+
+    monkeypatch.setenv("FAST_AGENT_PUBLIC_URL", "https://agent.example")
+    server = AgentA2AServer(
+        primary_instance=_instance(bootstrap_agent),
+        create_instance=create_instance,
+        dispose_instance=dispose_instance,
+        server_name="fast-agent public URL test server",
+        host="0.0.0.0",
+        port=41241,
+    )
+
+    # The request is addressed to a host that differs from the public URL.
+    http = TestClient(server.asgi_app(), base_url="http://internal.example:41241")
+    card_response = http.get("/.well-known/agent-card.json")
+    card_response.raise_for_status()
+
+    served_urls = {entry["url"] for entry in card_response.json()["supportedInterfaces"]}
+    expected_urls = {"https://agent.example/a2a/jsonrpc", "https://agent.example/a2a/rest"}
+    assert served_urls == expected_urls
+
+
+@pytest.mark.integration
+def test_fast_agent_a2a_server_preserves_raw_file_input_parts() -> None:
+    """A raw PDF part must become an embedded blob resource named after the file."""
+    pdf_part = Part(
+        raw=b"%PDF test bytes",
+        media_type="application/pdf",
+        filename="report.pdf",
+    )
+    message = Message(role=Role.ROLE_USER, message_id="file-input", parts=[pdf_part])
+
+    prompt = _prompt_from_a2a_message(message)
+
+    # Exactly one content item is expected for the single input part.
+    assert len(prompt.content) == 1
+    content = prompt.content[0]
+    assert isinstance(content, EmbeddedResource)
+
+    resource = content.resource
+    assert isinstance(resource, BlobResourceContents)
+    assert str(resource.uri) == "attachment:///report.pdf"
+    assert resource.mimeType == "application/pdf"
+    # Base64 encoding of b"%PDF test bytes".
+    assert resource.blob == "JVBERiB0ZXN0IGJ5dGVz"
+
+
+@pytest.mark.integration
+def test_fast_agent_a2a_server_maps_raw_image_input_parts() -> None:
+    """A raw ``image/png`` part must be mapped to ``ImageContent``.
+
+    The image bytes are expected to arrive base64-encoded with the MIME type kept.
+    """
+    png_part = Part(
+        raw=b"image bytes",
+        media_type="image/png",
+        filename="chart.png",
+    )
+    message = Message(role=Role.ROLE_USER, message_id="image-input", parts=[png_part])
+
+    prompt = _prompt_from_a2a_message(message)
+
+    assert len(prompt.content) == 1
+    content = prompt.content[0]
+    assert isinstance(content, ImageContent)
+    assert content.mimeType == "image/png"
+    # Base64 encoding of b"image bytes".
+    assert content.data == "aW1hZ2UgYnl0ZXM="
+
+
+@pytest.mark.integration
+def test_fast_agent_a2a_server_preserves_raw_audio_as_blob_resource() -> None:
+    """A raw WAV part must become an embedded blob resource named after the file."""
+    wav_part = Part(
+        raw=b"audio bytes",
+        media_type="audio/wav",
+        filename="clip.wav",
+    )
+    message = Message(role=Role.ROLE_USER, message_id="audio-input", parts=[wav_part])
+
+    prompt = _prompt_from_a2a_message(message)
+
+    # Exactly one content item is expected for the single input part.
+    assert len(prompt.content) == 1
+    content = prompt.content[0]
+    assert isinstance(content, EmbeddedResource)
+
+    resource = content.resource
+    assert isinstance(resource, BlobResourceContents)
+    assert str(resource.uri) == "attachment:///clip.wav"
+    assert resource.mimeType == "audio/wav"
+    # Base64 encoding of b"audio bytes".
+    assert resource.blob == "YXVkaW8gYnl0ZXM="
+
+
+@pytest.mark.integration
+def test_fast_agent_a2a_server_emits_blob_resources_as_raw_file_parts() -> None:
+    """An embedded blob resource in a reply must be emitted as one raw file part."""
+    blob = BlobResourceContents(
+        uri=AnyUrl("attachment:///report.pdf"),
+        mimeType="application/pdf",
+        blob="JVBERiB0ZXN0IGJ5dGVz",
+    )
+    reply = PromptMessageExtended(
+        role="assistant",
+        content=[EmbeddedResource(type="resource", resource=blob)],
+    )
+
+    parts = _parts_from_prompt_message(reply)
+
+    assert len(parts) == 1
+    only_part = parts[0]
+    # The blob is given as base64; the part is expected to carry the decoded bytes.
+    assert only_part.raw == b"%PDF test bytes"
+    assert only_part.media_type == "application/pdf"
+    # The filename is expected to match the last segment of the resource URI.
+    assert only_part.filename == "report.pdf"
+
+
+@pytest.mark.integration
+def test_fast_agent_a2a_server_emits_json_text_resources_as_data_parts() -> None:
+    """An embedded JSON text resource must be emitted as a structured ``data`` part."""
+    json_resource = TextResourceContents(
+        uri=AnyUrl("resource:///tickets.json"),
+        mimeType="application/json",
+        text='{"tickets": [{"id": "REQ123", "status": "open"}]}',
+    )
+    reply = PromptMessageExtended(
+        role="assistant",
+        content=[EmbeddedResource(type="resource", resource=json_resource)],
+    )
+
+    parts = _parts_from_prompt_message(reply)
+
+    assert len(parts) == 1
+    data_part = parts[0]
+    assert data_part.HasField("data")
+    assert data_part.media_type == "application/json"
+    # The JSON text is expected to arrive parsed, not as a string.
+    expected_payload = {
+        "tickets": [{"id": "REQ123", "status": "open"}],
+    }
+    assert MessageToDict(data_part)["data"] == expected_payload
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_fast_agent_a2a_server_serves_http_json_transport(
+    fast_agent_a2a_server: RunningFastAgentA2AServer,
+) -> None:
+    """A single turn sent with the ``HTTP+JSON`` transport must be answered by the server."""
+    # Only the transport differs from the JSON-RPC client set-up used elsewhere in this file.
+    remote = A2ARemoteAgent(
+        config=AgentConfig(name="remote_http", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=fast_agent_a2a_server.base_url, transport="HTTP+JSON"),
+    )
+    prompt = PromptMessageExtended(
+        role="user",
+        content=[TextContent(type="text", text="over rest")],
+    )
+    await remote.initialize()
+    try:
+        response = await remote.generate_impl([prompt])
+    finally:
+        await remote.shutdown()
+
+    # The reply echoes the prompt text after a "server saw 1: " prefix.
+    assert response.all_text() == "server saw 1: over rest"
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_fast_agent_a2a_server_streams_live_artifact_updates_to_client(
+    streaming_fast_agent_a2a_server: RunningFastAgentA2AServer,
+) -> None:
+    """Streamed chunks must reach the client's stream listener in order.
+
+    The final response text is expected to equal the concatenation of the chunks.
+    """
+    a2a_config = A2AAgentConfig(url=streaming_fast_agent_a2a_server.base_url, transport="JSONRPC")
+    remote = A2ARemoteAgent(
+        config=AgentConfig(name="remote_stream", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=a2a_config,
+    )
+    chunks: list[str] = []
+
+    def collect(chunk: Any) -> None:
+        chunks.append(chunk.text)
+
+    remote.add_stream_listener(collect)
+    prompt = PromptMessageExtended(role="user", content=[TextContent(type="text", text="stream please")])
+    await remote.initialize()
+    try:
+        response = await remote.generate_impl([prompt])
+    finally:
+        await remote.shutdown()
+
+    assert chunks == ["stream ", "from ", "server"]
+    assert response.all_text() == "stream from server"
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_fast_agent_a2a_server_preserves_input_required_task_for_follow_up(
+    input_required_fast_agent_a2a_server: RunningFastAgentA2AServer,
+) -> None:
+    client = A2ARemoteAgent(
+        config=AgentConfig(name="remote_input", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(
+            url=input_required_fast_agent_a2a_server.base_url,
+            transport="JSONRPC",
+        ),
+    )
+    await client.initialize()
+    try:
+        first = await client.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="need input")],
+                )
+            ]
+        )
+        input_task_id = client.current_task_id
+        second = await client.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="blue")],
+                )
+            ]
+        )
+    finally:
+        await client.shutdown()
+
+    assert first.all_text() == "A2A task TASK_STATE_INPUT_REQUIRED: Please provide the missing value."
+    assert first.stop_reason == LlmStopReason.PAUSE
+    assert input_task_id
+    assert "input received: blue" in second.all_text()
+    assert second.stop_reason == LlmStopReason.END_TURN
+    assert client.current_task_id is None
+    assert client.last_task_state == "TASK_STATE_COMPLETED"
+    assert len(input_required_fast_agent_a2a_server.created_agents) == 1
+
+
+@pytest.mark.integration
+def test_fast_agent_a2a_server_hf_auth_card_and_rejection(monkeypatch) -> None:
+    monkeypatch.setenv("FAST_AGENT_SERVE_OAUTH", "huggingface")
+    monkeypatch.setenv("FAST_AGENT_OAUTH_RESOURCE_URL", "http://testserver")
+    server = AgentA2AServer(
+        primary_instance=_instance(TokenEchoAgent(name="worker")),
+        create_instance=lambda: _async_instance(TokenEchoAgent(name="worker")),
+        dispose_instance=_async_dispose_instance,
+        server_name="fast-agent auth test server",
+        host="127.0.0.1",
+        port=41241,
+    )
+    client = TestClient(server.asgi_app(), base_url="http://testserver")
+
+    card_response = client.get("/.well-known/agent-card.json")
+    card_response.raise_for_status()
+    payload = card_response.json()
+
+    assert "hf_bearer" in payload["securitySchemes"]
+    assert payload["securityRequirements"] == [{"schemes": {"hf_bearer": {}}}]
+    assert payload["skills"][0]["securityRequirements"] == [{"schemes": {"hf_bearer": {}}}]
+
+    rejected = client.post("/a2a/jsonrpc", json={})
+    assert rejected.status_code == 401
+    assert rejected.headers["www-authenticate"].startswith("Bearer ")
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    ("headers", "expected"),
+    [
+        ({"Authorization": "Bearer request-token"}, "request-token"),
+        ({"X-HF-Authorization": "Bearer hf-space-token"}, "hf-space-token"),
+    ],
+)
+async def test_fast_agent_a2a_server_passes_bearer_token_to_request_context(
+    monkeypatch,
+    unused_tcp_port: int,
+    wait_for_port,
+    headers: dict[str, str],
+    expected: str,
+) -> None:
+    monkeypatch.setenv("FAST_AGENT_SERVE_OAUTH", "huggingface")
+    monkeypatch.setenv("FAST_AGENT_OAUTH_RESOURCE_URL", "http://127.0.0.1")
+    host = "127.0.0.1"
+    port = unused_tcp_port
+    disposed: list[AgentInstance] = []
+
+    async def create_instance() -> AgentInstance:
+        return _instance(TokenEchoAgent(name="worker"))
+
+    async def dispose_instance(instance: AgentInstance) -> None:
+        disposed.append(instance)
+        await instance.shutdown()
+
+    server = AgentA2AServer(
+        primary_instance=_instance(TokenEchoAgent(name="worker")),
+        create_instance=create_instance,
+        dispose_instance=dispose_instance,
+        server_name="fast-agent auth propagation test server",
+        host=host,
+        port=port,
+    )
+    uvicorn_server = uvicorn.Server(
+        uvicorn.Config(server.asgi_app(), host=host, port=port, log_level="warning")
+    )
+    server_task = asyncio.create_task(uvicorn_server.serve())
+    await wait_for_port(host, port, timeout=5.0)
+
+    client = A2ARemoteAgent(
+        config=AgentConfig(name="remote_auth", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(
+            url=f"http://{host}:{port}",
+            transport="JSONRPC",
+            headers=headers,
+        ),
+    )
+    await client.initialize()
+    try:
+        response = await client.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="who am i")],
+                )
+            ]
+        )
+    finally:
+        await client.shutdown()
+        uvicorn_server.should_exit = True
+        await asyncio.wait_for(server_task, timeout=5.0)
+        await server.executor.shutdown()
+
+    assert response.all_text() == expected
+    assert disposed
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_fast_agent_a2a_server_sets_bearer_token_before_instance_creation(
+    monkeypatch,
+    unused_tcp_port: int,
+    wait_for_port,
+) -> None:
+    monkeypatch.setenv("FAST_AGENT_SERVE_OAUTH", "huggingface")
+    monkeypatch.setenv("FAST_AGENT_OAUTH_RESOURCE_URL", "http://127.0.0.1")
+    host = "127.0.0.1"
+    port = unused_tcp_port
+    tokens_seen: list[str | None] = []
+
+    async def create_instance() -> AgentInstance:
+        tokens_seen.append(request_bearer_token.get())
+        return _instance(TokenEchoAgent(name="worker"))
+
+    async def dispose_instance(instance: AgentInstance) -> None:
+        await instance.shutdown()
+
+    server = AgentA2AServer(
+        primary_instance=_instance(TokenEchoAgent(name="worker")),
+        create_instance=create_instance,
+        dispose_instance=dispose_instance,
+        server_name="fast-agent auth early propagation test server",
+        host=host,
+        port=port,
+        instance_scope="request",
+    )
+    uvicorn_server = uvicorn.Server(
+        uvicorn.Config(server.asgi_app(), host=host, port=port, log_level="warning")
+    )
+    server_task = asyncio.create_task(uvicorn_server.serve())
+    await wait_for_port(host, port, timeout=5.0)
+
+    client = A2ARemoteAgent(
+        config=AgentConfig(name="remote_auth", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(
+            url=f"http://{host}:{port}",
+            transport="JSONRPC",
+            headers={"Authorization": "Bearer request-token"},
+        ),
+    )
+    await client.initialize()
+    try:
+        response = await client.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="who am i")],
+                )
+            ]
+        )
+    finally:
+        await client.shutdown()
+        uvicorn_server.should_exit = True
+        await asyncio.wait_for(server_task, timeout=5.0)
+        await server.executor.shutdown()
+
+    assert response.all_text() == "request-token"
+    assert tokens_seen == ["request-token"]
+
+
+def test_bearer_token_from_call_context_prefers_saved_request_state() -> None:
+    context = SimpleNamespace(
+        call_context=SimpleNamespace(
+            state={
+                "fast_agent_bearer_token": "saved-token",
+                "headers": {"authorization": "Bearer header-token"},
+            }
+        )
+    )
+
+    assert _bearer_token_from_call_context(cast("Any", context)) == "saved-token"
+
+
+async def _async_instance(agent: RecordingAgent) -> AgentInstance:
+    return _instance(agent)  # awaitable adapter around the synchronous _instance() helper
+
+
+async def _async_dispose_instance(instance: AgentInstance) -> None:
+    await instance.shutdown()  # disposing an instance here just shuts it down
diff --git a/tests/integration/a2a/test_remote_agent_runtime.py b/tests/integration/a2a/test_remote_agent_runtime.py
new file mode 100644
index 000000000..15e2c6a7b
--- /dev/null
+++ b/tests/integration/a2a/test_remote_agent_runtime.py
@@ -0,0 +1,377 @@
+from __future__ import annotations
+
+import pytest
+from mcp.types import TextContent
+
+from fast_agent.a2a.config import A2AAgentConfig
+from fast_agent.a2a.remote_agent import A2ARemoteAgent
+from fast_agent.agents.agent_types import AgentConfig, AgentType
+from fast_agent.types import LlmStopReason, PromptMessageExtended
+from tests.integration.a2a.conftest import FAKE_A2A_HELP, LONG_STREAM_CHUNKS
+
+
+async def _send_text(base_url: str, transport: str) -> A2ARemoteAgent:
+    agent = A2ARemoteAgent(
+        config=AgentConfig(
+            name=f"remote_{transport.lower().replace('+', '_')}",
+            agent_type=AgentType.A2A,
+            use_history=False,
+        ),
+        a2a_config=A2AAgentConfig(url=base_url, transport=transport),
+    )
+    await agent.initialize()
+    try:
+        response = await agent.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text=f"hello over {transport}")],
+                )
+            ]
+        )
+        assert f"echo: hello over {transport}" in response.all_text()
+        assert agent.remote_card is not None
+        assert agent.remote_card.name == "fast-agent test A2A server"
+        assert agent.context_id
+        assert agent.last_task_state is None
+        assert agent.current_task_id is None
+        return agent
+    except Exception:
+        await agent.shutdown()
+        raise
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+@pytest.mark.parametrize("transport", ["JSONRPC", "HTTP+JSON"])
+async def test_a2a_remote_agent_sends_text_over_supported_transports(
+    a2a_test_server, transport: str
+) -> None:
+    agent = await _send_text(a2a_test_server.base_url, transport)  # asserts on the reply
+    await agent.shutdown()  # _send_text leaves the agent open on success
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_emits_stream_chunks(a2a_test_server) -> None:
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote_stream", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=a2a_test_server.base_url, transport="JSONRPC"),
+    )
+    chunks: list[str] = []
+    agent.add_stream_listener(lambda chunk: chunks.append(chunk.text))
+    await agent.initialize()
+    try:
+        response = await agent.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="please stream")],
+                )
+            ]
+        )
+    finally:
+        await agent.shutdown()
+
+    assert "stream chunk one" in response.all_text()
+    assert "stream chunk two" in response.all_text()
+    assert chunks == ["stream chunk one", "stream chunk two"]
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_emits_long_stream_chunks(a2a_test_server) -> None:
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote_long_stream", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=a2a_test_server.base_url, transport="JSONRPC"),
+    )
+    chunks: list[str] = []
+    agent.add_stream_listener(lambda chunk: chunks.append(chunk.text))
+    await agent.initialize()
+    try:
+        response = await agent.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="please long stream")],
+                )
+            ]
+        )
+    finally:
+        await agent.shutdown()
+
+    assert chunks == LONG_STREAM_CHUNKS
+    assert "Starting the remote analysis task." in response.all_text()
+    assert "Step 1 — Reading the request and identifying the goal." in response.all_text()
+    assert "Remote analysis complete." in response.all_text()
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_a2a_fake_server_help_lists_available_prompts(a2a_test_server) -> None:
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote_help", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=a2a_test_server.base_url, transport="JSONRPC"),
+    )
+    await agent.initialize()
+    try:
+        response = await agent.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="help")],
+                )
+            ]
+        )
+    finally:
+        await agent.shutdown()
+
+    assert response.all_text() == FAKE_A2A_HELP
+    assert agent.context_id
+    assert agent.current_task_id is None
+    assert agent.last_task_state is None
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_preserves_input_required_task_for_follow_up(
+    a2a_test_server,
+) -> None:
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote_input", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=a2a_test_server.base_url, transport="JSONRPC"),
+    )
+    await agent.initialize()
+    try:
+        first = await agent.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="need input")],
+                )
+            ]
+        )
+        input_task_id = agent.current_task_id
+
+        second = await agent.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="blue")],
+                )
+            ]
+        )
+    finally:
+        await agent.shutdown()
+
+    assert first.all_text() == "A2A task TASK_STATE_INPUT_REQUIRED: Please provide the missing value."
+    assert first.stop_reason == LlmStopReason.PAUSE
+    assert input_task_id
+    assert "input received: blue" in second.all_text()
+    assert second.stop_reason == LlmStopReason.END_TURN
+    assert agent.current_task_id is None
+    assert agent.last_task_state == "TASK_STATE_COMPLETED"
+    assert a2a_test_server.executor.seen_queries[-2:] == ["need input", "blue"]
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_a2a_fake_server_help_does_not_complete_input_required_task(
+    a2a_test_server,
+) -> None:
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote_input_help", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=a2a_test_server.base_url, transport="JSONRPC"),
+    )
+    await agent.initialize()
+    try:
+        await agent.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="need input")],
+                )
+            ]
+        )
+        input_task_id = agent.current_task_id
+        help_response = await agent.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="help")],
+                )
+            ]
+        )
+    finally:
+        await agent.shutdown()
+
+    assert input_task_id
+    assert agent.current_task_id == input_task_id
+    assert agent.last_task_state == "TASK_STATE_INPUT_REQUIRED"
+    assert "Fake A2A server commands:" in help_response.all_text()
+    assert "Current task is still waiting for input." in help_response.all_text()
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_renders_file_url_data_and_raw_parts(a2a_test_server) -> None:
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote_files", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=a2a_test_server.base_url, transport="JSONRPC"),
+    )
+    await agent.initialize()
+    try:
+        response = await agent.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="respond with files")],
+                )
+            ]
+        )
+    finally:
+        await agent.shutdown()
+
+    text = response.all_text()
+    assert "file response" in text
+    assert "[report.pdf](https://example.com/report.pdf) (application/pdf)" in text
+    assert '"ok": true' in text
+    assert '"count": 2.0' in text
+    assert "[note.txt: 3 bytes text/plain]" in text
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_honors_artifact_append_semantics(a2a_test_server) -> None:
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote_append", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=a2a_test_server.base_url, transport="JSONRPC"),
+    )
+    chunks: list[str] = []
+    agent.add_stream_listener(lambda chunk: chunks.append(chunk.text))
+    await agent.initialize()
+    try:
+        response = await agent.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="artifact append")],
+                )
+            ]
+        )
+    finally:
+        await agent.shutdown()
+
+    assert response.all_text() == "final\nrepeat\nrepeat"
+    assert chunks == ["draft", "final", "\nrepeat", "\nrepeat"]
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_clone_preserves_remote_config(a2a_test_server) -> None:
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote_clone", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=a2a_test_server.base_url, transport="HTTP+JSON"),
+    )
+    await agent.initialize()
+    clone: A2ARemoteAgent | None = None
+    try:
+        clone = await agent.spawn_detached_instance(name="remote_clone[tool]")
+        response = await clone.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="hello clone")],
+                )
+            ]
+        )
+    finally:
+        if clone is not None:
+            await clone.shutdown()
+        await agent.shutdown()
+
+    assert clone.a2a_config == agent.a2a_config
+    assert "echo: hello clone" in response.all_text()
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_sends_url_and_raw_parts(a2a_test_server) -> None:
+    from mcp.types import ImageContent, ResourceLink
+    from pydantic import AnyUrl
+
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote_attachments", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=a2a_test_server.base_url, transport="JSONRPC"),
+    )
+    await agent.initialize()
+    try:
+        response = await agent.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[
+                        TextContent(type="text", text="inspect attachment"),
+                        ResourceLink(
+                            type="resource_link",
+                            name="report.pdf",
+                            uri=AnyUrl("https://example.com/report.pdf"),
+                            mimeType="application/pdf",
+                        ),
+                        ImageContent(type="image", data="YWJj", mimeType="image/png"),
+                    ],
+                )
+            ]
+        )
+    finally:
+        await agent.shutdown()
+
+    assert "echo: inspect attachment [text,url,raw]" in response.all_text()
+    assert a2a_test_server.executor.seen_part_kinds[-1] == ["text", "url", "raw"]
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_a2a_connect_command_adds_runtime_agent(a2a_test_server) -> None:
+    from fast_agent.core.agent_app import AgentApp
+    from fast_agent.ui.command_payloads import A2ACommand
+    from fast_agent.ui.interactive.command_dispatch import dispatch_command_payload
+    from fast_agent.ui.interactive_prompt import InteractivePrompt
+
+    initial = A2ARemoteAgent(
+        config=AgentConfig(name="initial", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url=a2a_test_server.base_url, transport="JSONRPC"),
+    )
+    await initial.initialize()
+    app = AgentApp({"initial": initial})
+    owner = InteractivePrompt(agent_types={"initial": AgentType.A2A})
+    try:
+        result = await dispatch_command_payload(
+            owner,
+            A2ACommand(
+                action="connect",
+                argument=f"{a2a_test_server.base_url} --transport http --name connected",
+            ),
+            prompt_provider=app,
+            agent="initial",
+            available_agents=["initial"],
+            available_agents_set={"initial"},
+            merge_pinned_agents=lambda names: names,
+        )
+        assert result.next_agent == "connected"
+        assert result.available_agents_set == {"initial", "connected"}
+        connected = app.get_agent("connected")
+        assert isinstance(connected, A2ARemoteAgent)
+        response = await connected.generate_impl(
+            [
+                PromptMessageExtended(
+                    role="user",
+                    content=[TextContent(type="text", text="hello connected")],
+                )
+            ]
+        )
+        assert "echo: hello connected" in response.all_text()
+    finally:
+        for remote in app.registered_agents().values():
+            if isinstance(remote, A2ARemoteAgent):
+                await remote.shutdown()
diff --git a/tests/unit/fast_agent/a2a_connect_test.py b/tests/unit/fast_agent/a2a_connect_test.py
new file mode 100644
index 000000000..ad18942b4
--- /dev/null
+++ b/tests/unit/fast_agent/a2a_connect_test.py
@@ -0,0 +1,60 @@
+from fast_agent.a2a.connect import (
+    normalize_a2a_transport,
+    normalize_a2a_url,
+    parse_a2a_connect_arguments,
+)
+
+
+def test_normalize_a2a_transport_aliases() -> None:
+    assert normalize_a2a_transport("json-rpc") == "JSONRPC"
+    assert normalize_a2a_transport("rest") == "HTTP+JSON"
+    assert normalize_a2a_transport("grpc") is None
+    assert normalize_a2a_transport("bogus") is None
+
+
+def test_normalize_a2a_base_url() -> None:
+    url, card_path, error = normalize_a2a_url("http://127.0.0.1:41241/")
+    assert url == "http://127.0.0.1:41241"
+    assert card_path is None
+    assert error is None
+
+
+def test_normalize_a2a_agent_card_url() -> None:
+    url, card_path, error = normalize_a2a_url(
+        "http://127.0.0.1:41241/.well-known/agent-card.json"
+    )
+    assert url == "http://127.0.0.1:41241"
+    assert card_path == "/.well-known/agent-card.json"
+    assert error is None
+
+
+def test_parse_a2a_connect_arguments() -> None:
+    request, error = parse_a2a_connect_arguments(
+        'http://127.0.0.1:41241 --transport rest --name "remote docs" --card-path /card.json'
+    )
+    assert error is None
+    assert request is not None
+    assert request.url == "http://127.0.0.1:41241"
+    assert request.transport == "HTTP+JSON"
+    assert request.name == "remote_docs"
+    assert request.relative_card_path == "/card.json"
+
+
+def test_parse_a2a_connect_oauth_switches() -> None:
+    request, error = parse_a2a_connect_arguments("http://127.0.0.1:41241 --oauth")
+    assert error is None
+    assert request is not None
+    assert request.auth is not None
+    assert request.auth.oauth is True
+
+    request, error = parse_a2a_connect_arguments("http://127.0.0.1:41241 --no-oauth")
+    assert error is None
+    assert request is not None
+    assert request.auth is not None
+    assert request.auth.oauth is False
+
+
+def test_parse_a2a_connect_rejects_endpointless_url() -> None:
+    request, error = parse_a2a_connect_arguments("127.0.0.1:41241")  # no http(s):// scheme
+    assert request is None  # no request object is produced on rejection
+    assert error == "A2A connect expects an http(s) base URL or agent-card URL"
diff --git a/tests/unit/fast_agent/cli/test_a2a_go_options.py b/tests/unit/fast_agent/cli/test_a2a_go_options.py
new file mode 100644
index 000000000..bbc1f7ea2
--- /dev/null
+++ b/tests/unit/fast_agent/cli/test_a2a_go_options.py
@@ -0,0 +1,46 @@
+from pathlib import Path
+
+import pytest
+import typer
+
+from fast_agent.cli.commands.go import _materialize_a2a_agent_cards
+
+
+def test_materialize_a2a_agent_card() -> None:
+    tempdir, paths = _materialize_a2a_agent_cards(
+        ["http://127.0.0.1:41241/.well-known/agent-card.json"],
+        transport="rest",
+        oauth=True,
+    )
+    try:
+        assert len(paths) == 1
+        text = Path(paths[0]).read_text(encoding="utf-8")
+        assert "type: a2a" in text
+        assert "name: a2a_remote" in text
+        assert "url: http://127.0.0.1:41241" in text
+        assert "transport: HTTP+JSON" in text
+        assert "auth:" in text
+        assert "  oauth: true" in text
+        assert "relative_card_path: /.well-known/agent-card.json" in text
+    finally:
+        tempdir.cleanup()
+
+
+def test_materialize_a2a_agent_card_with_auth_uses_authorization_for_hf_space() -> None:
+    tempdir, paths = _materialize_a2a_agent_cards(
+        ["https://demo.hf.space"],
+        transport="jsonrpc",
+        auth_token="Bearer explicit-token",
+    )
+    try:
+        text = Path(paths[0]).read_text(encoding="utf-8")
+        assert "headers:" in text
+        assert "  Authorization: 'Bearer explicit-token'" in text
+        assert "X-HF-Authorization" not in text
+    finally:
+        tempdir.cleanup()
+
+
+def test_materialize_a2a_rejects_bad_transport() -> None:
+    with pytest.raises(typer.BadParameter):  # an unrecognized transport name must be rejected
+        _materialize_a2a_agent_cards(["http://127.0.0.1:41241"], transport="bogus")
diff --git a/tests/unit/fast_agent/cli/test_a2a_serve_options.py b/tests/unit/fast_agent/cli/test_a2a_serve_options.py
new file mode 100644
index 000000000..97600109f
--- /dev/null
+++ b/tests/unit/fast_agent/cli/test_a2a_serve_options.py
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from typer.testing import CliRunner
+
+from fast_agent.cli.commands import serve as serve_command
+
+if TYPE_CHECKING:
+    from fast_agent.cli.runtime.run_request import AgentRunRequest
+
+
+def test_serve_a2a_command_builds_a2a_run_request(monkeypatch) -> None:
+    captured: list[AgentRunRequest] = []
+
+    def fake_run_request(request: AgentRunRequest) -> None:
+        captured.append(request)
+
+    monkeypatch.setattr(serve_command, "run_request", fake_run_request)
+
+    result = CliRunner().invoke(
+        serve_command.app,
+        [
+            "a2a",
+            "--name",
+            "research-a2a",
+            "--host",
+            "127.0.0.1",
+            "--port",
+            "41241",
+            "--instance-scope",
+            "connection",
+            "--agent-cards",
+            "./agents",
+            "--model",
+            "passthrough",
+            "--noenv",
+        ],
+    )
+
+    assert result.exit_code == 0
+    assert len(captured) == 1
+    request = captured[0]
+    assert request.name == "research-a2a"
+    assert request.mode == "serve"
+    assert request.transport == "a2a"
+    assert request.host == "127.0.0.1"
+    assert request.port == 41241
+    assert request.instance_scope == "connection"
+    assert request.agent_cards == ["./agents"]
+    assert request.model == "passthrough"
+    assert request.noenv is True
+
+
+def test_serve_transport_a2a_callback_path_builds_a2a_run_request(monkeypatch) -> None:
+    captured: list[AgentRunRequest] = []
+
+    def fake_run_request(request: AgentRunRequest) -> None:
+        captured.append(request)
+
+    monkeypatch.setattr(serve_command, "run_request", fake_run_request)
+
+    result = CliRunner().invoke(
+        serve_command.app,
+        [
+            "--transport",
+            "a2a",
+            "--name",
+            "generic-a2a",
+            "--host",
+            "127.0.0.1",
+            "--port",
+            "41242",
+            "--instance-scope",
+            "request",
+            "--noenv",
+        ],
+    )
+
+    assert result.exit_code == 0
+    assert len(captured) == 1
+    request = captured[0]
+    assert request.name == "generic-a2a"
+    assert request.transport == "a2a"
+    assert request.instance_scope == "request"
diff --git a/tests/unit/fast_agent/commands/test_cli_completion_help.py b/tests/unit/fast_agent/commands/test_cli_completion_help.py
index c6a109ecf..4fc8865fc 100644
--- a/tests/unit/fast_agent/commands/test_cli_completion_help.py
+++ b/tests/unit/fast_agent/commands/test_cli_completion_help.py
@@ -1,5 +1,6 @@
 from typer.testing import CliRunner
 
+from fast_agent.cli import main as cli_main
 from fast_agent.cli.commands import (
     acp,
     auth,
@@ -14,6 +15,7 @@
     serve,
     setup,
 )
+from fast_agent.cli.main import app as root_app
 
 
 def test_command_help_hides_typer_completion_options():
@@ -33,8 +35,25 @@ def test_command_help_hides_typer_completion_options():
         quickstart.app,
     ]
 
-    for app in command_apps:
-        result = runner.invoke(app, ["--help"], terminal_width=160)
+    for command_app in command_apps:
+        result = runner.invoke(command_app, ["--help"], terminal_width=160)
         assert result.exit_code == 0
         assert "--install-completion" not in result.output
         assert "--show-completion" not in result.output
+
+
+def test_root_help_uses_lazy_command_metadata(monkeypatch):
+    imported_modules: list[str] = []
+
+    def fail_import(name: str):
+        imported_modules.append(name)
+        raise AssertionError(f"root help should not import lazy command module: {name}")
+
+    monkeypatch.setattr(cli_main.importlib, "import_module", fail_import)
+
+    result = CliRunner().invoke(root_app, ["--help"], terminal_width=160)
+
+    assert result.exit_code == 0
+    assert "go" in result.output
+    assert "Run an interactive agent" in result.output
+    assert imported_modules == []
diff --git a/tests/unit/fast_agent/commands/test_cli_main_routing.py b/tests/unit/fast_agent/commands/test_cli_main_routing.py
index 02639049c..350776f23 100644
--- a/tests/unit/fast_agent/commands/test_cli_main_routing.py
+++ b/tests/unit/fast_agent/commands/test_cli_main_routing.py
@@ -90,6 +90,16 @@ def test_auto_routes_to_go_when_no_shell_used_at_root() -> None:
     assert "--no-shell" in output
 
 
+def test_auto_routes_to_go_when_a2a_used_at_root() -> None:
+    result = _run_fast_agent_cli("--a2a", "http://localhost:8001", "--help")
+    output = strip_ansi(result.stdout)
+
+    assert result.returncode == 0, result.stderr
+    assert "go [OPTIONS] COMMAND" in output
+    assert "--a2a" in output
+    assert "--a2a-transport" in output
+
+
 def test_resume_sentinel_is_not_added_for_batch(
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
diff --git a/tests/unit/fast_agent/commands/test_mcp_runtime_handlers.py b/tests/unit/fast_agent/commands/test_mcp_runtime_handlers.py
index cc0d9fa6b..25c214bc7 100644
--- a/tests/unit/fast_agent/commands/test_mcp_runtime_handlers.py
+++ b/tests/unit/fast_agent/commands/test_mcp_runtime_handlers.py
@@ -889,7 +889,7 @@ async def test_handle_mcp_connect_hf_url_adds_hf_auth_from_env(monkeypatch) -> N
     assert any("Connected MCP server" in str(msg.text) for msg in outcome.messages)
     assert manager.last_config is not None
     assert manager.last_config.headers is not None
-    assert manager.last_config.headers.get("Authorization") == "Bearer hf_test_token"
+    assert "Authorization" not in manager.last_config.headers
     assert manager.last_config.headers.get("X-HF-Authorization") == "Bearer hf_test_token"
 
 
diff --git a/tests/unit/fast_agent/commands/test_runtime_model_picker_bootstrap.py b/tests/unit/fast_agent/commands/test_runtime_model_picker_bootstrap.py
index 6e47366ff..be30044fa 100644
--- a/tests/unit/fast_agent/commands/test_runtime_model_picker_bootstrap.py
+++ b/tests/unit/fast_agent/commands/test_runtime_model_picker_bootstrap.py
@@ -20,6 +20,7 @@
 from fast_agent.cli.runtime.agent_setup import (
     _emit_model_picker_keyring_notice,
     _explicit_agent_cards_define_startup_model,
+    _explicit_agent_cards_satisfy_startup_model,
     _generic_model_prompt_default,
     _load_request_settings,
     _normalize_generic_model_spec,
@@ -60,6 +61,7 @@ def _make_request(
     prompt_file: str | None = None,
     agent_cards: list[str] | None = None,
     card_tools: list[str] | None = None,
+    target_agent_name: str | None = None,
 ) -> AgentRunRequest:
     return AgentRunRequest(
         name="test",
@@ -76,7 +78,7 @@ def _make_request(
         url_servers=None,
         stdio_servers=None,
         agent_name="agent",
-        target_agent_name=None,
+        target_agent_name=target_agent_name,
         skills_directory=None,
         environment_dir=None,
         noenv=False,
@@ -191,6 +193,30 @@ def fake_read_text_source(source: str, *, label: str) -> str:
     assert _explicit_agent_cards_define_startup_model(request) is False
 
 
+def test_explicit_a2a_agent_card_satisfies_startup_model_without_local_model(
+    tmp_path: Path,
+) -> None:  # an a2a card with no model key: "define" is False but "satisfy" is True
+    card_path = tmp_path / "remote.yaml"
+    card_path.write_text(
+        "\n".join(
+            [
+                "type: a2a",
+                "name: a2a_remote",
+                "url: http://127.0.0.1:41242",
+                "",  # empty last item gives the file a trailing newline
+            ]
+        ),
+        encoding="utf-8",
+    )
+    request = _make_request(
+        agent_cards=[str(card_path)],
+        target_agent_name="a2a_remote",  # same name as the card written above
+    )
+
+    assert _explicit_agent_cards_define_startup_model(request) is False
+    assert _explicit_agent_cards_satisfy_startup_model(request) is True
+
+
 @pytest.mark.parametrize(
     ("model_references", "expected"),
     [
diff --git a/tests/unit/fast_agent/commands/test_serve_command.py b/tests/unit/fast_agent/commands/test_serve_command.py
index 7e9cb62d1..e29f529cf 100644
--- a/tests/unit/fast_agent/commands/test_serve_command.py
+++ b/tests/unit/fast_agent/commands/test_serve_command.py
@@ -1,9 +1,17 @@
+from types import SimpleNamespace
+from typing import TYPE_CHECKING, Any, cast
+
 import click
 import pytest
 import typer
+from typer.testing import CliRunner
 
 from fast_agent.cli.commands import go as go_command
 from fast_agent.cli.commands import serve as serve_command
+from fast_agent.core.fastagent import FastAgent
+
+if TYPE_CHECKING:
+    from fast_agent.cli.runtime.run_request import AgentRunRequest
 
 
 def test_run_async_agent_passes_serve_mode() -> None:
@@ -134,6 +142,120 @@ def test_serve_command_noenv_forces_permissions_disabled() -> None:
     assert request.permissions_enabled is False
 
 
+def test_serve_command_builds_a2a_request() -> None:  # direct call to the request builder, no CLI parsing
+    ctx = typer.Context(click.Command("serve"))
+    request = serve_command._build_run_request(
+        ctx=ctx,
+        name="fast-agent-a2a",
+        instruction=None,
+        config_path=None,
+        servers=None,
+        agent_cards=["./agents"],
+        card_tools=None,
+        urls=None,
+        auth=None,
+        client_metadata_url=None,
+        model=None,
+        skills_dir=None,
+        env_dir=None,
+        noenv=False,
+        force_smart=False,
+        npx=None,
+        uvx=None,
+        stdio=None,
+        description=None,
+        tool_name_template=None,
+        transport=serve_command.ServeTransport.A2A,  # enum in, compared to "a2a" below
+        host="127.0.0.1",
+        port=41241,
+        shell=False,
+        instance_scope=serve_command.InstanceScope.SHARED,  # enum in, compared to "shared" below
+        no_permissions=False,
+        reload=False,
+        watch=False,
+    )
+
+    assert request.mode == "serve"
+    assert request.transport == "a2a"
+    assert request.host == "127.0.0.1"
+    assert request.port == 41241
+    assert request.instance_scope == "shared"
+
+
+def test_serve_a2a_subcommand_builds_a2a_request(monkeypatch) -> None:  # `serve a2a ...` via the CLI
+    captured: dict[str, object] = {}
+
+    def fake_run_request(request: object) -> None:
+        captured["request"] = request  # keep the built request instead of running it
+
+    monkeypatch.setattr(serve_command, "run_request", fake_run_request)
+
+    result = CliRunner().invoke(
+        serve_command.app,
+        [
+            "a2a",
+            "--host",
+            "127.0.0.1",
+            "--port",
+            "41241",
+            "--agent-cards",
+            "./agents",
+            "--model",
+            "codexresponses.gpt-5.4-mini",
+        ],
+    )
+
+    assert result.exit_code == 0, result.output
+    request = cast("AgentRunRequest", captured["request"])  # typing only; the class is a TYPE_CHECKING import
+    assert request.mode == "serve"
+    assert request.transport == "a2a"
+    assert request.name == "fast-agent-a2a"  # no --name was passed on the command line
+    assert request.host == "127.0.0.1"
+    assert request.port == 41241
+    assert request.agent_cards == ["./agents"]
+    assert request.model == "codexresponses.gpt-5.4-mini"
+
+
+@pytest.mark.asyncio
+async def test_fastagent_run_a2a_server_passes_instance_scope(monkeypatch) -> None:
+    captured: dict[str, Any] = {}
+
+    class FakeA2AServer:  # stand-in that records constructor kwargs and run_async arguments
+        def __init__(self, **kwargs: Any) -> None:
+            captured.update(kwargs)
+
+        async def run_async(self, *, host: str, port: int) -> None:
+            captured["run_host"] = host
+            captured["run_port"] = port
+
+    monkeypatch.setattr("fast_agent.a2a.AgentA2AServer", FakeA2AServer)
+
+    fast = FastAgent.__new__(FastAgent)  # skips __init__; only the attributes assigned below are set
+    fast.name = "fast-agent-a2a"
+    fast.args = SimpleNamespace(
+        server_description=None,
+        server_name=None,
+        host="127.0.0.1",
+        port=41241,
+        instance_scope="request",
+    )
+    state = SimpleNamespace(primary_instance=object())  # sentinel objects, compared by identity below
+    callbacks = SimpleNamespace(
+        create_instance=object(),
+        dispose_instance=object(),
+    )
+
+    run_a2a_server = cast("Any", FastAgent._run_a2a_server)  # Any: called unbound with stand-in arguments
+    await run_a2a_server(fast, state, callbacks)
+
+    assert captured["primary_instance"] is state.primary_instance
+    assert captured["create_instance"] is callbacks.create_instance
+    assert captured["dispose_instance"] is callbacks.dispose_instance
+    assert captured["instance_scope"] == "request"
+    assert captured["run_host"] == "127.0.0.1"
+    assert captured["run_port"] == 41241
+
+
 def test_serve_command_builds_request_with_missing_shell_cwd_override() -> None:
     ctx = typer.Context(click.Command("serve"))
     request = serve_command._build_run_request(
diff --git a/tests/unit/fast_agent/core/test_a2a_error_formatting.py b/tests/unit/fast_agent/core/test_a2a_error_formatting.py
new file mode 100644
index 000000000..42f62cbc8
--- /dev/null
+++ b/tests/unit/fast_agent/core/test_a2a_error_formatting.py
@@ -0,0 +1,19 @@
+from fast_agent.core.direct_factory import _format_a2a_initialization_error
+
+
+def test_format_a2a_initialization_error_uses_cause_chain() -> None:
+    cause = TimeoutError()  # no message, so only the type name can identify it
+    exc = RuntimeError("wrapper")
+    exc.__cause__ = cause  # the same link `raise exc from cause` would create
+
+    message = _format_a2a_initialization_error(
+        name="a2a_remote",
+        url="http://127.0.0.1:41242",
+        transport="JSONRPC",
+        exc=exc,
+    )
+
+    assert "Unable to initialize A2A agent 'a2a_remote' via JSONRPC" in message
+    assert "http://127.0.0.1:41242" in message
+    assert "TimeoutError" in message  # appears only on the cause, not on the wrapper passed as exc
+    assert "Check that the A2A server is running" in message
diff --git a/tests/unit/fast_agent/core/test_agent_card_loader.py b/tests/unit/fast_agent/core/test_agent_card_loader.py
index f14c80145..0bf103a73 100644
--- a/tests/unit/fast_agent/core/test_agent_card_loader.py
+++ b/tests/unit/fast_agent/core/test_agent_card_loader.py
@@ -506,3 +506,80 @@ def test_dump_agent_card_preserves_plugin_command_actions(tmp_path: Path) -> Non
     assert "review-last:" in dumped
     assert "description: Review the last response" in dumped
     assert "handler: commands.py:review_last" in dumped
+
+
+def test_load_a2a_agent_card(tmp_path: Path) -> None:  # load an a2a card, check parsed config and dump
+    card_path = tmp_path / "hello_remote.yaml"
+    card_path.write_text(
+        "\n".join(
+            [
+                "type: a2a",
+                "name: hello_remote",
+                "url: http://127.0.0.1:41241",
+                "transport: JSONRPC",
+                "auth:",
+                "  oauth: true",
+                "  persist: memory",
+                "request_timeout_seconds: 45",
+                "accepted_output_modes:",
+                "  - text",
+            ]
+        ),
+        encoding="utf-8",
+    )
+
+    loaded = load_agent_cards(card_path)[0]
+    config = loaded.agent_data["config"]
+    a2a_config = loaded.agent_data["a2a"]
+
+    assert loaded.agent_data["type"] == "a2a"
+    assert config.agent_type.value == "a2a"
+    assert a2a_config.url == "http://127.0.0.1:41241"
+    assert a2a_config.transport == "JSONRPC"
+    assert a2a_config.streaming is True  # the card has no `streaming` key; loaded value is True
+    assert a2a_config.auth is not None
+    assert a2a_config.auth.oauth is True
+    assert a2a_config.auth.persist == "memory"
+    assert a2a_config.accepted_output_modes == ["text"]
+    assert a2a_config.request_timeout_seconds == 45.0  # card says 45; == treats 45 and 45.0 alike
+
+    dumped = dump_agent_to_string("hello_remote", loaded.agent_data, as_yaml=True)
+    assert "auth:" in dumped  # substring checks: the auth block is present in the YAML dump
+    assert "oauth: true" in dumped
+    assert "persist: memory" in dumped
+
+
+def test_load_a2a_agent_card_rejects_invalid_transport(tmp_path: Path) -> None:
+    card_path = tmp_path / "bad_remote.yaml"
+    card_path.write_text(
+        "\n".join(
+            [
+                "type: a2a",
+                "name: bad_remote",
+                "url: http://127.0.0.1:41241",
+                "transport: WEBSOCKET",  # the value this test expects the loader to reject
+            ]
+        ),
+        encoding="utf-8",
+    )
+
+    with pytest.raises(AgentConfigError, match="transport"):  # match is a regex search on the message
+        load_agent_cards(card_path)
+
+
+def test_load_a2a_agent_card_rejects_grpc_transport(tmp_path: Path) -> None:
+    card_path = tmp_path / "bad_remote.yaml"
+    card_path.write_text(
+        "\n".join(
+            [
+                "type: a2a",
+                "name: bad_remote",
+                "url: http://127.0.0.1:41241",
+                "transport: GRPC",  # the value this test expects the loader to reject
+            ]
+        ),
+        encoding="utf-8",
+    )
+
+    with pytest.raises(AgentConfigError, match="JSONRPC, HTTP\\+JSON"):  # `+` escaped: match is a regex
+        load_agent_cards(card_path)
diff --git a/tests/unit/fast_agent/llm/providers/test_codex_oauth.py b/tests/unit/fast_agent/llm/providers/test_codex_oauth.py
index 3f0a7405e..f97ba5e24 100644
--- a/tests/unit/fast_agent/llm/providers/test_codex_oauth.py
+++ b/tests/unit/fast_agent/llm/providers/test_codex_oauth.py
@@ -42,6 +42,87 @@ def test_explicit_auth_json_path_overrides_keyring(monkeypatch, tmp_path: Path)
     assert tokens.access_token == "local-token"
 
 
+def test_load_codex_tokens_reuses_parsed_tokens_for_unchanged_keyring_payload(
+    monkeypatch,
+    tmp_path: Path,
+) -> None:
+    monkeypatch.delenv("CODEX_AUTH_JSON_PATH", raising=False)
+    monkeypatch.delenv("CODEX_HOME", raising=False)
+    monkeypatch.setattr(codex_oauth.Path, "home", lambda: tmp_path)  # Path.home() -> fresh temp dir
+    monkeypatch.setattr(codex_oauth, "_codex_token_cache", None)  # start with the module cache empty
+    calls = 0
+
+    def _load_keyring() -> str:  # fake keyring read that counts its invocations
+        nonlocal calls
+        calls += 1
+        return json.dumps({"access_token": "keyring-token", "token_type": "Bearer"})
+
+    monkeypatch.setattr(codex_oauth, "_get_keyring_password", _load_keyring)
+
+    first = codex_oauth.load_codex_tokens()
+    second = codex_oauth.load_codex_tokens()
+
+    assert first is not None
+    assert second is not None
+    assert first.access_token == "keyring-token"
+    assert second.access_token == "keyring-token"
+    assert first is second  # the very same object is returned on the second call
+    assert calls == 1  # the keyring was read once for the two loads
+
+
+def test_load_codex_tokens_keeps_cached_keyring_tokens_until_cache_cleared(
+    monkeypatch,
+    tmp_path: Path,
+) -> None:
+    monkeypatch.delenv("CODEX_AUTH_JSON_PATH", raising=False)
+    monkeypatch.delenv("CODEX_HOME", raising=False)
+    monkeypatch.setattr(codex_oauth.Path, "home", lambda: tmp_path)  # Path.home() -> fresh temp dir
+    monkeypatch.setattr(codex_oauth, "_codex_token_cache", None)  # start with the module cache empty
+    keyring_payload = json.dumps({"access_token": "initial-token", "token_type": "Bearer"})
+
+    def _load_keyring() -> str:
+        return keyring_payload  # closure reads the variable at call time, so the rebind below is seen
+
+    monkeypatch.setattr(codex_oauth, "_get_keyring_password", _load_keyring)
+
+    first = codex_oauth.load_codex_tokens()
+    keyring_payload = json.dumps({"access_token": "external-token", "token_type": "Bearer"})
+    second = codex_oauth.load_codex_tokens()  # payload changed, cache not cleared yet
+    codex_oauth.clear_codex_token_cache()
+    third = codex_oauth.load_codex_tokens()  # first load after the explicit clear
+
+    assert first is not None
+    assert second is not None
+    assert third is not None
+    assert first.access_token == "initial-token"
+    assert second.access_token == "initial-token"  # still the cached token despite the new payload
+    assert third.access_token == "external-token"  # new payload is picked up only after the clear
+
+
+def test_load_codex_tokens_invalidates_cache_when_auth_json_changes(
+    monkeypatch,
+    tmp_path: Path,
+) -> None:
+    auth_path = tmp_path / "local-auth.json"
+    monkeypatch.setenv("CODEX_AUTH_JSON_PATH", str(auth_path))  # explicit auth file for this test
+    monkeypatch.delenv("CODEX_HOME", raising=False)
+    monkeypatch.setattr(codex_oauth, "_codex_token_cache", None)  # start with the module cache empty
+
+    auth_path.write_text(
+        json.dumps({"tokens": {"access_token": "initial-token", "token_type": "Bearer"}})
+    )
+    first = codex_oauth.load_codex_tokens()
+    auth_path.write_text(  # rewrite the file; note there is no explicit cache clear before reloading
+        json.dumps({"tokens": {"access_token": "external-token", "token_type": "Bearer"}})
+    )
+    second = codex_oauth.load_codex_tokens()
+
+    assert first is not None
+    assert second is not None
+    assert first.access_token == "initial-token"
+    assert second.access_token == "external-token"  # the rewritten file content is what gets returned
+
+
 def test_save_codex_tokens_writes_local_auth_file_without_keyring(monkeypatch, tmp_path: Path) -> None:
     auth_path = tmp_path / ".codex" / "auth.json"
     auth_path.parent.mkdir(parents=True, exist_ok=True)
diff --git a/tests/unit/fast_agent/llm/providers/test_google_stream_replay.py b/tests/unit/fast_agent/llm/providers/test_google_stream_replay.py
index f3877c553..960759465 100644
--- a/tests/unit/fast_agent/llm/providers/test_google_stream_replay.py
+++ b/tests/unit/fast_agent/llm/providers/test_google_stream_replay.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import importlib
+import json
 import sys
 from pathlib import Path
 from typing import Any
@@ -152,6 +153,13 @@ async def test_google_stream_plain_text_reconstructs_final_response() -> None:
     final_response = await harness._consume_google_stream(
         stream,
         model="gemini-2.0-flash",
+        diagnostics={
+            "transport": "google-genai-stream",
+            "request_type": "models.generate_content_stream",
+            "streaming": True,
+            "model": "gemini-2.0-flash",
+            "phase_ms": {},
+        },
     )
 
     assert stream.closed is True
@@ -171,6 +179,19 @@ async def test_google_stream_plain_text_reconstructs_final_response() -> None:
     parts = content.parts or []
     assert len(parts) == 1
     assert parts[0].text == "Hello"
+    diagnostics = harness._last_google_provider_diagnostics
+    assert diagnostics is not None
+    assert diagnostics["transport"] == "google-genai-stream"
+    assert diagnostics["streaming"] is True
+    assert diagnostics["stream"] == {
+        "chunk_count": 2,
+        "text_chunks": 2,
+        "reasoning_chunks": 0,
+        "function_call_chunks": 0,
+        "usage_metadata_seen": False,
+    }
+    assert diagnostics["phase_ms"]["first_event"] is not None
+    json.dumps(diagnostics)
 
 
 @pytest.mark.unit
@@ -187,6 +208,13 @@ async def test_google_stream_reasoning_emits_stream_events_without_text_tool_eve
     final_response = await harness._consume_google_stream(
         stream,
         model="gemini-2.0-flash",
+        diagnostics={
+            "transport": "google-genai-stream",
+            "request_type": "models.generate_content_stream",
+            "streaming": True,
+            "model": "gemini-2.0-flash",
+            "phase_ms": {},
+        },
     )
 
     assert stream.closed is True
@@ -238,6 +266,13 @@ async def test_google_stream_tool_call_uses_final_arguments_and_closes_stream()
     final_response = await harness._consume_google_stream(
         stream,
         model="gemini-2.0-flash",
+        diagnostics={
+            "transport": "google-genai-stream",
+            "request_type": "models.generate_content_stream",
+            "streaming": True,
+            "model": "gemini-2.0-flash",
+            "phase_ms": {},
+        },
     )
 
     assert stream.closed is True
@@ -278,6 +313,10 @@ async def test_google_stream_tool_call_uses_final_arguments_and_closes_stream()
     assert len(parts) == 1
     assert parts[0].function_call is not None
     assert parts[0].function_call.name == "weather"
+    diagnostics = harness._last_google_provider_diagnostics
+    assert diagnostics is not None
+    assert diagnostics["stream"]["function_call_chunks"] == 2
+    assert diagnostics["stream"]["text_chunks"] == 0
     assert parts[0].function_call.args == {"city": "Paris"}
     assert parts[0].function_call.id == "call_weather"
     assert parts[0].thought_signature == b"sig"
diff --git a/tests/unit/fast_agent/llm/providers/test_llm_google_vertex.py b/tests/unit/fast_agent/llm/providers/test_llm_google_vertex.py
index 2601df8e1..5399a7727 100644
--- a/tests/unit/fast_agent/llm/providers/test_llm_google_vertex.py
+++ b/tests/unit/fast_agent/llm/providers/test_llm_google_vertex.py
@@ -1,3 +1,4 @@
+import json
 import types
 from typing import TYPE_CHECKING, cast
 
@@ -10,7 +11,10 @@
 from fast_agent.config import GoogleSettings, Settings
 from fast_agent.constants import REASONING
 from fast_agent.context import Context
-from fast_agent.llm.provider.google.llm_google_native import GoogleNativeLLM
+from fast_agent.llm.provider.google.llm_google_native import (
+    GOOGLE_DIAGNOSTICS_CHANNEL,
+    GoogleNativeLLM,
+)
 from fast_agent.llm.provider_key_manager import ProviderKeyManager
 from fast_agent.mcp.prompt import Prompt
 from fast_agent.types import PromptMessageExtended, RequestParams
@@ -575,3 +579,12 @@ def _initialize_google_client(self):
     assert len(reasoning) == 1
     assert isinstance(reasoning[0], TextContent)
     assert reasoning[0].text == "private analysis"
+    diagnostics = response.channels[GOOGLE_DIAGNOSTICS_CHANNEL]
+    assert len(diagnostics) == 1
+    assert isinstance(diagnostics[0], TextContent)
+    payload = json.loads(diagnostics[0].text)
+    assert payload["transport"] == "google-genai"
+    assert payload["request_type"] == "models.generate_content"
+    assert payload["streaming"] is False
+    assert payload["model"] == "gemini-3.5-flash"
+    assert payload["phase_ms"]["total"] >= 0
diff --git a/tests/unit/fast_agent/llm/test_trace.py b/tests/unit/fast_agent/llm/test_trace.py
new file mode 100644
index 000000000..8285aeebd
--- /dev/null
+++ b/tests/unit/fast_agent/llm/test_trace.py
@@ -0,0 +1,15 @@
+import os
+
+from fast_agent.llm.trace import llm_trace_enabled, set_llm_trace_enabled, toggle_llm_trace
+
+
+def test_llm_trace_toggle_updates_runtime_state() -> None:  # toggle flips the flag and the env var
+    set_llm_trace_enabled(False)  # known starting point, whatever the ambient state was
+
+    assert toggle_llm_trace() is True
+    assert llm_trace_enabled() is True
+    assert os.environ["FAST_AGENT_LLM_TRACE"] == "1"
+
+    assert toggle_llm_trace() is False
+    assert llm_trace_enabled() is False
+    assert "FAST_AGENT_LLM_TRACE" not in os.environ  # NOTE(review): prior env value is never restored
diff --git a/tests/unit/fast_agent/mcp/test_hf_auth.py b/tests/unit/fast_agent/mcp/test_hf_auth.py
index 3e147a905..eff84fa95 100644
--- a/tests/unit/fast_agent/mcp/test_hf_auth.py
+++ b/tests/unit/fast_agent/mcp/test_hf_auth.py
@@ -7,8 +7,12 @@
 import os
 
 from fast_agent.mcp.hf_auth import (
+    HF_CLI_AMBIENT_AUTH_POLICY,
+    HF_EXPLICIT_BEARER_AUTH_POLICY,
+    add_explicit_bearer_auth_header,
     add_hf_auth_header,
     get_hf_token_from_env,
+    is_hf_space_url,
     is_huggingface_url,
     should_add_hf_auth,
 )
@@ -96,6 +100,10 @@ def test_hf_space_http(self):
     def test_hf_space_with_port(self):
         assert is_huggingface_url("https://space.hf.space:8080/path") is True
 
+    def test_is_hf_space_url_distinguishes_hub_domain(self):  # Space host vs. huggingface.co host
+        assert is_hf_space_url("https://space.hf.space/api") is True
+        assert is_hf_space_url("https://huggingface.co/mcp?mix=jobs") is False  # hub URL is not a Space
+
 
 class TestGetHfTokenFromEnv:
     """Test HF_TOKEN environment variable retrieval."""
@@ -203,13 +211,11 @@ def test_adds_auth_header_for_huggingface_co(self):
             _restore_hf_token(original)
 
     def test_adds_x_hf_auth_header_for_hf_space(self):
-        """Test that .hf.space domains get both Authorization and X-HF-Authorization headers."""
+        """Test that .hf.space domains get only X-HF-Authorization."""
         original = _set_hf_token("test_token_123")
         try:
             result = add_hf_auth_header("https://myspace.hf.space/api", None)
-            # Both headers are needed: Authorization for the app, X-HF-Authorization for HF infra
             expected = {
-                "Authorization": "Bearer test_token_123",
                 "X-HF-Authorization": "Bearer test_token_123",
             }
             assert result == expected
@@ -237,11 +243,9 @@ def test_preserves_existing_headers_for_hf_space(self):
         try:
             existing = {"Content-Type": "application/json", "User-Agent": "test"}
             result = add_hf_auth_header("https://myspace.hf.space/api", existing)
-            # Both headers are needed: Authorization for the app, X-HF-Authorization for HF infra
             expected = {
                 "Content-Type": "application/json",
                 "User-Agent": "test",
-                "Authorization": "Bearer test_token_123",
                 "X-HF-Authorization": "Bearer test_token_123",
             }
             assert result == expected
@@ -293,21 +297,33 @@ def test_returns_none_when_no_token_available(self):
         finally:
             _restore_hf_token(original)
 
-    def test_case_sensitive_authorization_header(self):
-        """Test that Authorization header check is case-sensitive as per HTTP spec."""
+    def test_lowercase_authorization_header_prevents_ambient_auth(self):
         original = _set_hf_token("test_token_123")
         try:
-            # Lower case 'authorization' should not prevent HF auth
             existing = {"authorization": "Bearer existing_token"}
             result = add_hf_auth_header("https://hf.co/models", existing)
-            expected = {
-                "authorization": "Bearer existing_token",
-                "Authorization": "Bearer test_token_123",
-            }
-            assert result == expected
+            assert result == existing
         finally:
             _restore_hf_token(original)
 
+    def test_explicit_bearer_auth_uses_authorization_for_hf_space(self):  # explicit token on a Space URL
+        result = add_explicit_bearer_auth_header("https://demo.hf.space/a2a/jsonrpc", None, "abc")
+        assert result == {"Authorization": "Bearer abc"}  # exactly one header, and no X-HF-Authorization
+
+    def test_policies_make_hf_space_header_choice_explicit(self):  # header name per policy and host
+        assert (
+            HF_CLI_AMBIENT_AUTH_POLICY.header_for_url("https://demo.hf.space/api")
+            == "X-HF-Authorization"  # ambient policy, Space host
+        )
+        assert (
+            HF_EXPLICIT_BEARER_AUTH_POLICY.header_for_url("https://demo.hf.space/a2a/jsonrpc")
+            == "Authorization"  # explicit-bearer policy, Space host
+        )
+        assert (
+            HF_CLI_AMBIENT_AUTH_POLICY.header_for_url("https://huggingface.co/mcp?mix=jobs")
+            == "Authorization"  # ambient policy, hub host
+        )
+
 
 class TestHfSpaceAntiSpoofing:
     """Test comprehensive anti-spoofing measures for .hf.space domains."""
@@ -436,13 +452,10 @@ def test_hf_token_correctly_added_to_valid_hf_spaces(self):
             for url in valid_urls:
                 result = add_hf_auth_header(url, None)
                 assert result is not None, f"Should add auth to: {url}"
-                # Both headers are needed: Authorization for the app, X-HF-Authorization for HF infra
                 assert result["X-HF-Authorization"] == "Bearer test_token_123", (
                     f"Incorrect X-HF-Authorization for: {url}"
                 )
-                assert result["Authorization"] == "Bearer test_token_123", (
-                    f"Incorrect Authorization for: {url}"
-                )
+                assert "Authorization" not in result
         finally:
             _restore_hf_token(original)
 
diff --git a/tests/unit/fast_agent/mcp/test_mcp_aggregator_nonpersistent.py b/tests/unit/fast_agent/mcp/test_mcp_aggregator_nonpersistent.py
index bd4c38181..d32ae5dd7 100644
--- a/tests/unit/fast_agent/mcp/test_mcp_aggregator_nonpersistent.py
+++ b/tests/unit/fast_agent/mcp/test_mcp_aggregator_nonpersistent.py
@@ -5,6 +5,7 @@
 import pytest
 from mcp.shared.exceptions import McpError
 from mcp.types import (
+    CallToolResult,
     ErrorData,
     Implementation,
     InitializeResult,
@@ -13,12 +14,14 @@
     Prompt,
     PromptsCapability,
     ServerCapabilities,
+    TextContent,
     Tool,
     ToolsCapability,
 )
 
-from fast_agent.config import MCPServerSettings
+from fast_agent.config import MCPServerAuthSettings, MCPServerSettings
 from fast_agent.context import Context
+from fast_agent.mcp.auth.context import request_bearer_token
 from fast_agent.mcp.gen_client import gen_client
 from fast_agent.mcp.interfaces import ServerInitializerProtocol
 from fast_agent.mcp.mcp_aggregator import (
@@ -304,6 +307,69 @@ async def _fake_gen_client(
     assert trigger_history == [None, True]
 
 
+@pytest.mark.asyncio
+async def test_execute_on_server_uses_request_scoped_connection_for_forwarded_hf_auth(
+    monkeypatch,
+) -> None:
+    context = _build_context(
+        {
+            "hf": MCPServerSettings(
+                name="hf",
+                transport="http",
+                url="https://huggingface.co/mcp",
+                auth=MCPServerAuthSettings(forward="huggingface"),  # the forwarded-auth setting under test
+            )
+        }
+    )
+    aggregator = MCPAggregator(
+        server_names=["hf"],
+        connection_persistence=True,  # persistence is on, yet the persistent manager must go unused
+        context=context,
+    )
+
+    class _PersistentManager:  # any get_server() call fails the test
+        async def get_server(self, *args, **kwargs):  # noqa: ANN002, ANN003
+            del args, kwargs
+            raise AssertionError("persistent connection must not be reused for forwarded auth")
+
+    class _RequestClient:  # minimal client returning a fixed tool result
+        async def call_tool(self, **kwargs):  # noqa: ANN003
+            del kwargs
+            return CallToolResult(content=[TextContent(type="text", text="ok")])
+
+    gen_client_calls: list[str] = []
+
+    @asynccontextmanager
+    async def _fake_gen_client(
+        server_name,
+        server_registry,
+        client_session_factory=_DummySession,
+        *,
+        trigger_oauth=None,
+    ):
+        del server_registry, client_session_factory, trigger_oauth
+        gen_client_calls.append(server_name)  # one entry per connection opened through gen_client
+        yield _RequestClient()
+
+    monkeypatch.setattr("fast_agent.mcp.mcp_aggregator.gen_client", _fake_gen_client)
+    aggregator._persistent_connection_manager = cast("MCPConnectionManager", _PersistentManager())
+
+    token = request_bearer_token.set("request-token")  # set for this call only; reset in `finally`
+    try:
+        result = await aggregator._execute_on_server(
+            "hf",
+            "tools/call",
+            "hf_whoami",
+            "call_tool",
+            method_args={"name": "hf_whoami", "arguments": {}},
+        )
+    finally:
+        request_bearer_token.reset(token)
+
+    assert isinstance(result, CallToolResult)
+    assert gen_client_calls == ["hf"]  # exactly one gen_client connection, to the "hf" server
+
+
 # ---------------------------------------------------------------------------
 # get_capabilities (non-persistent path)
 # ---------------------------------------------------------------------------
diff --git a/tests/unit/fast_agent/mcp/test_mcp_connection_manager.py b/tests/unit/fast_agent/mcp/test_mcp_connection_manager.py
index dc954c4b6..6d685037e 100644
--- a/tests/unit/fast_agent/mcp/test_mcp_connection_manager.py
+++ b/tests/unit/fast_agent/mcp/test_mcp_connection_manager.py
@@ -9,8 +9,9 @@
 from anyio import create_task_group
 from mcp import ClientSession
 
-from fast_agent.config import MCPServerSettings
+from fast_agent.config import MCPServerAuthSettings, MCPServerSettings
 from fast_agent.core.exceptions import ServerInitializationError
+from fast_agent.mcp.auth.context import request_bearer_token
 from fast_agent.mcp.interfaces import ClientSessionFactory
 from fast_agent.mcp.mcp_connection_manager import (
     MCPConnectionManager,
@@ -124,6 +125,77 @@ def _builder(_config, **_kwargs):
     assert user_keys == set()
 
 
+def test_prepare_headers_forwards_hf_request_token() -> None:  # hub URL + forward="huggingface"
+    config = MCPServerSettings(
+        name="test",
+        transport="http",
+        url="https://huggingface.co/mcp",
+        auth=MCPServerAuthSettings(forward="huggingface"),
+    )
+
+    saved_token = request_bearer_token.set("request-token")  # set for this call; reset in `finally`
+    try:
+        headers, auth, user_keys = _prepare_headers_and_auth(config, trigger_oauth=True)
+    finally:
+        request_bearer_token.reset(saved_token)
+
+    assert headers == {"Authorization": "Bearer request-token"}  # the request token, as a Bearer value
+    assert auth is None  # no auth object returned even though trigger_oauth=True was passed
+    assert user_keys == {"Authorization"}
+
+
+def test_forward_hf_config_does_not_capture_env_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setenv("HF_TOKEN", "env-token")  # an ambient token is available in the environment
+
+    config = MCPServerSettings(
+        name="test",
+        transport="http",
+        url="https://huggingface.co/mcp",
+        auth=MCPServerAuthSettings(forward="huggingface"),
+    )
+
+    assert config.headers is None  # constructing the settings added no headers despite HF_TOKEN
+
+
+def test_prepare_headers_forwards_hf_space_request_token() -> None:  # Space URL + forward="huggingface"
+    config = MCPServerSettings(
+        name="test",
+        transport="http",
+        url="https://demo.hf.space/mcp",
+        auth=MCPServerAuthSettings(forward="huggingface"),
+    )
+
+    saved_token = request_bearer_token.set("request-token")  # set for this call; reset in `finally`
+    try:
+        headers, auth, user_keys = _prepare_headers_and_auth(config, trigger_oauth=True)
+    finally:
+        request_bearer_token.reset(saved_token)
+
+    assert headers == {"X-HF-Authorization": "Bearer request-token"}  # no plain Authorization header
+    assert auth is None
+    assert user_keys == {"X-HF-Authorization"}
+
+
+def test_prepare_headers_forward_preserves_explicit_authorization() -> None:
+    config = MCPServerSettings(
+        name="test",
+        transport="http",
+        url="https://huggingface.co/mcp",
+        headers={"Authorization": "Bearer explicit"},  # configured header that must survive
+        auth=MCPServerAuthSettings(forward="huggingface"),
+    )
+
+    saved_token = request_bearer_token.set("request-token")  # set for this call; reset in `finally`
+    try:
+        headers, auth, user_keys = _prepare_headers_and_auth(config, trigger_oauth=True)
+    finally:
+        request_bearer_token.reset(saved_token)
+
+    assert headers == {"Authorization": "Bearer explicit"}  # request token did not replace it
+    assert auth is None
+    assert user_keys == {"Authorization"}
+
+
 @pytest.mark.asyncio
 async def test_managed_http_transport_context_closes_client_after_transport() -> None:
     class _FakeClient:
diff --git a/tests/unit/fast_agent/test_a2a_remote_agent_config.py b/tests/unit/fast_agent/test_a2a_remote_agent_config.py
new file mode 100644
index 000000000..07e06a405
--- /dev/null
+++ b/tests/unit/fast_agent/test_a2a_remote_agent_config.py
@@ -0,0 +1,668 @@
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from a2a.types import (
+    AgentCapabilities,
+    AgentCard,
+    AgentInterface,
+    AgentProvider,
+    AgentSkill,
+    AuthorizationCodeOAuthFlow,
+    HTTPAuthSecurityScheme,
+    OAuth2SecurityScheme,
+    OAuthFlows,
+    SecurityRequirement,
+    SecurityScheme,
+    StringList,
+)
+
+from fast_agent.a2a.config import A2AAgentConfig
+from fast_agent.a2a.remote_agent import SUPPORTED_A2A_HTTP_TRANSPORTS, A2ARemoteAgent
+from fast_agent.agents.agent_types import AgentConfig, AgentType
+from fast_agent.config import MCPServerAuthSettings
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_defaults_to_supported_http_transports(monkeypatch) -> None:
+    captured: dict[str, Any] = {}  # filled in by the fakes below
+
+    class FakeResolver:  # patched in for A2ACardResolver below
+        def __init__(self, *_args: Any, **_kwargs: Any) -> None:
+            return None
+
+        async def get_agent_card(self) -> AgentCard:
+            return AgentCard(
+                name="http-only",
+                description="HTTP+JSON only",
+                provider=AgentProvider(organization="test", url="https://example.com"),
+                version="1.0.0",
+                capabilities=AgentCapabilities(streaming=True, push_notifications=False),
+                default_input_modes=["text"],
+                default_output_modes=["text"],
+                skills=[
+                    AgentSkill(
+                        id="echo",
+                        name="Echo",
+                        description="Echo input",
+                        tags=["test"],
+                        examples=["hello"],
+                        input_modes=["text"],
+                        output_modes=["text"],
+                    )
+                ],
+                supported_interfaces=[  # the only advertised binding is HTTP+JSON
+                    AgentInterface(
+                        protocol_binding="HTTP+JSON",
+                        protocol_version="1.0",
+                        url="http://127.0.0.1:41242/a2a/rest",
+                    )
+                ],
+            )
+
+    class FakeClient:  # returned by fake_create_client
+        async def close(self) -> None:
+            return None
+
+    async def fake_create_client(agent_card: AgentCard, *, client_config: Any) -> FakeClient:
+        captured["agent_card"] = agent_card
+        captured["client_config"] = client_config
+        return FakeClient()
+
+    class FakeAsyncClient:  # patched in for httpx.AsyncClient; records constructor kwargs
+        def __init__(self, *_args: Any, **kwargs: Any) -> None:
+            captured["httpx_timeout"] = kwargs.get("timeout")
+            captured["httpx_headers"] = kwargs.get("headers")
+
+        async def aclose(self) -> None:
+            return None
+
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.A2ACardResolver", FakeResolver)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.create_client", fake_create_client)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.httpx.AsyncClient", FakeAsyncClient)
+    # No transport, timeout, or headers are configured on the A2A config.
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url="http://127.0.0.1:41242"),
+    )
+    await agent.initialize()
+    try:
+        client_config = captured["client_config"]
+        assert client_config.supported_protocol_bindings == SUPPORTED_A2A_HTTP_TRANSPORTS
+        assert captured["httpx_timeout"] == 120.0  # no request_timeout_seconds configured
+        assert captured["httpx_headers"] is None
+    finally:
+        await agent.shutdown()
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_adds_hf_auth_headers_for_hf_space(
+    monkeypatch,
+) -> None:
+    captured: dict[str, Any] = {}  # filled in by the fakes below
+
+    class FakeResolver:  # patched in for A2ACardResolver below
+        def __init__(self, *_args: Any, **_kwargs: Any) -> None:
+            return None
+
+        async def get_agent_card(self) -> AgentCard:
+            return AgentCard(
+                name="hf-space",
+                description="HF Space",
+                provider=AgentProvider(organization="test", url="https://example.com"),
+                version="1.0.0",
+                capabilities=AgentCapabilities(streaming=True, push_notifications=False),
+                default_input_modes=["text"],
+                default_output_modes=["text"],
+                skills=[
+                    AgentSkill(
+                        id="echo",
+                        name="Echo",
+                        description="Echo input",
+                        tags=["test"],
+                        examples=["hello"],
+                        input_modes=["text"],
+                        output_modes=["text"],
+                    )
+                ],
+                supported_interfaces=[  # no security schemes are declared on this card
+                    AgentInterface(
+                        protocol_binding="JSONRPC",
+                        protocol_version="1.0",
+                        url="https://demo.hf.space/a2a/jsonrpc",
+                    )
+                ],
+            )
+
+    class FakeClient:  # returned by fake_create_client
+        async def close(self) -> None:
+            return None
+
+    class FakeAsyncClient:  # patched in for httpx.AsyncClient; records the headers kwarg
+        def __init__(self, *_args: Any, **kwargs: Any) -> None:
+            captured["httpx_headers"] = kwargs.get("headers")
+
+        async def aclose(self) -> None:
+            return None
+
+    async def fake_create_client(agent_card: AgentCard, *, client_config: Any) -> FakeClient:
+        captured["agent_card"] = agent_card
+        captured["client_config"] = client_config
+        return FakeClient()
+
+    monkeypatch.setenv("HF_TOKEN", "hf-test-token")
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.A2ACardResolver", FakeResolver)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.create_client", fake_create_client)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.httpx.AsyncClient", FakeAsyncClient)
+    # Only HF_TOKEN is set; the A2A config itself carries no headers.
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url="https://demo.hf.space"),
+    )
+    await agent.initialize()
+    try:
+        assert captured["httpx_headers"] == {  # value built from HF_TOKEN
+            "X-HF-Authorization": "Bearer hf-test-token",
+        }
+    finally:
+        await agent.shutdown()
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_switches_hf_space_bearer_card_to_endpoint_auth(
+    monkeypatch,
+) -> None:
+    captured: dict[str, Any] = {"httpx_headers": []}  # one entry per AsyncClient constructed
+
+    class FakeResolver:  # patched in for A2ACardResolver below
+        def __init__(self, *_args: Any, **_kwargs: Any) -> None:
+            return None
+
+        async def get_agent_card(self) -> AgentCard:
+            return _hf_bearer_agent_card()
+
+    class FakeClient:  # returned by fake_create_client
+        async def close(self) -> None:
+            return None
+
+    class FakeAsyncClient:  # patched in for httpx.AsyncClient; appends each headers kwarg
+        def __init__(self, *_args: Any, **kwargs: Any) -> None:
+            captured["httpx_headers"].append(kwargs.get("headers"))
+
+        async def aclose(self) -> None:
+            return None
+
+    async def fake_create_client(agent_card: AgentCard, *, client_config: Any) -> FakeClient:
+        captured["agent_card"] = agent_card
+        captured["client_config"] = client_config
+        return FakeClient()
+
+    monkeypatch.setenv("HF_TOKEN", "hf-test-token")
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.A2ACardResolver", FakeResolver)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.create_client", fake_create_client)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.httpx.AsyncClient", FakeAsyncClient)
+    # The fake card (see _hf_bearer_agent_card) declares an HTTP bearer security scheme.
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url="https://demo.hf.space"),
+    )
+    await agent.initialize()
+    try:
+        assert captured["httpx_headers"] == [  # two clients, in construction order
+            {"X-HF-Authorization": "Bearer hf-test-token"},
+            {"Authorization": "Bearer hf-test-token"},
+        ]
+        assert captured["client_config"].httpx_client is agent._httpx_client
+    finally:
+        await agent.shutdown()
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_uses_oauth_for_hf_bearer_card_without_token(
+    monkeypatch,
+) -> None:
+    captured: dict[str, Any] = {"httpx_auth": []}  # one auth entry per AsyncClient constructed
+    oauth_provider = object()  # sentinel returned by fake_build_oauth_provider
+
+    class FakeResolver:  # patched in for A2ACardResolver below
+        def __init__(self, *_args: Any, **_kwargs: Any) -> None:
+            return None
+
+        async def get_agent_card(self) -> AgentCard:
+            return _hf_bearer_agent_card()
+
+    class FakeClient:  # returned by fake_create_client
+        async def close(self) -> None:
+            return None
+
+    class FakeAsyncClient:  # patched in for httpx.AsyncClient; records auth and headers
+        def __init__(self, *_args: Any, **kwargs: Any) -> None:
+            captured["httpx_auth"].append(kwargs.get("auth"))
+            captured["httpx_headers"] = kwargs.get("headers")
+
+        async def aclose(self) -> None:
+            return None
+
+    async def fake_create_client(agent_card: AgentCard, *, client_config: Any) -> FakeClient:
+        captured["agent_card"] = agent_card
+        captured["client_config"] = client_config
+        return FakeClient()
+
+    def fake_build_oauth_provider(server_config: Any) -> object:
+        captured["oauth_server"] = server_config
+        return oauth_provider
+
+    monkeypatch.delenv("HF_TOKEN", raising=False)  # make sure no token leaks in from the env
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.get_hf_token_from_env", lambda: None)
+    monkeypatch.setattr("fast_agent.mcp.hf_auth.get_hf_token_from_env", lambda *_args: None)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.A2ACardResolver", FakeResolver)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.create_client", fake_create_client)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.httpx.AsyncClient", FakeAsyncClient)
+    monkeypatch.setattr(
+        "fast_agent.a2a.remote_agent.build_oauth_provider",
+        fake_build_oauth_provider,
+    )
+    # With no HF token available, the bearer-protected card is expected to fall back to OAuth.
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url="https://demo.hf.space"),
+    )
+    await agent.initialize()
+    try:
+        assert captured["httpx_auth"] == [None, None, oauth_provider]  # OAuth on the last one
+        assert captured["oauth_server"].url == "https://demo.hf.space"
+        assert captured["client_config"].httpx_client is agent._httpx_client
+    finally:
+        await agent.shutdown()
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_preserves_explicit_auth_headers_for_hf_space(
+    monkeypatch,
+) -> None:
+    captured: dict[str, Any] = {}  # filled in by the fakes below
+
+    class FakeResolver:  # patched in for A2ACardResolver below
+        def __init__(self, *_args: Any, **_kwargs: Any) -> None:
+            return None
+
+        async def get_agent_card(self) -> AgentCard:
+            return AgentCard(
+                name="hf-space",
+                description="HF Space",
+                provider=AgentProvider(organization="test", url="https://example.com"),
+                version="1.0.0",
+                capabilities=AgentCapabilities(streaming=True, push_notifications=False),
+                default_input_modes=["text"],
+                default_output_modes=["text"],
+                skills=[
+                    AgentSkill(
+                        id="echo",
+                        name="Echo",
+                        description="Echo input",
+                        tags=["test"],
+                        examples=["hello"],
+                        input_modes=["text"],
+                        output_modes=["text"],
+                    )
+                ],
+                supported_interfaces=[  # no security schemes are declared on this card
+                    AgentInterface(
+                        protocol_binding="JSONRPC",
+                        protocol_version="1.0",
+                        url="https://demo.hf.space/a2a/jsonrpc",
+                    )
+                ],
+            )
+
+    class FakeClient:  # returned by fake_create_client
+        async def close(self) -> None:
+            return None
+
+    class FakeAsyncClient:  # patched in for httpx.AsyncClient; records the headers kwarg
+        def __init__(self, *_args: Any, **kwargs: Any) -> None:
+            captured["httpx_headers"] = kwargs.get("headers")
+
+        async def aclose(self) -> None:
+            return None
+
+    async def fake_create_client(agent_card: AgentCard, *, client_config: Any) -> FakeClient:
+        captured["agent_card"] = agent_card
+        captured["client_config"] = client_config
+        return FakeClient()
+
+    monkeypatch.setenv("HF_TOKEN", "hf-env-token")
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.A2ACardResolver", FakeResolver)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.create_client", fake_create_client)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.httpx.AsyncClient", FakeAsyncClient)
+    # HF_TOKEN is also set, so this checks the configured header is not replaced by it.
+    explicit_headers = {"Authorization": "Bearer explicit-token"}
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url="https://demo.hf.space", headers=explicit_headers),
+    )
+    await agent.initialize()
+    try:
+        assert captured["httpx_headers"] == explicit_headers  # nothing added, nothing replaced
+    finally:
+        await agent.shutdown()
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_uses_configured_request_timeout(monkeypatch) -> None:
+    captured: dict[str, Any] = {}  # filled in by the fakes below
+
+    class FakeResolver:  # patched in for A2ACardResolver below
+        def __init__(self, *_args: Any, **_kwargs: Any) -> None:
+            return None
+
+        async def get_agent_card(self) -> AgentCard:
+            return AgentCard(
+                name="jsonrpc",
+                description="JSON-RPC",
+                provider=AgentProvider(organization="test", url="https://example.com"),
+                version="1.0.0",
+                capabilities=AgentCapabilities(streaming=True, push_notifications=False),
+                default_input_modes=["text"],
+                default_output_modes=["text"],
+                skills=[
+                    AgentSkill(
+                        id="echo",
+                        name="Echo",
+                        description="Echo input",
+                        tags=["test"],
+                        examples=["hello"],
+                        input_modes=["text"],
+                        output_modes=["text"],
+                    )
+                ],
+                supported_interfaces=[
+                    AgentInterface(
+                        protocol_binding="JSONRPC",
+                        protocol_version="1.0",
+                        url="http://127.0.0.1:41242/a2a/jsonrpc",
+                    )
+                ],
+            )
+
+    class FakeClient:  # returned by fake_create_client
+        async def close(self) -> None:
+            return None
+
+    class FakeAsyncClient:  # patched in for httpx.AsyncClient; records the timeout kwarg
+        def __init__(self, *_args: Any, **kwargs: Any) -> None:
+            captured["httpx_timeout"] = kwargs.get("timeout")
+
+        async def aclose(self) -> None:
+            return None
+
+    async def fake_create_client(agent_card: AgentCard, *, client_config: Any) -> FakeClient:
+        captured["agent_card"] = agent_card
+        captured["client_config"] = client_config
+        return FakeClient()
+
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.A2ACardResolver", FakeResolver)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.create_client", fake_create_client)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.httpx.AsyncClient", FakeAsyncClient)
+    # Configure an explicit 30 second request timeout on the A2A config.
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(
+            url="http://127.0.0.1:41242",
+            request_timeout_seconds=30.0,
+        ),
+    )
+    await agent.initialize()
+    try:
+        assert captured["httpx_timeout"] == 30.0  # passed through to the HTTP client
+    finally:
+        await agent.shutdown()
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_honors_explicit_transport(monkeypatch) -> None:
+    captured: dict[str, Any] = {}  # filled in by fake_create_client
+
+    class FakeResolver:  # patched in for A2ACardResolver below
+        def __init__(self, *_args: Any, **_kwargs: Any) -> None:
+            return None
+
+        async def get_agent_card(self) -> AgentCard:
+            return AgentCard(
+                name="jsonrpc",
+                description="JSON-RPC",
+                provider=AgentProvider(organization="test", url="https://example.com"),
+                version="1.0.0",
+                capabilities=AgentCapabilities(streaming=True, push_notifications=False),
+                default_input_modes=["text"],
+                default_output_modes=["text"],
+                skills=[
+                    AgentSkill(
+                        id="echo",
+                        name="Echo",
+                        description="Echo input",
+                        tags=["test"],
+                        examples=["hello"],
+                        input_modes=["text"],
+                        output_modes=["text"],
+                    )
+                ],
+                supported_interfaces=[
+                    AgentInterface(
+                        protocol_binding="JSONRPC",
+                        protocol_version="1.0",
+                        url="http://127.0.0.1:41242/a2a/jsonrpc",
+                    )
+                ],
+            )
+
+    class FakeClient:  # returned by fake_create_client
+        async def close(self) -> None:
+            return None
+
+    async def fake_create_client(agent_card: AgentCard, *, client_config: Any) -> FakeClient:
+        captured["agent_card"] = agent_card
+        captured["client_config"] = client_config
+        return FakeClient()
+
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.A2ACardResolver", FakeResolver)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.create_client", fake_create_client)
+    # Pin the transport explicitly; httpx.AsyncClient is not patched in this test.
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url="http://127.0.0.1:41242", transport="JSONRPC"),
+    )
+    await agent.initialize()
+    try:
+        client_config = captured["client_config"]
+        assert client_config.supported_protocol_bindings == ["JSONRPC"]  # only the pinned one
+    finally:
+        await agent.shutdown()
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_enables_oauth_for_oauth_agent_card(monkeypatch) -> None:
+    captured: dict[str, Any] = {"httpx_auth": []}  # one auth entry per AsyncClient constructed
+    oauth_provider = object()  # sentinel returned by fake_build_oauth_provider
+
+    class FakeResolver:  # patched in for A2ACardResolver below
+        def __init__(self, *_args: Any, **_kwargs: Any) -> None:
+            return None
+
+        async def get_agent_card(self) -> AgentCard:
+            return _oauth_agent_card()
+
+    class FakeClient:  # returned by fake_create_client
+        async def close(self) -> None:
+            return None
+
+    class FakeAsyncClient:  # patched in for httpx.AsyncClient; appends each auth kwarg
+        def __init__(self, *_args: Any, **kwargs: Any) -> None:
+            captured["httpx_auth"].append(kwargs.get("auth"))
+
+        async def aclose(self) -> None:
+            return None
+
+    async def fake_create_client(agent_card: AgentCard, *, client_config: Any) -> FakeClient:
+        captured["agent_card"] = agent_card
+        captured["client_config"] = client_config
+        return FakeClient()
+
+    def fake_build_oauth_provider(server_config: Any) -> object:
+        captured["oauth_server"] = server_config
+        return oauth_provider
+
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.A2ACardResolver", FakeResolver)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.create_client", fake_create_client)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.httpx.AsyncClient", FakeAsyncClient)
+    monkeypatch.setattr(
+        "fast_agent.a2a.remote_agent.build_oauth_provider",
+        fake_build_oauth_provider,
+    )
+    # The fake card (see _oauth_agent_card) advertises an OAuth2 authorization-code flow.
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url="https://agent.example.com"),
+    )
+    await agent.initialize()
+    try:
+        assert captured["httpx_auth"] == [None, oauth_provider]  # OAuth on the second client
+        assert captured["client_config"].httpx_client is agent._httpx_client
+        assert captured["oauth_server"].transport == "http"
+        assert captured["oauth_server"].url == "https://agent.example.com"
+    finally:
+        await agent.shutdown()
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_no_oauth_disables_advertised_oauth(monkeypatch) -> None:
+    captured: dict[str, Any] = {}  # filled in by fake_create_client
+
+    class FakeResolver:  # patched in for A2ACardResolver below
+        def __init__(self, *_args: Any, **_kwargs: Any) -> None:
+            return None
+
+        async def get_agent_card(self) -> AgentCard:
+            return _oauth_agent_card()
+
+    class FakeClient:  # returned by fake_create_client
+        async def close(self) -> None:
+            return None
+
+    async def fake_create_client(agent_card: AgentCard, *, client_config: Any) -> FakeClient:
+        captured["client_config"] = client_config
+        return FakeClient()
+
+    def fail_build_oauth_provider(_server_config: Any) -> object:
+        raise AssertionError("OAuth provider should not be built")  # any call fails the test
+
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.A2ACardResolver", FakeResolver)
+    monkeypatch.setattr("fast_agent.a2a.remote_agent.create_client", fake_create_client)
+    monkeypatch.setattr(
+        "fast_agent.a2a.remote_agent.build_oauth_provider",
+        fail_build_oauth_provider,
+    )
+    # The card advertises OAuth, but auth.oauth=False must keep the provider from being built.
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(
+            url="https://agent.example.com",
+            auth=MCPServerAuthSettings(oauth=False),
+        ),
+    )
+    await agent.initialize()
+    try:
+        assert captured["client_config"].httpx_client is agent._httpx_client
+    finally:
+        await agent.shutdown()
+
+
+def _oauth_agent_card() -> AgentCard:  # test card advertising OAuth2 (authorization-code flow)
+    return AgentCard(
+        name="oauth-agent",
+        description="OAuth Agent",
+        provider=AgentProvider(organization="test", url="https://example.com"),
+        version="1.0.0",
+        capabilities=AgentCapabilities(streaming=True, push_notifications=False),
+        default_input_modes=["text"],
+        default_output_modes=["text"],
+        skills=[
+            AgentSkill(
+                id="echo",
+                name="Echo",
+                description="Echo input",
+                tags=["test"],
+                examples=["hello"],
+                input_modes=["text"],
+                output_modes=["text"],
+            )
+        ],
+        supported_interfaces=[
+            AgentInterface(
+                protocol_binding="JSONRPC",
+                protocol_version="1.0",
+                url="https://agent.example.com/a2a/jsonrpc",
+            )
+        ],
+        security_requirements=[  # card-level requirement: the "oauth" scheme with "openid"
+            SecurityRequirement(schemes={"oauth": StringList(list=["openid"])})
+        ],
+        security_schemes={  # definition of the "oauth" scheme referenced above
+            "oauth": SecurityScheme(
+                oauth2_security_scheme=OAuth2SecurityScheme(
+                    flows=OAuthFlows(
+                        authorization_code=AuthorizationCodeOAuthFlow(
+                            authorization_url="https://auth.example.com/authorize",
+                            token_url="https://auth.example.com/token",
+                        )
+                    )
+                )
+            )
+        },
+    )
+
+
+def _hf_bearer_agent_card() -> AgentCard:  # test card on hf.space requiring an HTTP bearer token
+    return AgentCard(
+        name="hf-bearer-agent",
+        description="HF bearer protected agent",
+        provider=AgentProvider(organization="test", url="https://example.com"),
+        version="1.0.0",
+        capabilities=AgentCapabilities(streaming=True, push_notifications=False),
+        default_input_modes=["text"],
+        default_output_modes=["text"],
+        skills=[
+            AgentSkill(
+                id="echo",
+                name="Echo",
+                description="Echo input",
+                tags=["test"],
+                examples=["hello"],
+                input_modes=["text"],
+                output_modes=["text"],
+                security_requirements=[  # the skill repeats the card-level requirement
+                    SecurityRequirement(schemes={"hf_bearer": StringList(list=[])})
+                ],
+            )
+        ],
+        supported_interfaces=[
+            AgentInterface(
+                protocol_binding="JSONRPC",
+                protocol_version="1.0",
+                url="https://demo.hf.space/a2a/jsonrpc",
+            )
+        ],
+        security_requirements=[  # card-level requirement: the "hf_bearer" scheme, no scopes
+            SecurityRequirement(schemes={"hf_bearer": StringList(list=[])})
+        ],
+        security_schemes={  # definition of the "hf_bearer" scheme referenced above
+            "hf_bearer": SecurityScheme(
+                http_auth_security_scheme=HTTPAuthSecurityScheme(
+                    scheme="bearer",
+                    bearer_format="HF_TOKEN",
+                    description="Hugging Face bearer token",
+                )
+            )
+        },
+    )
diff --git a/tests/unit/fast_agent/test_a2a_remote_agent_events.py b/tests/unit/fast_agent/test_a2a_remote_agent_events.py
new file mode 100644
index 000000000..d5a4fdba9
--- /dev/null
+++ b/tests/unit/fast_agent/test_a2a_remote_agent_events.py
@@ -0,0 +1,311 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Any, cast
+
+import pytest
+from a2a.types import (
+    Artifact,
+    Message,
+    Part,
+    Role,
+    StreamResponse,
+    Task,
+    TaskArtifactUpdateEvent,
+    TaskState,
+    TaskStatus,
+)
+from google.protobuf.json_format import MessageToDict
+from mcp.types import EmbeddedResource, TextContent, TextResourceContents
+from pydantic import AnyUrl
+
+from fast_agent.a2a.config import A2AAgentConfig
+from fast_agent.a2a.remote_agent import A2ARemoteAgent, _parts_from_messages
+from fast_agent.agents.agent_types import AgentConfig, AgentType
+from fast_agent.types import PromptMessageExtended
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator, Iterator
+
+    from fast_agent.llm.stream_types import StreamChunk
+
+
+async def _events(*events: StreamResponse) -> AsyncIterator[StreamResponse]:
+    for event in events:  # replay the given responses, in order, as an async stream
+        yield event
+
+
+def _remote_agent() -> A2ARemoteAgent:
+    return A2ARemoteAgent(
+        config=AgentConfig(name="remote", agent_type=AgentType.A2A, use_history=False),
+        a2a_config=A2AAgentConfig(url="http://127.0.0.1:41242"),
+    )
+
+
+class _FakeStreamHandle:  # test double yielded by _FakeDisplay.streaming_assistant_message
+    def __init__(self, *, preserve: bool) -> None:
+        self.preserve = preserve
+        self.chunks: list[str] = []  # text of every chunk passed to update_chunk
+        self.finalized = False  # set to True by finalize()
+
+    def update_chunk(self, chunk: StreamChunk) -> None:
+        self.chunks.append(chunk.text)
+
+    async def wait_for_drain(self) -> None:
+        return  # nothing is buffered here, so there is nothing to wait for
+
+    def preserve_final_frame(self) -> bool:
+        return self.preserve and bool(self.chunks)  # needs preserve=True and at least one chunk
+
+    def finalize(self, message: PromptMessageExtended) -> None:
+        del message  # unused; only the fact of finalization is recorded
+        self.finalized = True
+
+
+class _FakeDisplay:  # display double that exposes a single shared _FakeStreamHandle
+    def __init__(self, *, preserve: bool = True) -> None:
+        self.handle = _FakeStreamHandle(preserve=preserve)
+        self.assistant_messages: list[PromptMessageExtended] = []  # see show_assistant_message
+
+    def show_user_message(self, *_args: object, **_kwargs: object) -> None:
+        return  # user messages are ignored
+
+    @contextmanager
+    def streaming_assistant_message(self, **_kwargs: object) -> Iterator[_FakeStreamHandle]:
+        yield self.handle  # the same handle every time, so tests can inspect it afterwards
+
+    async def show_assistant_message(
+        self,
+        message: PromptMessageExtended,
+        **_kwargs: object,
+    ) -> None:
+        self.assistant_messages.append(message)  # record instead of rendering
+
+
+class _FakeClient:  # client double that replays a fixed list of stream responses
+    def __init__(self, events: list[StreamResponse]) -> None:
+        self.events = events
+        self.requests: list[object] = []  # every request passed to send_message, in order
+
+    def send_message(self, request: object) -> AsyncIterator[StreamResponse]:
+        self.requests.append(request)
+        return _events(*self.events)  # the same events are replayed on every call
+
+
+def _artifact_update(
+    text: str,
+    *,
+    append: bool = False,
+    last_chunk: bool = False,
+) -> StreamResponse:
+    return StreamResponse(  # one artifact-update event carrying a single text part
+        artifact_update=TaskArtifactUpdateEvent(
+            task_id="task-1",  # fixed ids shared by every event this helper builds
+            context_id="ctx-1",
+            artifact=Artifact(name="response", parts=[Part(text=text)]),
+            append=append,
+            last_chunk=last_chunk,
+        )
+    )
+
+
+def test_a2a_remote_agent_starts_without_client_generated_context_id() -> None:
+    agent = _remote_agent()
+    # A freshly constructed agent has neither a context id nor a task id.
+    assert agent.context_id is None
+    assert agent.current_task_id is None
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_clears_task_id_for_terminal_full_task_event() -> None:
+    agent = _remote_agent()
+    agent.current_task_id = "previous-task"
+    # Feed a single full Task event whose status is already COMPLETED.
+    result = await agent._consume_events(
+        _events(
+            StreamResponse(
+                task=Task(
+                    id="terminal-task",
+                    context_id="ctx-1",
+                    status=TaskStatus(state=TaskState.TASK_STATE_COMPLETED),
+                    artifacts=[Artifact(name="response", parts=[Part(text="done")])],
+                )
+            )
+        )
+    )
+    # The context id is adopted and the state recorded, while the task id ends up cleared.
+    assert result.text == "done"
+    assert result.state == "TASK_STATE_COMPLETED"
+    assert agent.context_id == "ctx-1"
+    assert agent.last_task_state == "TASK_STATE_COMPLETED"
+    assert agent.current_task_id is None
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_keeps_task_id_for_input_required_full_task_event() -> None:
+    agent = _remote_agent()
+    # Feed a single full Task event whose status is INPUT_REQUIRED.
+    result = await agent._consume_events(
+        _events(
+            StreamResponse(
+                task=Task(
+                    id="input-task",
+                    context_id="ctx-2",
+                    status=TaskStatus(state=TaskState.TASK_STATE_INPUT_REQUIRED),
+                )
+            )
+        )
+    )
+    # For INPUT_REQUIRED the task id from the event is kept on the agent.
+    assert result.state == "TASK_STATE_INPUT_REQUIRED"
+    assert agent.context_id == "ctx-2"
+    assert agent.last_task_state == "TASK_STATE_INPUT_REQUIRED"
+    assert agent.current_task_id == "input-task"
+
+
+def test_a2a_remote_agent_clears_context_for_no_history_completed_turns() -> None:
+    agent = _remote_agent()
+    agent.context_id = "ctx-completed"
+    agent.current_task_id = None
+    agent.last_task_state = "TASK_STATE_COMPLETED"
+    # Prepare the next turn without history after a completed task.
+    agent._prepare_turn_state(use_history=False)
+    # Context id, task id, and last task state are all reset.
+    assert agent.context_id is None
+    assert agent.current_task_id is None
+    assert agent.last_task_state is None
+
+
+def test_a2a_remote_agent_keeps_input_required_task_for_no_history_follow_up() -> None:
+    agent = _remote_agent()
+    agent.context_id = "ctx-input"
+    agent.current_task_id = "task-input"
+    agent.last_task_state = "TASK_STATE_INPUT_REQUIRED"
+    # Prepare the next turn without history while the task still awaits input.
+    agent._prepare_turn_state(use_history=False)
+    # Nothing is reset in this case.
+    assert agent.context_id == "ctx-input"
+    assert agent.current_task_id == "task-input"
+    assert agent.last_task_state == "TASK_STATE_INPUT_REQUIRED"
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_first_request_omits_context_and_task_ids() -> None:
+    agent = _remote_agent()
+    display = _FakeDisplay()
+    fake_client = _FakeClient(
+        [
+            StreamResponse(
+                task=Task(
+                    id="task-server",
+                    context_id="ctx-server",
+                    status=TaskStatus(state=TaskState.TASK_STATE_COMPLETED),
+                )
+            )
+        ]
+    )
+    agent.display = cast("Any", display)
+    agent._client = fake_client
+    # Send the first turn through the fake client, which records the outgoing request.
+    await agent.generate_impl(
+        [PromptMessageExtended(role="user", content=[TextContent(type="text", text="hello")])]
+    )
+    # The request carries empty ids; afterwards the agent holds the context id from the reply.
+    request = cast("Any", fake_client.requests[0])
+    assert request.message.context_id == ""
+    assert request.message.task_id == ""
+    assert agent.context_id == "ctx-server"
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_message_only_response_updates_context_without_task_state() -> None:
+    agent = _remote_agent()
+    # Feed a bare Message event; the stream contains no Task at all.
+    result = await agent._consume_events(
+        _events(
+            StreamResponse(
+                message=Message(
+                    role=Role.ROLE_AGENT,
+                    message_id="message-only",
+                    context_id="ctx-message",
+                    parts=[Part(text="hello")],
+                )
+            )
+        )
+    )
+    # Text and context id come from the message; task id and task state stay unset.
+    assert result.text == "hello"
+    assert result.state is None
+    assert agent.context_id == "ctx-message"
+    assert agent.current_task_id is None
+    assert agent.last_task_state is None
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_streams_chunks_to_live_display() -> None:
+    agent = _remote_agent()
+    display = _FakeDisplay(preserve=True)
+    agent.display = cast("Any", display)
+    agent._client = _FakeClient(
+        [
+            _artifact_update("one "),
+            _artifact_update("two", append=True, last_chunk=True),
+            StreamResponse(
+                task=Task(
+                    id="task-1",
+                    context_id="ctx-1",
+                    status=TaskStatus(state=TaskState.TASK_STATE_COMPLETED),
+                )
+            ),
+        ]
+    )
+    # Two artifact chunks followed by a completed Task are replayed by the fake client.
+    response = await agent.generate_impl(
+        [PromptMessageExtended(role="user", content=[TextContent(type="text", text="stream")])]
+    )
+    # Chunks reach the stream handle one by one and no separate final message is shown.
+    assert response.all_text() == "one two"
+    assert display.handle.chunks == ["one ", "two"]
+    assert display.handle.finalized
+    assert display.assistant_messages == []
+
+
+@pytest.mark.asyncio
+async def test_a2a_remote_agent_renders_final_message_when_live_display_cannot_preserve() -> None:
+    agent = _remote_agent()
+    display = _FakeDisplay(preserve=False)  # preserve_final_frame() will report False
+    agent.display = cast("Any", display)
+    agent._client = _FakeClient([_artifact_update("final", last_chunk=True)])
+    # A single artifact chunk is streamed; no Task event follows it.
+    response = await agent.generate_impl(
+        [PromptMessageExtended(role="user", content=[TextContent(type="text", text="stream")])]
+    )
+    # The chunk is streamed and the final message is also shown via show_assistant_message.
+    assert response.all_text() == "final"
+    assert display.handle.chunks == ["final"]
+    assert [message.all_text() for message in display.assistant_messages] == ["final"]
+
+
+def test_a2a_remote_agent_sends_json_text_resources_as_data_parts() -> None:
+    parts = _parts_from_messages(
+        [
+            PromptMessageExtended(
+                role="user",
+                content=[
+                    EmbeddedResource(
+                        type="resource",
+                        resource=TextResourceContents(
+                            uri=AnyUrl("resource:///query.json"),
+                            mimeType="application/json",
+                            text='{"format": "markdown", "limit": 5}',
+                        ),
+                    )
+                ],
+            )
+        ]
+    )
+    # One data part with the parsed JSON is expected; the integer 5 is compared as 5.0.
+    assert len(parts) == 1
+    assert parts[0].HasField("data")
+    assert parts[0].media_type == "application/json"
+    assert MessageToDict(parts[0])["data"] == {"format": "markdown", "limit": 5.0}
diff --git a/tests/unit/fast_agent/test_package_lazy_exports.py b/tests/unit/fast_agent/test_package_lazy_exports.py
new file mode 100644
index 000000000..531dce2bf
--- /dev/null
+++ b/tests/unit/fast_agent/test_package_lazy_exports.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+import importlib
+import sys
+from typing import TYPE_CHECKING
+
+import pytest
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+
+
+@pytest.fixture(autouse=True)
+def restore_fast_agent_import_state() -> Iterator[None]:
+    # Snapshot every cached fast_agent* module so each test may purge and re-import freely.
+    snapshot = {key: mod for key, mod in sys.modules.items() if key.startswith("fast_agent")}
+    yield
+    # Drop whatever the test (re)imported, then reinstate the pre-test module objects.
+    stale = [key for key in tuple(sys.modules) if key.startswith("fast_agent")]
+    for key in stale:
+        sys.modules.pop(key, None)
+    sys.modules.update(snapshot)
+
+
+def test_package_import_defers_config_until_public_export_access() -> None:
+    for cached in ("fast_agent", "fast_agent.config"):
+        sys.modules.pop(cached, None)
+    # Importing the bare package must not load the config module as a side effect.
+    package = importlib.import_module("fast_agent")
+    config_loaded_eagerly = "fast_agent.config" in sys.modules
+    assert not config_loaded_eagerly
+    assert package.Settings.__name__ == "Settings"
+    assert "fast_agent.config" in sys.modules
+
+
+def test_package_import_defers_types_until_public_export_access() -> None:
+    for cached in ("fast_agent", "fast_agent.types"):
+        sys.modules.pop(cached, None)
+    # Importing the bare package must not load the types module as a side effect.
+    package = importlib.import_module("fast_agent")
+    types_loaded_eagerly = "fast_agent.types" in sys.modules
+    assert not types_loaded_eagerly
+    assert package.RequestParams.__name__ == "RequestParams"
+    assert "fast_agent.types" in sys.modules
+
+
+def test_a2a_package_import_defers_server_stack() -> None:
+    for cached in ("fast_agent.a2a", "fast_agent.a2a.server"):
+        sys.modules.pop(cached, None)
+    # A fresh import of the a2a package alone must leave the server submodule unloaded.
+    importlib.import_module("fast_agent.a2a")
+    server_loaded = "fast_agent.a2a.server" in sys.modules
+    assert not server_loaded
+
+
+def test_a2a_connect_import_defers_config() -> None:
+    for cached in ("fast_agent.a2a.connect", "fast_agent.config"):
+        sys.modules.pop(cached, None)
+    # Load only the connect helpers; the config module must remain unimported afterwards.
+    connect_module = importlib.import_module("fast_agent.a2a.connect")
+    config_loaded = "fast_agent.config" in sys.modules
+    assert not config_loaded
+    assert connect_module.normalize_a2a_transport("jsonrpc") == "JSONRPC"
diff --git a/tests/unit/fast_agent/ui/test_a2a_command_dispatch.py b/tests/unit/fast_agent/ui/test_a2a_command_dispatch.py
new file mode 100644
index 000000000..9c08d801d
--- /dev/null
+++ b/tests/unit/fast_agent/ui/test_a2a_command_dispatch.py
@@ -0,0 +1,154 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, cast
+
+import pytest
+from a2a.types import AgentCard, AgentInterface, AgentProvider
+
+from fast_agent.a2a.config import A2AAgentConfig
+from fast_agent.a2a.remote_agent import A2ARemoteAgent
+from fast_agent.agents.agent_types import AgentConfig, AgentType
+from fast_agent.core.agent_app import AgentApp
+from fast_agent.ui.command_payloads import A2ACommand
+from fast_agent.ui.interactive import command_dispatch
+from fast_agent.ui.interactive.command_dispatch import dispatch_command_payload
+from fast_agent.ui.interactive_prompt import InteractivePrompt
+
+if TYPE_CHECKING:
+    from fast_agent.interfaces import AgentProtocol
+
+
+class _SelectedTransport:
+    """Empty stand-in for the agent's client; the tests only check its class name."""
+
+
+def _remote_agent(*, name: str = "remote") -> A2ARemoteAgent:  # seeded agent for these tests
+    agent = A2ARemoteAgent(
+        config=AgentConfig(name=name, agent_type=AgentType.A2A, use_history=True),
+        a2a_config=A2AAgentConfig(url="http://127.0.0.1:41242", transport="JSONRPC"),
+    )
+    agent.context_id = "ctx-current"  # session values asserted in the status output
+    agent.current_task_id = "task-current"
+    agent.last_task_state = "TASK_STATE_INPUT_REQUIRED"
+    agent.remote_card = AgentCard(  # card name and interface are asserted in the card output
+        name="Remote A2A",
+        description="Deterministic remote A2A agent.",
+        provider=AgentProvider(organization="tests", url="https://example.com"),
+        version="1.0",
+        supported_interfaces=[
+            AgentInterface(
+                protocol_binding="JSONRPC",
+                protocol_version="1.0",
+                url="http://127.0.0.1:41242/a2a/jsonrpc",
+            )
+        ],
+    )
+    agent._client = _SelectedTransport()  # stand-in; its class name is asserted in the output
+    return agent
+
+
+async def _dispatch(  # send one A2ACommand through dispatch_command_payload
+    owner: InteractivePrompt,
+    app: AgentApp,
+    payload: A2ACommand,
+) -> command_dispatch.DispatchResult:
+    return await dispatch_command_payload(
+        owner,
+        payload,
+        prompt_provider=app,
+        agent="remote",  # the active agent is fixed to "remote" in this helper
+        available_agents=list(app.registered_agents()),
+        available_agents_set=set(app.registered_agents()),
+        merge_pinned_agents=lambda names: names,  # identity: names pass through unchanged
+    )
+
+
+def _app(agents: dict[str, object]) -> AgentApp:  # wrap arbitrary objects in an AgentApp
+    return AgentApp(cast("dict[str, AgentProtocol]", agents))  # cast is typing-only
+
+
+@pytest.mark.asyncio
+async def test_a2a_tui_dispatch_reports_status_transport_and_card(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    printed: list[str] = []
+    monkeypatch.setattr(  # capture rich_print calls as strings instead of printing
+        command_dispatch,
+        "rich_print",
+        lambda value="", *args, **kwargs: printed.append(str(value)),
+    )
+    remote = _remote_agent()
+    app = _app({"remote": remote})
+    owner = InteractivePrompt(agent_types={"remote": AgentType.A2A})
+    # Act: run status without a target, then transport and card with the explicit "remote" target.
+    status = await _dispatch(owner, app, A2ACommand(action="status", argument=None))
+    transport = await _dispatch(owner, app, A2ACommand(action="transport", argument="remote"))
+    card = await _dispatch(owner, app, A2ACommand(action="card", argument="remote"))
+    # Assert: all three were handled and the captured output contains each expected line.
+    assert status.handled
+    assert transport.handled
+    assert card.handled
+    output = "\n".join(printed)
+    assert "A2A status: remote" in output
+    assert "Context: ctx-current" in output
+    assert "Task: task-current" in output
+    assert "Last state: TASK_STATE_INPUT_REQUIRED" in output
+    assert "A2A transport: remote" in output
+    assert "Requested: JSONRPC" in output
+    assert "Selected client: _SelectedTransport" in output
+    assert "A2A card: Remote A2A" in output
+    assert "JSONRPC 1.0: http://127.0.0.1:41242/a2a/jsonrpc" in output
+
+
+@pytest.mark.asyncio
+async def test_a2a_tui_dispatch_lists_and_resets_remote_agents(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    printed: list[str] = []
+    monkeypatch.setattr(  # capture rich_print calls as strings instead of printing
+        command_dispatch,
+        "rich_print",
+        lambda value="", *args, **kwargs: printed.append(str(value)),
+    )
+    remote = _remote_agent()
+    local = object()  # placeholder agent, registered below with AgentType.BASIC
+    app = _app({"remote": remote, "local": local})
+    owner = InteractivePrompt(agent_types={"remote": AgentType.A2A, "local": AgentType.BASIC})
+    # Act: list the agents, then reset the remote agent.
+    listed = await _dispatch(owner, app, A2ACommand(action="list", argument=None))
+    reset = await _dispatch(owner, app, A2ACommand(action="reset", argument="remote"))
+    # Assert: only "remote" is listed, and reset cleared the context, task and last state.
+    assert listed.handled
+    assert reset.handled
+    assert "  • remote" in printed
+    assert all("local" not in line for line in printed)
+    assert remote.context_id is None
+    assert remote.current_task_id is None
+    assert remote.last_task_state is None
+
+
+@pytest.mark.asyncio
+async def test_a2a_tui_dispatch_rejects_a2a_commands_for_local_agent(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    printed: list[str] = []
+    monkeypatch.setattr(  # capture rich_print calls as strings instead of printing
+        command_dispatch,
+        "rich_print",
+        lambda value="", *args, **kwargs: printed.append(str(value)),
+    )
+    app = _app({"local": object()})
+    owner = InteractivePrompt(agent_types={"local": AgentType.BASIC})
+    # Act: request A2A status for "local", which is registered as a BASIC agent.
+    result = await dispatch_command_payload(
+        owner,
+        A2ACommand(action="status", argument="local"),
+        prompt_provider=app,
+        agent="local",
+        available_agents=["local"],
+        available_agents_set={"local"},
+        merge_pinned_agents=lambda names: names,
+    )
+    # Assert: the command still counts as handled and the rejection message was printed.
+    assert result.handled
+    assert "Agent 'local' is not an A2A agent." in "\n".join(printed)
diff --git a/tests/unit/fast_agent/ui/test_command_intent_contract.py b/tests/unit/fast_agent/ui/test_command_intent_contract.py
index 9692af2dc..0959a8581 100644
--- a/tests/unit/fast_agent/ui/test_command_intent_contract.py
+++ b/tests/unit/fast_agent/ui/test_command_intent_contract.py
@@ -12,6 +12,7 @@
     McpListCommand,
     McpSessionCommand,
     ShowHistoryCommand,
+    ToggleTraceCommand,
     UnknownCommand,
 )
 from fast_agent.ui.prompt import parse_special_input
@@ -153,6 +154,11 @@
             UnknownCommand(command="/does-not-exist"),
             id="unknown-command-fallback",
         ),
+        pytest.param(
+            "***TRACE",
+            ToggleTraceCommand(),
+            id="hidden-trace-toggle",
+        ),
     ],
 )
 def test_parse_special_input_intent_contract(
diff --git a/tests/unit/fast_agent/ui/test_enhanced_prompt_toolbar.py b/tests/unit/fast_agent/ui/test_enhanced_prompt_toolbar.py
index ccd14eb6a..654a008cf 100644
--- a/tests/unit/fast_agent/ui/test_enhanced_prompt_toolbar.py
+++ b/tests/unit/fast_agent/ui/test_enhanced_prompt_toolbar.py
@@ -173,3 +173,10 @@ def test_format_toolbar_agent_identity_omits_badge_for_basic_agent() -> None:
 
     assert "[S]" not in identity
     assert "agent " in identity
+
+
+def test_format_toolbar_agent_identity_includes_a2a_badge_and_magenta() -> None:
+    identity = _format_toolbar_agent_identity("remote", "ansiblue", _StubAgent(AgentType.A2A))
+    # "ansiblue" is passed in, yet the markup is expected to carry the badge and "ansimagenta".
+    assert "remote[A2A]" in identity
+    assert "ansimagenta" in identity
diff --git a/tests/unit/fast_agent/ui/test_input_toolbar.py b/tests/unit/fast_agent/ui/test_input_toolbar.py
index 3b4a8a068..d937ac19a 100644
--- a/tests/unit/fast_agent/ui/test_input_toolbar.py
+++ b/tests/unit/fast_agent/ui/test_input_toolbar.py
@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING, cast
 
 from fast_agent.agents.workflow.parallel_agent import ParallelAgent
+from fast_agent.llm.trace import set_llm_trace_enabled
 from fast_agent.ui.attachment_indicator import DraftAttachmentSummary
 from fast_agent.ui.prompt.attachment_tokens import build_local_attachment_token
 from fast_agent.ui.prompt.input_toolbar import (
@@ -99,6 +100,7 @@ def test_build_middle_segment_prefixes_codex_before_overlay() -> None:
 
 
 def test_build_middle_segment_renders_attachment_indicator() -> None:
+    set_llm_trace_enabled(False)
     middle = _build_middle_segment(
         ToolbarAgentState(
             model_display="gpt-4.1",
@@ -122,6 +124,22 @@ def test_build_middle_segment_renders_attachment_indicator() -> None:
     assert middle.index("gpt-4.1") < middle.index("FAST") < middle.index("WEB")
 
 
+def test_build_middle_segment_renders_trace_indicator() -> None:
+    set_llm_trace_enabled(True)  # switched back off in the finally block below
+    try:
+        middle = _build_middle_segment(
+            ToolbarAgentState(
+                model_display="gpt-4.1",
+                turn_count=3,
+            ),
+            shortcut_text="",
+        )
+    finally:
+        set_llm_trace_enabled(False)
+    # With tracing enabled, the segment should include the red-on-black "*" marker markup.
+    assert "<style fg='ansired' bg='ansiblack'>*</style>" in middle
+
+
 def test_should_resolve_attachment_summary_only_for_attachment_tokens() -> None:
     assert not _should_resolve_attachment_summary("hello world")
     assert not _should_resolve_attachment_summary("^server:resource")
diff --git a/tests/unit/fast_agent/ui/test_parse_a2a_commands.py b/tests/unit/fast_agent/ui/test_parse_a2a_commands.py
new file mode 100644
index 000000000..3bda6c4f0
--- /dev/null
+++ b/tests/unit/fast_agent/ui/test_parse_a2a_commands.py
@@ -0,0 +1,55 @@
+from fast_agent.ui.command_payloads import A2ACommand
+from fast_agent.ui.enhanced_prompt import parse_special_input
+
+
+def test_parse_a2a_defaults_to_status() -> None:  # bare "/a2a" parses as status, no target
+    parsed = parse_special_input("/a2a")
+    assert isinstance(parsed, A2ACommand)
+    assert parsed.action == "status"
+    assert parsed.argument is None
+    assert parsed.error is None
+
+
+def test_parse_a2a_status_target() -> None:  # the word after "status" becomes the argument
+    parsed = parse_special_input("/a2a status remote")
+    assert isinstance(parsed, A2ACommand)
+    assert parsed.action == "status"
+    assert parsed.argument == "remote"
+
+
+def test_parse_a2a_connect_preserves_arguments() -> None:
+    tail = "http://127.0.0.1:41241 --transport JSONRPC"  # must come back verbatim
+    parsed = parse_special_input(f"/a2a connect {tail}")
+    assert isinstance(parsed, A2ACommand)
+    assert (parsed.action, parsed.argument) == ("connect", tail)
+    assert parsed.error is None
+
+
+def test_parse_a2a_connect_preserves_oauth_switch() -> None:
+    tail = "http://127.0.0.1:41241 --oauth"  # the switch must stay inside the argument text
+    parsed = parse_special_input(f"/a2a connect {tail}")
+    assert isinstance(parsed, A2ACommand)
+    assert (parsed.action, parsed.argument) == ("connect", tail)
+    assert parsed.error is None
+
+
+def test_parse_a2a_unknown_action_reports_error() -> None:  # unknown verbs still parse
+    parsed = parse_special_input("/a2a wat remote")
+    assert isinstance(parsed, A2ACommand)
+    assert parsed.action == "wat"
+    assert parsed.argument == "remote"
+    assert parsed.error == "Unknown /a2a action: wat"
+
+
+def test_parse_a2a_transport_target() -> None:  # the word after "transport" is the argument
+    parsed = parse_special_input("/a2a transport remote")
+    assert isinstance(parsed, A2ACommand)
+    assert parsed.action == "transport"
+    assert parsed.argument == "remote"
+
+
+def test_parse_a2a_help_variants() -> None:  # every help spelling must parse without error
+    for command in ["/a2a help", "/a2a ?", "/a2a -h", "/a2a --help", "/a2a commands"]:
+        result = parse_special_input(command)
+        assert isinstance(result, A2ACommand), command  # message names the failing variant
+        assert result.error is None, command
diff --git a/tests/unit/test_a2a_docs_pipeline.py b/tests/unit/test_a2a_docs_pipeline.py
new file mode 100644
index 000000000..e30ccedcc
--- /dev/null
+++ b/tests/unit/test_a2a_docs_pipeline.py
@@ -0,0 +1,103 @@
+import importlib.util
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[2]  # tests/unit/<this file> -> repository root
+PIPELINE_PATH = ROOT / "scripts" / "a2a_docs_pipeline.py"
+# Load the pipeline script by file path and execute it once, when this test module is imported.
+spec = importlib.util.spec_from_file_location("a2a_docs_pipeline", PIPELINE_PATH)
+assert spec is not None
+assert spec.loader is not None
+a2a_docs_pipeline = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(a2a_docs_pipeline)
+
+
+def test_a2a_docs_snippets_are_current() -> None:  # passes if check() returns without raising
+    a2a_docs_pipeline.check()
+
+
+def test_a2a_getting_started_includes_generated_snippets() -> None:
+    page = ROOT / "docs" / "docs" / "a2a" / "getting-started.md"
+    text = page.read_text(encoding="utf-8")
+    for filename in [  # snippet files the getting-started page must reference
+        "start-fake-server.sh",
+        "cli-stream-command.sh",
+        "cli-stream-output.txt",
+        "cli-files-command.sh",
+        "cli-files-output.txt",
+        "agent-card.yaml",
+        "tui-session.txt",
+    ]:
+        assert f'docs/docs/a2a/snippets/{filename}' in text  # repo-relative path must appear
+
+
+def test_a2a_cast_asset_is_present() -> None:
+    asset = ROOT / "docs" / "docs" / "assets" / "a2a" / "a2a-streaming-files.cast"
+    assert asset.is_file()
+    first_line = asset.read_text(encoding="utf-8").splitlines()[0]  # first line of the recording
+    assert '"version"' in first_line  # presumably the asciicast JSON header - confirm
+
+
+def test_a2a_client_server_cast_assets_are_present() -> None:
+    assets = ROOT / "docs" / "docs" / "assets" / "a2a"
+    for filename in [  # recordings referenced by the client and server pages
+        "a2a-client-cli.cast",
+        "a2a-client-input-required.cast",
+        "a2a-server-card.cast",
+    ]:
+        asset = assets / filename
+        assert asset.is_file()
+        first_line = asset.read_text(encoding="utf-8").splitlines()[0]
+        assert '"version"' in first_line  # presumably the asciicast JSON header - confirm
+
+
+def test_a2a_getting_started_embeds_asciinema_player() -> None:
+    page = ROOT / "docs" / "docs" / "a2a" / "getting-started.md"
+    text = page.read_text(encoding="utf-8")
+    assert "AsciinemaPlayer.create" in text  # the page contains the player bootstrap call
+    assert "../../assets/a2a/a2a-streaming-files.cast" in text
+    assert "../../assets/vendor/asciinema-player/asciinema-player.css" in text
+    assert "../../assets/vendor/asciinema-player/catppuccin.css" in text
+    assert "../../assets/vendor/asciinema-player/asciinema-player.min.js" in text
+    assert "fast-agent-dark" in text  # both theme names must be mentioned
+    assert "fast-agent-light" in text
+    assert 'data-a2a-terminal-theme="auto"' in text  # the three theme attribute values
+    assert 'data-a2a-terminal-theme="light"' in text
+    assert 'data-a2a-terminal-theme="dark"' in text
+    assert "rows: 27" in text  # same number as the cast "height" asserted further down
+
+
+def test_a2a_client_server_pages_embed_recordings() -> None:
+    client = (ROOT / "docs" / "docs" / "a2a" / "client.md").read_text(encoding="utf-8")
+    server = (ROOT / "docs" / "docs" / "a2a" / "server.md").read_text(encoding="utf-8")
+    assert "a2a-client-cli.cast" in client  # client page: two recordings plus the player call
+    assert "a2a-client-input-required.cast" in client
+    assert "AsciinemaPlayer.create" in client
+    assert "a2a-server-card.cast" in server  # server page: one recording plus the player call
+    assert "AsciinemaPlayer.create" in server
+
+
+def test_asciinema_player_vendor_assets_are_present() -> None:
+    vendor = ROOT / "docs" / "docs" / "assets" / "vendor" / "asciinema-player"
+    css = vendor / "asciinema-player.css"
+    catppuccin = vendor / "catppuccin.css"
+    js = vendor / "asciinema-player.min.js"
+    assert css.is_file()  # all three vendored files must exist before their text is checked
+    assert catppuccin.is_file()
+    assert js.is_file()
+    assert "ap-wrapper" in css.read_text(encoding="utf-8")
+    catppuccin_text = catppuccin.read_text(encoding="utf-8")
+    assert "asciinema-player-theme-fast-agent-dark" in catppuccin_text
+    assert "asciinema-player-theme-fast-agent-light" in catppuccin_text
+    assert "a2a-terminal-theme-switch" in catppuccin_text
+    assert "AsciinemaPlayer" in js.read_text(encoding="utf-8")[:200]  # first 200 chars only
+
+
+def test_a2a_cast_contains_ansi_escape_sequences() -> None:
+    asset = ROOT / "docs" / "docs" / "assets" / "a2a" / "a2a-streaming-files.cast"
+    assert "\\u001b[" in asset.read_text(encoding="utf-8")  # escaped text form, not a raw ESC
+
+
+def test_a2a_cast_uses_compact_rows() -> None:
+    asset = ROOT / "docs" / "docs" / "assets" / "a2a" / "a2a-streaming-files.cast"
+    first_line = asset.read_text(encoding="utf-8").splitlines()[0]
+    assert '"height": 27' in first_line  # exact text match, including the space after the colon
diff --git a/tests/unit/test_docs_generation_formatting.py b/tests/unit/test_docs_generation_formatting.py
new file mode 100644
index 000000000..81b270c03
--- /dev/null
+++ b/tests/unit/test_docs_generation_formatting.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+from typing import Any, Literal
+
+
+def _load_generate_reference_docs() -> Any:  # import docs/generate_reference_docs.py by path
+    path = Path(__file__).resolve().parents[2] / "docs" / "generate_reference_docs.py"
+    spec = importlib.util.spec_from_file_location("generate_reference_docs", path)
+    assert spec is not None
+    loader = spec.loader
+    assert loader is not None
+    module = importlib.util.module_from_spec(spec)
+    sys.modules["generate_reference_docs"] = module  # registered before the module body runs
+    loader.exec_module(module)
+    return module
+
+
+_docs = _load_generate_reference_docs()  # loaded once at import time; used by every test below
+
+
+def test_format_type_uses_pipe_none_for_optional_annotations() -> None:  # "X | None" form
+    assert _docs._format_type(str | None) == "str | None"
+    assert _docs._format_type(dict[str, Any] | None) == "dict[str, Any] | None"
+
+
+def test_format_type_quotes_literal_string_values() -> None:  # Literal members keep quotes
+    assert _docs._format_type(Literal["auto", "off"] | None) == "Literal['auto', 'off'] | None"
+
+
+def test_normalize_signature_text_hides_pathlib_internal_module() -> None:
+    signature = "(instruction: str | pathlib._local.Path | None = None)"
+    # "pathlib._local.Path" should come back as the public "pathlib.Path" spelling.
+    assert _docs._normalize_signature_text(signature) == (
+        "(instruction: str | pathlib.Path | None = None)"
+    )