diff --git a/cli/DEVELOPMENT.md b/cli/DEVELOPMENT.md index f5db2895..c73852aa 100644 --- a/cli/DEVELOPMENT.md +++ b/cli/DEVELOPMENT.md @@ -1,11 +1,11 @@ -# Dirac CLI +# ISAAC CLI -The official CLI for Dirac. Run Dirac tasks directly from the terminal with the same underlying functionality as the VS Code extension. +The official CLI for ISAAC. Run ISAAC tasks directly from the terminal with the same underlying functionality as the VS Code extension. ## Features - **Reuses Core Codebase**: Shares the same Controller, Task, and API handling as the VS Code extension -- **Terminal Output**: Displays Dirac messages directly in your terminal with colored output +- **Terminal Output**: Displays ISAAC messages directly in your terminal with colored output - **Task History**: Access your task history from the command line - **Configurable**: Use custom configuration directories and working directories - **Image Support**: Attach images to your prompts using file paths or inline references @@ -14,7 +14,7 @@ The official CLI for Dirac. Run Dirac tasks directly from the terminal with the - Node.js 20.x or later - npm or yarn -- The parent Dirac project dependencies installed +- The parent ISAAC project dependencies installed ## Installation @@ -43,17 +43,17 @@ npm run cli:link ### Interactive Mode (Default) -When you run `dirac` without any command, it launches an interactive welcome prompt: +When you run `isaac` without any command, it launches an interactive welcome prompt: ```bash # Launch interactive mode -dirac +isaac # Or run a task directly -dirac "Create a hello world function in Python" +isaac "Create a hello world function in Python" # With options -dirac -v --thinking "Analyze this codebase" +isaac -v --thinking "Analyze this codebase" ``` ### Commands @@ -63,8 +63,8 @@ dirac -v --thinking "Analyze this codebase" Run a new task with a prompt. ```bash -dirac task "Create a hello world function in Python" -dirac t "Create a hello world function" +isaac task "Create a hello world function in Python" +isaac t "Create a hello world function" ``` **Options:** @@ -78,29 +78,29 @@ dirac t "Create a hello world function" | `-i, --images ` | Image file paths to include with the task | | `-v, --verbose` | Show verbose output including reasoning | | `-c, --cwd ` | Working directory for the task | -| `--config ` | Path to Dirac configuration directory | +| `--config ` | Path to ISAAC configuration directory | | `-t, --thinking` | Enable extended thinking (1024 token budget) | **Examples:** ```bash # Run in plan mode with verbose output -dirac task -p -v "Design a REST API" +isaac task -p -v "Design a REST API" # Use a specific model with yolo mode -dirac task -m claude-sonnet-4-5-20250929 -y "Refactor this function" +isaac task -m claude-sonnet-4-5-20250929 -y "Refactor this function" # Include images with your prompt -dirac task -i screenshot.png diagram.jpg "Fix the UI based on these images" +isaac task -i screenshot.png diagram.jpg "Fix the UI based on these images" # Or use inline image references in the prompt -dirac task "Fix the layout shown in @./screenshot.png" +isaac task "Fix the layout shown in @./screenshot.png" # Enable extended thinking for complex tasks -dirac task -t "Architect a microservices system" +isaac task -t "Architect a microservices system" # Specify working directory -dirac task -c /path/to/project "Add unit tests" +isaac task -c /path/to/project "Add unit tests" ``` #### `history` (alias: `h`) @@ -108,8 +108,8 @@ dirac task -c /path/to/project "Add unit tests" List task history with pagination support. ```bash -dirac history -dirac h +isaac history +isaac h ``` **Options:** @@ -118,19 +118,19 @@ dirac h |--------|-------------| | `-n, --limit ` | Number of tasks to show (default: 10) | | `-p, --page ` | Page number, 1-based (default: 1) | -| `--config ` | Path to Dirac configuration directory | +| `--config ` | Path to ISAAC configuration directory | **Examples:** ```bash # Show last 10 tasks (default) -dirac history +isaac history # Show 20 tasks -dirac history -n 20 +isaac history -n 20 # Show page 2 with 5 tasks per page -dirac history -n 5 -p 2 +isaac history -n 5 -p 2 ``` #### `config` @@ -138,21 +138,21 @@ dirac history -n 5 -p 2 Show current configuration including global and workspace state. ```bash -dirac config +isaac config ``` **Options:** | Option | Description | |--------|-------------| -| `--config ` | Path to Dirac configuration directory | +| `--config ` | Path to ISAAC configuration directory | #### `auth` Authenticate a provider and configure what model is used. ```bash -dirac auth +isaac auth ``` **Options:** @@ -165,22 +165,22 @@ dirac auth | `-b, --baseurl ` | Base URL (optional, only for openai provider) | | `-v, --verbose` | Show verbose output | | `-c, --cwd ` | Working directory for the task | -| `--config ` | Path to Dirac configuration directory | +| `--config ` | Path to ISAAC configuration directory | **Examples:** ```bash # Interactive authentication -dirac auth +isaac auth # Quick setup with provider and API key -dirac auth -p anthropic -k sk-ant-xxxxx +isaac auth -p anthropic -k sk-ant-xxxxx # Full quick setup with model -dirac auth -p openai-native -k sk-xxxxx -m gpt-4o +isaac auth -p openai-native -k sk-xxxxx -m gpt-4o # OpenAI-compatible provider with custom base URL -dirac auth -p openai -k your-api-key -b https://api.example.com/v1 +isaac auth -p openai -k your-api-key -b https://api.example.com/v1 ``` ### Global Options @@ -191,7 +191,7 @@ These options are available for the default command (running a task directly): |--------|-------------| | `-v, --verbose` | Show verbose output | | `-c, --cwd ` | Working directory | -| `--config ` | Configuration directory | +| `--config ` | ISAAC configuration directory | | `--thinking` | Enable extended thinking (1024 token budget) | ## Development @@ -202,11 +202,11 @@ These options are available for the default command (running a task directly): # 1. Install all dependencies (root, webview-ui, cli) npm run install:all -# 2. Build and link globally so you can run `dirac` from anywhere +# 2. Build and link globally so you can run `isaac` from anywhere npm run cli:link # 3. Test it -dirac --help +isaac --help ``` ### Scripts @@ -218,8 +218,8 @@ Run these from the repository root: | `npm run install:all` | Install deps for root, webview-ui, and cli | | `npm run cli:build` | Generate protos and build CLI | | `npm run cli:build:production` | Production build (minified) | -| `npm run cli:link` | Build and `npm link` so you can run `dirac` from anywhere | -| `npm run cli:unlink` | Remove the global `dirac` symlink | +| `npm run cli:link` | Build and `npm link` so you can run `isaac` from anywhere | +| `npm run cli:unlink` | Remove the global `isaac` symlink | | `npm run cli:dev` | Link + watch mode for development | | `npm run cli:watch` | Watch mode only (no initial build) | | `npm run cli:test` | Run CLI tests | @@ -229,7 +229,7 @@ Run these from the repository root: 1. Run `npm run cli:dev` - this links the CLI globally and starts watch mode 2. Make changes to files in `cli/src/` 3. The build automatically rebuilds on save -4. Test your changes by running `dirac` in another terminal +4. Test your changes by running `isaac` in another terminal 5. When done, run `npm run cli:unlink` to clean up ### Proto Generation @@ -352,7 +352,7 @@ npm run protos npm run cli:build ``` -### "command not found: dirac" +### "command not found: isaac" The CLI isn't linked globally. Run: diff --git a/cli/src/context/StdinContext.test.tsx b/cli/src/context/StdinContext.test.tsx index afaed618..7ed96273 100644 --- a/cli/src/context/StdinContext.test.tsx +++ b/cli/src/context/StdinContext.test.tsx @@ -1,5 +1,6 @@ import { Text } from "ink" import { render } from "ink-testing-library" +// biome-ignore lint/correctness/noUnusedImports: required in scope for the classic JSX runtime (tsconfig jsx: "react") import React from "react" import { afterEach, beforeEach, describe, expect, it, vi } from "vitest" import { StdinProvider } from "./StdinContext" diff --git a/cli/src/context/StdinContext.tsx b/cli/src/context/StdinContext.tsx index 4c0757c7..ba131cc1 100644 --- a/cli/src/context/StdinContext.tsx +++ b/cli/src/context/StdinContext.tsx @@ -1,7 +1,7 @@ /** * Context for tracking stdin raw mode support * Used to conditionally disable input handling when stdin doesn't support raw mode - * (e.g., when input is piped: echo "..." | diracdev) + * (e.g., when input is piped: echo "..." | isaac) */ import { useStdin } from "ink" diff --git a/cli/src/exports.ts b/cli/src/exports.ts index 739ef164..6f4ed71f 100644 --- a/cli/src/exports.ts +++ b/cli/src/exports.ts @@ -1,18 +1,18 @@ /** - * Dirac Library Exports + * ISAAC Library Exports * - * This file exports the public API for programmatic use of Dirac. - * Use these classes and types to embed Dirac into your applications. + * This file exports the public API for programmatic use of ISAAC. + * Use these classes and types to embed ISAAC into your applications. * * @example * ```typescript - * import { DiracAgent } from "dirac" + * import { DiracAgent } from "isaac-cli" * * const agent = new DiracAgent() * await agent.initialize({ clientCapabilities: {} }) * const session = await agent.newSession({ cwd: process.cwd() }) * ``` - * @module dirac + * @module isaac-cli */ export { DiracAgent } from "./agent/DiracAgent.js" diff --git a/cli/src/index.test.ts b/cli/src/index.test.ts index 3a110728..95eb4a0d 100644 --- a/cli/src/index.test.ts +++ b/cli/src/index.test.ts @@ -14,7 +14,7 @@ describe("CLI Commands", () => { beforeEach(() => { // Create a fresh program instance for each test program = new Command() - program.name("dirac").description("Dirac CLI - AI coding assistant").version("0.0.0") + program.name("isaac").description("ISAAC — Intelligence Souveraine Ailiance Agent Codeur").version("0.0.0") program.enablePositionalOptions() // Define commands matching index.ts @@ -67,11 +67,6 @@ describe("CLI Commands", () => { .option("--config ", "Configuration directory") .action(() => {}) - program - .command("kanban") - .description("Run npx kanban --agent dirac") - .action(() => {}) - // Default command for interactive mode program .argument("[prompt]", "Task prompt") @@ -86,7 +81,6 @@ describe("CLI Commands", () => { .option("--auto-condense", "Enable AI-powered context compaction instead of mechanical truncation") .option("--hooks-dir ", "Additional hooks directory") .option("--auto-approve-all", "Enable auto-approve all") - .option("--kanban", "Run npx kanban --agent dirac") .action(() => {}) }) @@ -280,13 +274,6 @@ describe("CLI Commands", () => { }) }) - describe("kanban command", () => { - it("should parse kanban command", () => { - const args = ["node", "cli", "kanban"] - program.parse(args) - }) - }) - describe("auth command", () => { it("should parse auth command", () => { const args = ["node", "cli", "auth"] @@ -382,10 +369,6 @@ describe("CLI Commands", () => { expect(program.opts().autoApproveAll).toBe(true) }) - it("should parse --kanban flag", () => { - program.parse(["node", "cli", "--kanban"]) - expect(program.opts().kanban).toBe(true) - }) }) describe("command structure", () => { @@ -395,7 +378,6 @@ describe("CLI Commands", () => { expect(commandNames).toContain("history") expect(commandNames).toContain("config") expect(commandNames).toContain("auth") - expect(commandNames).toContain("kanban") }) it("should have correct aliases", () => { diff --git a/cli/src/init.ts b/cli/src/init.ts index 9359a867..16c4fba1 100644 --- a/cli/src/init.ts +++ b/cli/src/init.ts @@ -113,7 +113,7 @@ export async function initializeCli(options: InitOptions): Promise { const controller = webview.controller as any await telemetryService.captureExtensionActivated() - await telemetryService.captureHostEvent("dirac_cli", "initialized") + await telemetryService.captureHostEvent("isaac_cli", "initialized") // =============== Symbol Index Service =============== // Initialize symbol index for the project in background diff --git a/cli/src/utils/piped.ts b/cli/src/utils/piped.ts index bb56a79b..734ff218 100644 --- a/cli/src/utils/piped.ts +++ b/cli/src/utils/piped.ts @@ -4,9 +4,9 @@ import * as fs from "node:fs" * Read piped input from stdin (non-blocking) * * This function is designed to work with piped input, including chained commands: - * git diff | dirac 'explain' | dirac 'summarize' + * git diff | isaac 'explain' | isaac 'summarize' * - * The challenge is that when chaining dirac commands, the first command may take + * The challenge is that when chaining isaac commands, the first command may take * several seconds to complete, so we can't use a short timeout. Instead, we wait * for EOF which signals that the previous command has finished writing. */ diff --git a/cli/tests/tracing/JsonlTracer.test.ts b/cli/tests/tracing/JsonlTracer.test.ts index b0c6b7f4..dd4e072c 100644 --- a/cli/tests/tracing/JsonlTracer.test.ts +++ b/cli/tests/tracing/JsonlTracer.test.ts @@ -339,4 +339,38 @@ describe("JsonlTracer", () => { tracer.appendTurn({ phase: "execute" }) tracer.close("aborted", 1) }) + + it("scrubs inline credentials from gateway_url before writing meta.json", () => { + const tracer = new JsonlTracer("task-metascrub", tmpDir) + tracer.writeMeta({ + task: "task-metascrub", + mode: "act", + approval_mode: "manual", + ailiance_agent_version: "0.1.0", + gateway_url: "http://admin:hunter2@studio:9300", + }) + const raw = fs.readFileSync(path.join(tmpDir, TRACING_DIR_NAME, "task-metascrub", "meta.json"), "utf8") + expect(raw).not.toContain("hunter2") + expect(raw).toContain("[REDACTED]") + // in-memory meta stays intact for later merges (only the on-disk copy is scrubbed) + tracer.mergeStats({ turns: 1 }) + const after = JSON.parse(fs.readFileSync(path.join(tmpDir, TRACING_DIR_NAME, "task-metascrub", "meta.json"), "utf8")) + expect(after.stats.turns).toBe(1) + }) + + it("scrubs secrets that surface in error strings", () => { + const tracer = new JsonlTracer("task-errscrub", tmpDir) + tracer.writeMeta({ + task: "task-errscrub", + mode: "act", + approval_mode: "manual", + ailiance_agent_version: "0.1.0", + gateway_url: "http://studio:9300", + }) + tracer.recordPlannerTurn("nope", 3, ["auth failed for token=supersecretvalue"]) + const line = JSON.parse( + fs.readFileSync(path.join(tmpDir, TRACING_DIR_NAME, "task-errscrub", "trace.jsonl"), "utf8").trim().split("\n")[0], + ) + expect(JSON.stringify(line.errors)).not.toContain("supersecretvalue") + }) }) diff --git a/src/core/mcp/McpClientManager.ts b/src/core/mcp/McpClientManager.ts index bd09ed15..fbccc367 100644 --- a/src/core/mcp/McpClientManager.ts +++ b/src/core/mcp/McpClientManager.ts @@ -10,6 +10,7 @@ import { Logger } from "../../shared/services/Logger" import { loadMcpConfigsFromPlugins } from "./McpServerConfigLoader" import type { ConnectedClient, McpServerConfig, McpToolMetadata, McpToolResult } from "./types" import { makeQualifiedToolName } from "./types" +import { assertMcpUrlAllowed } from "./urlSecurity" export interface McpLoadFilter { enabledServers?: string[] @@ -118,6 +119,10 @@ class McpClientManager { let transport: Parameters[0] if (cfg.type === "http") { + // Defense-in-depth: the loader already enforces this, but re-check at + // connect time in case a config reached us through another path. + const verdict = assertMcpUrlAllowed(cfg.url) + if (!verdict.ok) throw new Error(`MCP server "${serverId}": ${verdict.reason}`) // Dynamic import so the streamableHttp module (and its ESM-only // eventsource-parser dep) is pulled in only when an HTTP server connects. // esbuild bundles it into the dist; the MCP unit tests run under vitest diff --git a/src/core/mcp/McpServerConfigLoader.ts b/src/core/mcp/McpServerConfigLoader.ts index 0b7fe5fd..b15e2040 100644 --- a/src/core/mcp/McpServerConfigLoader.ts +++ b/src/core/mcp/McpServerConfigLoader.ts @@ -5,6 +5,7 @@ import { Logger } from "@/shared/services/Logger" import { pluginDiscoveryService } from "../plugins/PluginDiscoveryService" import type { McpServerConfig } from "./types" +import { assertMcpUrlAllowed, shouldSendBearer } from "./urlSecurity" interface RawMcpJson { mcpServers?: Record< @@ -40,13 +41,24 @@ function serverTokenEnv(serverId: string): string { // `Authorization: Bearer ` from ISAAC_MCP__TOKEN when set and no // Authorization header was declared. The token is never persisted (the tool // cache stores only a config hash + the tool list). -function resolveHttpHeaders(serverId: string, raw?: Record): Record | undefined { +function resolveHttpHeaders(serverId: string, url: string, raw?: Record): Record | undefined { const headers: Record = {} for (const [k, v] of Object.entries(raw ?? {})) { headers[k] = expandEnvVars(v) } const token = process.env[serverTokenEnv(serverId)]?.trim() const hasAuth = Object.keys(headers).some((k) => k.toLowerCase() === "authorization") + // Token-gate: a bearer credential (injected from env OR declared in .mcp.json) + // must not leave the machine in cleartext. Fail closed — skip the server + // rather than silently leak the token to an insecure public endpoint. + if ((token && !hasAuth) || hasAuth) { + if (!shouldSendBearer(url)) { + throw new Error( + `refusing to send Authorization over insecure channel to ${url} ` + + `(use https, a private/loopback/Tailscale host, or add the host to ISAAC_MCP_ALLOW_HOSTS)`, + ) + } + } if (token && !hasAuth) { headers.Authorization = `Bearer ${token}` } @@ -86,15 +98,47 @@ export async function loadMcpConfigsFromPlugins(): Promise { const kind = server.type ?? "stdio" if (kind !== "stdio" && kind !== "http") continue - // Validate required fields per transport before claiming the id, so an - // invalid entry doesn't shadow a valid same-id server from a later plugin. - if (kind === "stdio" && !server.command) { - Logger.warn(`[mcp] Server "${serverId}" in plugin ${plugin.manifest.name} has no command, skipping`) - continue - } - if (kind === "http" && !server.url) { - Logger.warn(`[mcp] HTTP server "${serverId}" in plugin ${plugin.manifest.name} has no url, skipping`) - continue + // Validate + resolve per transport BEFORE claiming the id, so an invalid + // or security-rejected entry doesn't shadow a valid same-id server from a + // later plugin (mirrors the dedupe-by-id contract below). + const pluginRoot = plugin.rootDir + let resolved: McpServerConfig | null = null + + if (kind === "stdio") { + if (!server.command) { + Logger.warn(`[mcp] Server "${serverId}" in plugin ${plugin.manifest.name} has no command, skipping`) + continue + } + resolved = { + id: serverId, + pluginName: plugin.manifest.name, + pluginRoot, + type: "stdio", + command: expandPluginRoot(server.command, pluginRoot), + args: (server.args ?? []).map((a) => expandPluginRoot(a, pluginRoot)), + } + } else { + if (!server.url) { + Logger.warn(`[mcp] HTTP server "${serverId}" in plugin ${plugin.manifest.name} has no url, skipping`) + continue + } + const url = expandEnvVars(server.url) + // SSRF guard: refuse cloud-metadata endpoints (unless allowlisted). + const verdict = assertMcpUrlAllowed(url) + if (!verdict.ok) { + Logger.warn(`[mcp] HTTP server "${serverId}" in plugin ${plugin.manifest.name}: ${verdict.reason}, skipping`) + continue + } + let headers: Record | undefined + try { + headers = resolveHttpHeaders(serverId, url, server.headers) + } catch (e) { + Logger.warn( + `[mcp] HTTP server "${serverId}" in plugin ${plugin.manifest.name}: ${(e as Error).message}, skipping`, + ) + continue + } + resolved = { id: serverId, pluginName: plugin.manifest.name, pluginRoot, type: "http", url, headers } } const dupOwner = seenServers.get(serverId) @@ -105,27 +149,7 @@ export async function loadMcpConfigsFromPlugins(): Promise { continue } seenServers.set(serverId, plugin.manifest.name) - - const pluginRoot = plugin.rootDir - if (kind === "http") { - configs.push({ - id: serverId, - pluginName: plugin.manifest.name, - pluginRoot, - type: "http", - url: expandEnvVars(server.url!), - headers: resolveHttpHeaders(serverId, server.headers), - }) - } else { - configs.push({ - id: serverId, - pluginName: plugin.manifest.name, - pluginRoot, - type: "stdio", - command: expandPluginRoot(server.command!, pluginRoot), - args: (server.args ?? []).map((a) => expandPluginRoot(a, pluginRoot)), - }) - } + configs.push(resolved) } } diff --git a/src/core/mcp/__tests__/McpServerConfigLoader.test.ts b/src/core/mcp/__tests__/McpServerConfigLoader.test.ts index 34504cb1..3c8980f7 100644 --- a/src/core/mcp/__tests__/McpServerConfigLoader.test.ts +++ b/src/core/mcp/__tests__/McpServerConfigLoader.test.ts @@ -5,6 +5,7 @@ import path from "path" import { afterEach, beforeEach, describe, it } from "vitest" import type { DiscoveredPlugin } from "../../plugins/PluginDiscoveryService" +import type { McpStdioServerConfig } from "../types" // We test loadMcpConfigsFromPlugins by patching pluginDiscoveryService.discover() // to return fake plugins pointing at a tmpdir, without touching the real ~/.claude/plugins. @@ -83,8 +84,8 @@ describe("McpServerConfigLoader", () => { expect(result[0].id).to.equal("my-server") expect(result[0].pluginName).to.equal("plugin-with-mcp") expect(result[0].type).to.equal("stdio") - expect(result[0].command).to.equal("/usr/bin/node") - expect(result[0].args).to.deep.equal(["server.js", "--port", "3000"]) + expect((result[0] as McpStdioServerConfig).command).to.equal("/usr/bin/node") + expect((result[0] as McpStdioServerConfig).args).to.deep.equal(["server.js", "--port", "3000"]) ;(pluginDiscoveryModule.pluginDiscoveryService as any).discover = async () => [] }) @@ -105,7 +106,7 @@ describe("McpServerConfigLoader", () => { const context7s = result.filter((c) => c.id === "context7") expect(context7s).to.have.length(1) expect(context7s[0].pluginName).to.equal("plugin-a") // first declarer wins - expect(context7s[0].args).to.deep.equal(["-y", "@upstash/context7-mcp@2.1.4"]) + expect((context7s[0] as McpStdioServerConfig).args).to.deep.equal(["-y", "@upstash/context7-mcp@2.1.4"]) ;(pluginDiscoveryModule.pluginDiscoveryService as any).discover = async () => [] }) @@ -131,8 +132,8 @@ describe("McpServerConfigLoader", () => { const { loadMcpConfigsFromPlugins } = loaderModule const result = await loadMcpConfigsFromPlugins() expect(result).to.have.length(1) - expect(result[0].command).to.equal(`${fakePlugin.rootDir}/bin/server`) - expect(result[0].args).to.deep.equal([`--root`, `${fakePlugin.rootDir}/data`]) + expect((result[0] as McpStdioServerConfig).command).to.equal(`${fakePlugin.rootDir}/bin/server`) + expect((result[0] as McpStdioServerConfig).args).to.deep.equal([`--root`, `${fakePlugin.rootDir}/data`]) ;(pluginDiscoveryModule.pluginDiscoveryService as any).discover = async () => [] }) @@ -245,4 +246,66 @@ describe("McpServerConfigLoader", () => { ;(pluginDiscoveryModule.pluginDiscoveryService as any).discover = async () => [] } }) + + it("skips an http server pointing at a cloud-metadata endpoint (SSRF guard)", async () => { + const fakePlugin = await createFakePlugin(tmpDir, "owner", "plugin-ssrf", "1.0.0", { name: "plugin-ssrf" }) + await fs.writeFile( + path.join(fakePlugin.rootDir, ".mcp.json"), + JSON.stringify({ + mcpServers: { evil: { type: "http", url: "http://169.254.169.254/latest/meta-data/" } }, + }), + ) + ;(pluginDiscoveryModule.pluginDiscoveryService as any).discover = async () => [fakePlugin] + + const { loadMcpConfigsFromPlugins } = loaderModule + const result = await loadMcpConfigsFromPlugins() + expect(result).to.have.length(0) + + ;(pluginDiscoveryModule.pluginDiscoveryService as any).discover = async () => [] + }) + + it("skips an http server when a bearer token would leak over cleartext to a public host (token-gate)", async () => { + const fakePlugin = await createFakePlugin(tmpDir, "owner", "plugin-tokengate", "1.0.0", { name: "plugin-tokengate" }) + await fs.writeFile( + path.join(fakePlugin.rootDir, ".mcp.json"), + JSON.stringify({ + mcpServers: { pub: { type: "http", url: "http://api.public.example/mcp" } }, + }), + ) + ;(pluginDiscoveryModule.pluginDiscoveryService as any).discover = async () => [fakePlugin] + + process.env.ISAAC_MCP_PUB_TOKEN = "secret-token" + try { + const { loadMcpConfigsFromPlugins } = loaderModule + const result = await loadMcpConfigsFromPlugins() + expect(result).to.have.length(0) + } finally { + delete process.env.ISAAC_MCP_PUB_TOKEN + ;(pluginDiscoveryModule.pluginDiscoveryService as any).discover = async () => [] + } + }) + + it("injects the bearer token for a private host over http (token-gate allows)", async () => { + const fakePlugin = await createFakePlugin(tmpDir, "owner", "plugin-localtoken", "1.0.0", { name: "plugin-localtoken" }) + await fs.writeFile( + path.join(fakePlugin.rootDir, ".mcp.json"), + JSON.stringify({ + mcpServers: { loc: { type: "http", url: "http://127.0.0.1:8765/mcp" } }, + }), + ) + ;(pluginDiscoveryModule.pluginDiscoveryService as any).discover = async () => [fakePlugin] + + process.env.ISAAC_MCP_LOC_TOKEN = "local-token" + try { + const { loadMcpConfigsFromPlugins } = loaderModule + const result = await loadMcpConfigsFromPlugins() + expect(result).to.have.length(1) + expect((result[0] as { headers?: Record }).headers).to.deep.equal({ + Authorization: "Bearer local-token", + }) + } finally { + delete process.env.ISAAC_MCP_LOC_TOKEN + ;(pluginDiscoveryModule.pluginDiscoveryService as any).discover = async () => [] + } + }) }) diff --git a/src/core/mcp/__tests__/urlSecurity.test.ts b/src/core/mcp/__tests__/urlSecurity.test.ts new file mode 100644 index 00000000..32777f41 --- /dev/null +++ b/src/core/mcp/__tests__/urlSecurity.test.ts @@ -0,0 +1,104 @@ +import { expect } from "chai" +import { afterEach, describe, it } from "vitest" + +import { assertMcpUrlAllowed, isMetadataHost, shouldSendBearer } from "../urlSecurity" + +describe("urlSecurity", () => { + const savedAllow = process.env.ISAAC_MCP_ALLOW_HOSTS + + afterEach(() => { + if (savedAllow === undefined) delete process.env.ISAAC_MCP_ALLOW_HOSTS + else process.env.ISAAC_MCP_ALLOW_HOSTS = savedAllow + }) + + describe("assertMcpUrlAllowed (SSRF guard)", () => { + it("blocks known cloud-metadata endpoints", () => { + for (const u of [ + "http://169.254.169.254/latest/meta-data/", + "http://metadata.google.internal/computeMetadata/v1/", + "http://metadata.goog/", + "http://100.100.100.200/", + "http://[fd00:ec2::254]/", + ]) { + expect(assertMcpUrlAllowed(u).ok, u).to.equal(false) + } + }) + + it("allows public https, loopback and private hosts", () => { + for (const u of [ + "https://api.example.com/mcp", + "http://localhost:3000/mcp", + "http://127.0.0.1:8080/", + "http://10.0.0.5/mcp", + "http://192.168.1.10/mcp", + "http://100.64.0.1/mcp", // Tailscale CGNAT + ]) { + expect(assertMcpUrlAllowed(u).ok, u).to.equal(true) + } + }) + + it("blocks IPv4-mapped IPv6 and trailing-dot evasions of the metadata block", () => { + for (const u of [ + "http://[::ffff:169.254.169.254]/", // dotted IPv4-mapped (Node compresses) + "http://[::ffff:a9fe:a9fe]/", // hextet IPv4-mapped form of 169.254.169.254 + "http://[::ffff:6464:64c8]/", // 100.100.100.200 (Alibaba) mapped + "http://169.254.169.254./", // trailing FQDN dot + "http://metadata.google.internal./", + ]) { + expect(assertMcpUrlAllowed(u).ok, u).to.equal(false) + } + }) + + it("rejects invalid urls and non-http schemes", () => { + expect(assertMcpUrlAllowed("not a url").ok).to.equal(false) + expect(assertMcpUrlAllowed("ftp://example.com").ok).to.equal(false) + expect(assertMcpUrlAllowed("file:///etc/passwd").ok).to.equal(false) + }) + + it("honours ISAAC_MCP_ALLOW_HOSTS to override a metadata block", () => { + expect(assertMcpUrlAllowed("http://169.254.169.254/").ok).to.equal(false) + process.env.ISAAC_MCP_ALLOW_HOSTS = "169.254.169.254, other.host" + expect(assertMcpUrlAllowed("http://169.254.169.254/").ok).to.equal(true) + }) + }) + + describe("shouldSendBearer (token-gate)", () => { + it("allows token over https (any host)", () => { + expect(shouldSendBearer("https://api.example.com/mcp")).to.equal(true) + }) + + it("allows token over http only for private/loopback/CGNAT hosts", () => { + expect(shouldSendBearer("http://localhost:3000/")).to.equal(true) + expect(shouldSendBearer("http://127.0.0.1/")).to.equal(true) + expect(shouldSendBearer("http://10.1.2.3/")).to.equal(true) + expect(shouldSendBearer("http://192.168.0.2/")).to.equal(true) + expect(shouldSendBearer("http://172.16.0.1/")).to.equal(true) + expect(shouldSendBearer("http://100.96.0.7/")).to.equal(true) + }) + + it("recognises IPv4-mapped IPv6 private addresses as token-safe", () => { + expect(shouldSendBearer("http://[::ffff:192.168.1.1]/")).to.equal(true) + expect(shouldSendBearer("http://[::ffff:c0a8:0101]/")).to.equal(true) // hextet form of 192.168.1.1 + }) + + it("refuses token over http to public hosts", () => { + expect(shouldSendBearer("http://api.example.com/mcp")).to.equal(false) + expect(shouldSendBearer("http://8.8.8.8/")).to.equal(false) + // link-local is NOT auto-trusted (ambiguous with metadata) + expect(shouldSendBearer("http://169.254.1.1/")).to.equal(false) + }) + + it("honours ISAAC_MCP_ALLOW_HOSTS for an otherwise-unsafe host", () => { + expect(shouldSendBearer("http://gw.public.example/")).to.equal(false) + process.env.ISAAC_MCP_ALLOW_HOSTS = "gw.public.example" + expect(shouldSendBearer("http://gw.public.example/")).to.equal(true) + }) + }) + + describe("isMetadataHost", () => { + it("matches case-insensitively", () => { + expect(isMetadataHost("METADATA.GOOGLE.INTERNAL")).to.equal(true) + expect(isMetadataHost("example.com")).to.equal(false) + }) + }) +}) diff --git a/src/core/mcp/urlSecurity.ts b/src/core/mcp/urlSecurity.ts new file mode 100644 index 00000000..911e18e0 --- /dev/null +++ b/src/core/mcp/urlSecurity.ts @@ -0,0 +1,139 @@ +// --------------------------------------------------------------------------- +// MCP HTTP URL security +// +// Two independent guards for http(s) MCP servers whose URL + bearer token come +// from a plugin-supplied .mcp.json (an untrusted source): +// +// 1. SSRF guard (assertMcpUrlAllowed): refuse to connect to known cloud +// metadata endpoints (169.254.169.254 & friends). Loopback and private +// ranges stay allowed — local/LAN MCP servers are a legitimate, common +// setup. Hosts listed in ISAAC_MCP_ALLOW_HOSTS bypass the block. +// +// 2. Bearer token-gate (shouldSendBearer): only attach `Authorization: Bearer` +// when the channel is safe — https, OR a loopback / RFC1918 / CGNAT +// (Tailscale 100.64/10) host, OR an explicitly allowlisted host. A token +// must never leave the machine in cleartext to a public http:// endpoint. +// +// Limitation: checks operate on the URL's literal host. DNS rebinding (a public +// name resolving to a private IP at connect time) is out of scope for this +// .mcp.json threat model and documented as such. +// --------------------------------------------------------------------------- + +/** Cloud instance-metadata endpoints — connecting here is almost always SSRF. */ +const METADATA_HOSTS = new Set([ + "169.254.169.254", // AWS / Azure / GCP / OpenStack / DigitalOcean + "metadata.google.internal", // GCP + "metadata.goog", // GCP + "100.100.100.200", // Alibaba Cloud (note: inside CGNAT 100.64/10) + "fd00:ec2::254", // AWS IPv6 IMDS +]) + +/** + * url.hostname keeps the [..] brackets for IPv6 literals; strip them + lowercase. + * Also canonicalise two evasion forms so downstream checks see the real address: + * - trailing FQDN dot ("169.254.169.254." / "metadata.google.internal.") + * - IPv4-mapped IPv6 ("::ffff:a9fe:a9fe" or "::ffff:169.254.169.254") -> dotted + * IPv4, so both the metadata set and the private-range check apply. + */ +function normalizeHost(hostname: string): string { + let h = hostname.replace(/^\[/, "").replace(/\]$/, "").toLowerCase() + if (h.length > 1 && h.endsWith(".")) h = h.slice(0, -1) + const mappedHex = h.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/) + if (mappedHex) { + const hi = Number.parseInt(mappedHex[1], 16) + const lo = Number.parseInt(mappedHex[2], 16) + return `${hi >> 8}.${hi & 0xff}.${lo >> 8}.${lo & 0xff}` + } + const mappedDotted = h.match(/^::ffff:(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) + if (mappedDotted) return mappedDotted[1] + return h +} + +function envAllowlist(): Set { + const raw = process.env.ISAAC_MCP_ALLOW_HOSTS + if (!raw) return new Set() + return new Set( + raw + .split(",") + .map((h) => h.trim().toLowerCase()) + .filter(Boolean), + ) +} + +function parseIpv4(host: string): number[] | null { + const parts = host.split(".") + if (parts.length !== 4) return null + const octets: number[] = [] + for (const p of parts) { + if (!/^\d{1,3}$/.test(p)) return null + const n = Number(p) + if (n > 255) return null + octets.push(n) + } + return octets +} + +/** Loopback / RFC1918 / link-local-ULA / CGNAT — "private enough" to send a token over http. */ +function isPrivateHost(hostname: string): boolean { + const host = hostname.toLowerCase() + if (host === "localhost" || host.endsWith(".localhost")) return true + if (host === "::1") return true + // IPv6 unique-local (fc00::/7 → fc.. / fd..). url.hostname strips [] brackets. + if (/^f[cd][0-9a-f]{0,2}:/.test(host)) return true + + const v4 = parseIpv4(host) + if (!v4) return false + const [a, b] = v4 + if (a === 127) return true // 127.0.0.0/8 loopback + if (a === 10) return true // 10.0.0.0/8 + if (a === 192 && b === 168) return true // 192.168.0.0/16 + if (a === 172 && b >= 16 && b <= 31) return true // 172.16.0.0/12 + // 100.64.0.0/10 CGNAT (Tailscale). NB: 100.100.100.200 (Alibaba metadata) + // falls in this range, but the SSRF guard checks isMetadataHost FIRST and + // blocks the connection outright, so it never reaches the token-gate here. + if (a === 100 && b >= 64 && b <= 127) return true + return false +} + +export function isMetadataHost(hostname: string): boolean { + return METADATA_HOSTS.has(normalizeHost(hostname)) +} + +/** Whether a bearer token may be attached for this URL (token-gate policy). */ +export function shouldSendBearer(rawUrl: string): boolean { + let url: URL + try { + url = new URL(rawUrl) + } catch { + return false + } + const host = normalizeHost(url.hostname) + if (envAllowlist().has(host)) return true + if (url.protocol === "https:") return true + if (url.protocol === "http:" && isPrivateHost(host)) return true + return false +} + +export type McpUrlVerdict = { ok: true } | { ok: false; reason: string } + +/** SSRF guard: block cloud-metadata endpoints unless explicitly allowlisted. */ +export function assertMcpUrlAllowed(rawUrl: string): McpUrlVerdict { + let url: URL + try { + url = new URL(rawUrl) + } catch { + return { ok: false, reason: `invalid url "${rawUrl}"` } + } + if (url.protocol !== "http:" && url.protocol !== "https:") { + return { ok: false, reason: `unsupported scheme "${url.protocol}" (only http/https)` } + } + const host = normalizeHost(url.hostname) + if (envAllowlist().has(host)) return { ok: true } + if (isMetadataHost(host)) { + return { + ok: false, + reason: `blocked cloud-metadata endpoint "${host}" (set ISAAC_MCP_ALLOW_HOSTS to override)`, + } + } + return { ok: true } +} diff --git a/src/core/storage/disk.ts b/src/core/storage/disk.ts index bf8c6d79..4f9193d3 100644 --- a/src/core/storage/disk.ts +++ b/src/core/storage/disk.ts @@ -27,11 +27,13 @@ import { StateManager } from "./StateManager" * @param filePath - The target file path * @param data - The data to write */ -async function atomicWriteFile(filePath: string, data: string): Promise { +async function atomicWriteFile(filePath: string, data: string, mode?: number): Promise { const tmpPath = `${filePath}.tmp.${process.pid}.${Date.now()}.${randomUUID()}` try { - // Write to temporary file first - await fs.writeFile(tmpPath, data, "utf8") + // Write to temporary file first. When a mode is given, create the temp + // file with it so the renamed target inherits restrictive permissions + // (used for files that may hold conversation secrets). + await fs.writeFile(tmpPath, data, mode !== undefined ? { encoding: "utf8", mode } : "utf8") // Rename temp file to target (atomic in most cases) await fs.rename(tmpPath, filePath) } catch (error) { @@ -243,7 +245,11 @@ export async function saveApiConversationHistory(taskId: string, apiConversation syncWorker().enqueue(taskId, fileName, data) // Store locally const filePath = path.join(await ensureTaskDirectoryExists(taskId), fileName) - await atomicWriteFile(filePath, data) + // 0600: the conversation history is a plaintext copy of the full LLM + // exchange and may contain secrets the user/tools pasted. It is replayed + // to resume a task, so it is deliberately NOT scrubbed — restrict it to + // the owner instead. + await atomicWriteFile(filePath, data, 0o600) } } catch (error) { // in the off chance this fails, we don't want to stop the task diff --git a/src/core/tracing/JsonlTracer.ts b/src/core/tracing/JsonlTracer.ts index b468a86b..d561c63b 100644 --- a/src/core/tracing/JsonlTracer.ts +++ b/src/core/tracing/JsonlTracer.ts @@ -244,7 +244,7 @@ export class JsonlTracer { success: input.tool_execution.success, } : null, - errors: input.errors ?? [], + errors: input.errors ? scrubSecrets(input.errors) : [], } this.queueAppend(this.tracePath, line) return line @@ -315,7 +315,11 @@ export class JsonlTracer { // uses the same pattern. const tmpPath = `${this.metaPath}.tmp` try { - fs.writeFileSync(tmpPath, JSON.stringify(this.meta, null, 2), "utf8") + // Scrub at the single persist point so credential-bearing fields + // (gateway_url with inline creds, worker endpoints) never hit disk. + // scrubSecrets returns a deep copy, so this.meta stays intact for + // later merges (mergeStats / close). + fs.writeFileSync(tmpPath, JSON.stringify(scrubSecrets(this.meta), null, 2), "utf8") fs.renameSync(tmpPath, this.metaPath) } catch (_err) { // swallow — tracing is non-fatal. Best-effort cleanup of the tmp. diff --git a/src/services/local-router/HealthMonitor.ts b/src/services/local-router/HealthMonitor.ts index af3f3dca..a87723de 100644 --- a/src/services/local-router/HealthMonitor.ts +++ b/src/services/local-router/HealthMonitor.ts @@ -38,22 +38,29 @@ export class HealthMonitor { private async check(id: string, w: WorkerEndpoint): Promise { try { - // Try a lightweight /health endpoint first; fall back to /v1/models + // Try a lightweight /health endpoint first; fall back to /v1/models. + // Each attempt gets its OWN controller + timeout: a shared one meant the + // fallback fetch could start with an already-fired 5s timer and abort + // instantly, hiding a worker that was actually up. const base = w.url.replace(/\/v1\/?$/, "").replace(/\/$/, "") - const ctrl = new AbortController() - const timeout = setTimeout(() => ctrl.abort(), 5_000) try { - const r = await fetch(`${base}/health`, { signal: ctrl.signal }) - this.health.set(id, r.ok ? "up" : "down") + this.health.set(id, (await HealthMonitor.pingOnce(`${base}/health`)) ? "up" : "down") } catch { - // try /v1/models - const r = await fetch(`${base}/v1/models`, { signal: ctrl.signal }) - this.health.set(id, r.ok ? "up" : "down") - } finally { - clearTimeout(timeout) + this.health.set(id, (await HealthMonitor.pingOnce(`${base}/v1/models`)) ? "up" : "down") } } catch { this.health.set(id, "down") } } + + private static async pingOnce(url: string): Promise { + const ctrl = new AbortController() + const timeout = setTimeout(() => ctrl.abort(), 5_000) + try { + const r = await fetch(url, { signal: ctrl.signal }) + return r.ok + } finally { + clearTimeout(timeout) + } + } } diff --git a/src/services/local-router/LocalRouter.ts b/src/services/local-router/LocalRouter.ts index a047218d..0f3596cd 100644 --- a/src/services/local-router/LocalRouter.ts +++ b/src/services/local-router/LocalRouter.ts @@ -67,6 +67,12 @@ function combineAbortSignals(signals: (AbortSignal | undefined)[]): { * in state-keys.ts). Kept local so this module stays buildable from contexts * where StateManager isn't initialized (unit tests, CLI bootstrap). */ +// Escape a string for safe interpolation into a RegExp — tool names with regex +// metacharacters (e.g. "file.write") would otherwise corrupt the pattern. +function escapeRegExp(s: string): string { + return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") +} + const DEFAULT_LOCAL_ROUTER_TOTAL_TIMEOUT_MS = 60_000 const DEFAULT_LOCAL_ROUTER_IDLE_TIMEOUT_MS = 20_000 @@ -190,19 +196,47 @@ export class LocalRouter { const url = worker.url.replace(/\/$/, "") const body = { ...req, model: worker.modelId, stream: false } - const res = await fetch(`${url}/chat/completions`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(body), - }) - if (!res.ok) { - const text = await res.text().catch(() => "") - throw new Error(`[LocalRouter] worker ${worker.id} returned ${res.status}: ${text.slice(0, 200)}`) + + // Wall-clock guard. The streaming path has both total + idle timers, but + // the non-streaming path produces no chunks, so only `total` applies here. + // Without it a worker that accepts the POST and never answers hangs the + // agent forever (this was the missing guard vs chatStream). + const totalTimeoutMs = sanitizeTimeout(req.timeoutMs, DEFAULT_LOCAL_ROUTER_TOTAL_TIMEOUT_MS) + const { controller, dispose: detachSignals } = combineAbortSignals([req.signal]) + let timedOut = false + const totalTimer = setTimeout(() => { + timedOut = true + controller.abort(new LocalRouterTimeoutError("total", worker.id, totalTimeoutMs)) + }, totalTimeoutMs) + const cleanup = () => { + clearTimeout(totalTimer) + detachSignals() + if (!controller.signal.aborted) controller.abort() + } + + try { + const res = await fetch(`${url}/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + signal: controller.signal, + }) + if (!res.ok) { + const text = await res.text().catch(() => "") + throw new Error(`[LocalRouter] worker ${worker.id} returned ${res.status}: ${text.slice(0, 200)}`) + } + const data = (await res.json()) as ChatResponse + this.cache.set(cacheKey, data) + routingObserver.emit({ ts: Date.now(), category, workerId: worker.id, cacheHit: false, estTokens }) + return data + } catch (err) { + // Surface the timeout as the typed error so callers can fall back on it, + // rather than a raw AbortError. + if (timedOut) throw new LocalRouterTimeoutError("total", worker.id, totalTimeoutMs) + throw err + } finally { + cleanup() } - const data = (await res.json()) as ChatResponse - this.cache.set(cacheKey, data) - routingObserver.emit({ ts: Date.now(), category, workerId: worker.id, cacheHit: false, estTokens }) - return data } /** @@ -279,13 +313,17 @@ export class LocalRouter { const command = bashFence[1].trim() if (!command) return null return { + // requires_approval: true — a command parsed out of a model's + // markdown fence is untrusted; it must go through the approval + // gate (e.g. `rm -rf` should never auto-run). Auto-approval modes + // downstream can still bypass this; the safe default does not. match: bashFence, - toolCall: { name: "execute_command", arguments: { command, requires_approval: false } }, + toolCall: { name: "execute_command", arguments: { command, requires_approval: true } }, } } case "plain_func": { for (const toolName of toolNames) { - const re = new RegExp(`\\b${toolName}\\s*\\(([^)]*)\\)`) + const re = new RegExp(`\\b${escapeRegExp(toolName)}\\s*\\(([^)]*)\\)`) const m = buf.match(re) if (!m) continue const args = LocalRouter.parsePlainArgs( diff --git a/src/services/local-router/ResponseCache.ts b/src/services/local-router/ResponseCache.ts index 5594c35f..16564692 100644 --- a/src/services/local-router/ResponseCache.ts +++ b/src/services/local-router/ResponseCache.ts @@ -44,9 +44,17 @@ export class ResponseCache { set(key: string, value: ChatResponse): void { if (this.map.size >= this.maxSize && !this.map.has(key)) { - // evict oldest - const firstKey = this.map.keys().next().value - if (firstKey) this.map.delete(firstKey) + // First reclaim expired entries — otherwise a full cache of stale + // entries would evict a fresh one by LRU while the stale ones linger. + const now = Date.now() + for (const [k, e] of this.map) { + if (e.expiresAt < now) this.map.delete(k) + } + // Still at capacity? evict the oldest (LRU). + if (this.map.size >= this.maxSize) { + const firstKey = this.map.keys().next().value + if (firstKey) this.map.delete(firstKey) + } } this.map.set(key, { value, expiresAt: Date.now() + this.ttlMs }) } diff --git a/src/services/local-router/__tests__/LocalRouter.test.ts b/src/services/local-router/__tests__/LocalRouter.test.ts index 506b5693..ffb12f1e 100644 --- a/src/services/local-router/__tests__/LocalRouter.test.ts +++ b/src/services/local-router/__tests__/LocalRouter.test.ts @@ -395,7 +395,9 @@ describe("LocalRouter", () => { assert.strictEqual(tc.name, "execute_command") const args = JSON.parse(tc.argumentsRaw) as { command: string; requires_approval: boolean } assert.strictEqual(args.command, "ls -la /tmp") - assert.strictEqual(args.requires_approval, false) + // Commands parsed from a model's markdown fence must default to requiring + // approval — they are untrusted and must hit the approval gate. + assert.strictEqual(args.requires_approval, true) router.dispose() })