diff --git a/README.md b/README.md index 7d15239..ed4e585 100644 --- a/README.md +++ b/README.md @@ -26,12 +26,12 @@ behavioral litmus. For servers it is hybrid — a fast lookup of the published g when ungraded; for skills it is a fast static scan. Un-gradeable targets warn unless `strict`. It's on the **[GitHub Marketplace](https://github.com/marketplace/actions/polygraph-mcp-gate)** as -`polygraphso/litmus@v1` — drop it into a workflow: +`polygraphso/litmus@v1`. For a security gate, pin to a commit SHA rather than the mutable `@v1` tag: ```yaml # .github/workflows/mcp-gate.yml name: mcp-gate -on: [pull_request] +on: [pull_request] # NOT pull_request_target — that exposes secrets to fork PRs permissions: contents: read jobs: @@ -39,23 +39,30 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - - uses: polygraphso/litmus@v1 + - uses: polygraphso/litmus@<sha> # pin to a SHA; resolve from the v1 release with: - # Auto-discovers MCP servers (.mcp.json / .vscode/mcp.json / .cursor/mcp.json) - # and skills (SKILL.md dirs). Or name them explicitly: + # Name the targets explicitly (recommended). Grading runs a server's code, + # so on a public repo prefer an allowlist over discovering PR-controlled config: servers: | npm/@modelcontextprotocol/server-filesystem skills: | ./my-skill + # discover: "true" # opt in to auto-discovery (.mcp.json/.vscode/.cursor) — trusted repos only # min-grade: B # stricter than the default D/F gate # strict: "true" # also fail on targets that cannot be graded ``` -**Inputs:** `servers` · `skills` · `discover` (default `true`) · `min-grade` · `strict` · `working-directory` · `version` · `bearer`. **Outputs:** `result` · `failed` · `report`. +**Inputs:** `servers` · `skills` · `discover` (default `false`) · `min-grade` · `strict` · `working-directory` · `version` · `bearer`. **Outputs:** `result` · `failed` · `report`. -Not on GitHub? 
The gate is a plain command — `npx @polygraphso/litmus ci` — so it runs in any CI or -as a pre-commit hook. A grade is a measurement, not a guarantee: re-run the open harness to reproduce -any result. +**Security.** Grading a server **runs its code** (egress is Docker-sandboxed, but it still executes). +Trigger on `pull_request`, never `pull_request_target`. Keep `discover` off on public repos and name +targets explicitly — auto-discovered config is pull-request-controllable. `bearer` is sent as an +`Authorization` header to the target, so pass it only for an explicitly trusted, pinned remote — never +with discovery or on untrusted PRs, and keep it scoped and short-lived. + +Not on GitHub? The gate is a plain command — `npx @polygraphso/litmus@0.20.0 ci` (pin the version) — +so it runs in any CI or as a pre-commit hook. A grade is a measurement, not a guarantee: re-run the +open harness to reproduce any result. ## What litmus is diff --git a/action.yml b/action.yml index 2dce5ee..7c70093 100644 --- a/action.yml +++ b/action.yml @@ -1,4 +1,17 @@ # action.yml — Polygraph MCP gate (composite). Marketplace handle: polygraphso/litmus@v1 +# +# SECURITY — read before enabling on a public repo: +# • Pin this action to a commit SHA, not the mutable @v1 tag: +# uses: polygraphso/litmus@<40-char-sha> # v1.x.x +# • Trigger on `pull_request`, NEVER `pull_request_target` — the latter runs with +# repo secrets in the context of an untrusted fork PR. +# • Grading a server RUNS its code (egress is Docker-sandboxed, but it still +# executes). Do not run with secrets available on untrusted PRs. +# • `discover` is OFF by default: prefer an explicit `servers:` / `skills:` +# allowlist over auto-discovering PR-controlled config on public repos. +# • `bearer` is sent as an Authorization header to the target host — pass it only +# for an explicitly trusted, pinned remote, never with discovery or on fork PRs; +# keep it scoped and short-lived. 
name: "Polygraph MCP gate" description: "Fail the build if an MCP dependency grades D/F under the open polygraph behavioral litmus." author: "polygraph" @@ -15,9 +28,9 @@ inputs: required: false default: "" discover: - description: "Auto-discover targets from MCP config files (.mcp.json, .vscode/mcp.json, .cursor/mcp.json)." + description: "Auto-discover targets from MCP config files (.mcp.json, .vscode/mcp.json, .cursor/mcp.json). OFF by default — opt in only on trusted repos, since discovered targets are PR-controllable and grading runs their code." required: false - default: "true" + default: "false" min-grade: description: "Minimum acceptable grade (A|B|C|D). Default gates on D/F." required: false @@ -35,13 +48,13 @@ inputs: # Bump this in lockstep with each release that the v1 tag points at. description: "@polygraphso/litmus version to run." required: false - default: "0.18.2" + default: "0.20.0" api-url: - description: "Override the published-grade lookup API base URL." + description: "Override the published-grade lookup API base URL. HTTPS is enforced (http only for localhost). Point only at the official endpoint or a mirror you trust — an attacker-controlled endpoint can return fabricated grades." required: false default: "" bearer: - description: "Bearer token passed through to a gated remote (https) target." + description: "Bearer token sent as an Authorization header to a gated remote (https) target. Only for an explicitly trusted, pinned remote — never with discovery or on untrusted PRs; keep it scoped and short-lived." 
required: false default: "" outputs: diff --git a/packages/agent/src/gate.test.ts b/packages/agent/src/gate.test.ts index 78522fd..d15ea3f 100644 --- a/packages/agent/src/gate.test.ts +++ b/packages/agent/src/gate.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from "vitest"; -import { gateDecision, fingerprintLiveSurface, type LiveTarget } from "./gate.js"; +import { gateDecision, fingerprintLiveSurface, DEFAULT_PASSING, PAYMENT_PASSING, type LiveTarget } from "./gate.js"; /** A fake MCP client that serves a fixed list of `tools/list` pages. */ function pagedClient(pages: Array<{ tools: Array<{ name: string }>; nextCursor?: string }>) { @@ -81,6 +81,47 @@ describe("gateDecision", () => { // a failing grade is still refused regardless of any version expect(gateDecision({ ...passing, overallGrade: "F", resolvedVersion: "1.2.3" }, live(FP)).action).toBe("refuse"); }); + + it("no longer accepts C by default (DEFAULT_PASSING is {A,B}; C is reserved)", () => { + expect(DEFAULT_PASSING.has("C")).toBe(false); + expect(gateDecision({ serverRef: REF, toolDefsFingerprint: FP, overallGrade: "C" }, live(FP)).action).toBe("refuse"); + }); + + it("PAYMENT_PASSING accepts only a local A (excludes a remote B)", () => { + const att = (grade: string) => ({ serverRef: REF, toolDefsFingerprint: FP, overallGrade: grade }); + expect(gateDecision(att("A"), live(FP), PAYMENT_PASSING).action).toBe("pay"); + expect(gateDecision(att("B"), live(FP), PAYMENT_PASSING).action).toBe("refuse"); + }); +}); + +describe("gateDecision — opt-in stricter rules (GateOptions)", () => { + const base = { serverRef: REF, toolDefsFingerprint: FP, overallGrade: "A" as const }; + + it("attester allowlist: refuses an unlisted signer, pays a listed one (case-insensitive)", () => { + const allow = new Set(["0xabc"]); + expect(gateDecision({ ...base, attester: "0xDEF" }, live(FP), undefined, undefined, { allowedAttesters: allow }).action).toBe("refuse"); + expect(gateDecision({ ...base, attester: "0xABC" }, 
live(FP), undefined, undefined, { allowedAttesters: allow }).action).toBe("pay"); + // fail closed when no attester is present + expect(gateDecision({ ...base }, live(FP), undefined, undefined, { allowedAttesters: allow }).action).toBe("refuse"); + }); + + it("methodology allowlist: refuses an unaccepted version, pays an accepted one", () => { + const accept = new Set(["litmus-v10"]); + expect(gateDecision({ ...base, methodologyVersion: "litmus-v3" }, live(FP), undefined, undefined, { acceptedMethodologyVersions: accept }).action).toBe("refuse"); + expect(gateDecision({ ...base, methodologyVersion: "litmus-v10" }, live(FP), undefined, undefined, { acceptedMethodologyVersions: accept }).action).toBe("pay"); + }); + + it("requireEgressVerified: refuses a grade whose egress was never observed", () => { + expect(gateDecision({ ...base, overallGrade: "B", egressVerified: false }, live(FP), undefined, undefined, { requireEgressVerified: true }).action).toBe("refuse"); + // missing flag also fails closed + expect(gateDecision({ ...base, overallGrade: "B" }, live(FP), undefined, undefined, { requireEgressVerified: true }).action).toBe("refuse"); + // a local grade with egress verified passes + expect(gateDecision({ ...base, egressVerified: true }, live(FP), undefined, undefined, { requireEgressVerified: true }).action).toBe("pay"); + }); + + it("with no options, the stricter rules are inert (decision unchanged)", () => { + expect(gateDecision({ ...base, attester: "0xanything", egressVerified: false }, live(FP)).action).toBe("pay"); + }); }); describe("fingerprintLiveSurface — full-surface (paginated) rug-pull check", () => { diff --git a/packages/agent/src/gate.ts b/packages/agent/src/gate.ts index 117daf9..165323d 100644 --- a/packages/agent/src/gate.ts +++ b/packages/agent/src/gate.ts @@ -10,7 +10,9 @@ * attested fingerprint → refuse (rug pull): the surface changed since it * was graded * 4. grade check — a failing grade → refuse, 0 spent - * All pass → pay. 
+ * All pass → pay. A value/payment path can opt into stricter rules via + * `GateOptions` (attester allowlist, accepted methodology versions, and + * `requireEgressVerified` — which rejects remote/no-sandbox B grades). * * `gateDecision` is pure and unit-tested; `liveFingerprint` reuses the harness * and returns the connected server's canonical ref so the binding compares @@ -34,6 +36,16 @@ export interface AttestationView { revoked?: boolean; /** EAS expiry in unix seconds; 0n / undefined = no expiration. */ expirationTime?: bigint; + /** Account that signed the attestation. A self-minted grade is forgeable, so a + * caller routing value can pin an `allowedAttesters` set (or re-run the harness). */ + attester?: string; + /** Methodology version the grade was produced under (signed attestation data). + * Unlike `resolvedVersion`, this is not a live-oracle claim, so a caller may + * require it via `acceptedMethodologyVersions`. */ + methodologyVersion?: string; + /** True only when C-02 (egress) actually ran AND passed. False/undefined for + * remote or no-sandbox B grades, where egress was never observed. */ + egressVerified?: boolean; } export interface LiveTarget { @@ -53,14 +65,36 @@ export interface GateDecision { reason: string; } -/** Grades an agent will transact with. F (injection/leak) and D (egress) are out. */ -export const DEFAULT_PASSING = new Set(["A", "B", "C"]); +/** Grades an agent will transact with by default. F (injection/leak) and D + * (egress) are out; C is reserved/unassigned under the current methodology. */ +export const DEFAULT_PASSING = new Set(["A", "B"]); + +/** The bar for signed/value actions. Only a LOCAL A clears it: remote servers + * cap at B (egress unverified), so requiring A excludes egress-unverified grades. */ +export const PAYMENT_PASSING = new Set(["A"]); + +/** + * Optional, stricter trust rules — all default off, so the base decision is + * unchanged unless a caller opts in. Use these on a value/payment path. 
+ */ +export interface GateOptions { + /** If set, the attestation's signer must be one of these (lowercased addresses). + * Self-minted grades are forgeable; an allowlist trades reproducibility for a + * known-signer assumption. */ + allowedAttesters?: Set; + /** If set, the grade's methodology version must be one of these. */ + acceptedMethodologyVersions?: Set; + /** Refuse unless C-02 (egress) actually ran clean. Rejects remote/no-sandbox B + * grades whose network behavior was never observed. */ + requireEgressVerified?: boolean; +} export function gateDecision( attestation: AttestationView | null, live: LiveTarget, passing: Set = DEFAULT_PASSING, now: bigint = BigInt(Math.floor(Date.now() / 1000)), + opts: GateOptions = {}, ): GateDecision { if (!attestation) { return { action: "refuse", reason: "no attestation — unevaluated server" }; @@ -82,9 +116,23 @@ export function gateDecision( if (attestation.toolDefsFingerprint.toLowerCase() !== live.fingerprint.toLowerCase()) { return { action: "refuse", reason: "rug pull — live tool surface differs from the graded one" }; } + // Provenance (opt-in): a self-minted grade is forgeable, so a value path can + // require a known signer. Fail closed when no attester is present. + if (opts.allowedAttesters && !(attestation.attester && opts.allowedAttesters.has(attestation.attester.toLowerCase()))) { + return { action: "refuse", reason: "attester not in allowlist — self-minted grades are forgeable; trust a known attester or re-run the harness" }; + } + // Methodology pinning (opt-in): refuse a grade from an unaccepted methodology. + if (opts.acceptedMethodologyVersions && !(attestation.methodologyVersion && opts.acceptedMethodologyVersions.has(attestation.methodologyVersion))) { + return { action: "refuse", reason: `methodology version ${attestation.methodologyVersion ?? 
"unknown"} not accepted` }; + } if (!passing.has(attestation.overallGrade)) { return { action: "refuse", reason: `failing grade ${attestation.overallGrade}` }; } + // Egress (opt-in, for signed/value actions): a remote or no-sandbox B never + // had its network behavior observed. Fail closed when the flag is missing. + if (opts.requireEgressVerified && attestation.egressVerified !== true) { + return { action: "refuse", reason: "egress unverified (remote or no-sandbox grade) — not eligible for signed actions" }; + } // The version is appended to the reason only — it is NOT a gate condition (no // refuse branch on version): there is no trustworthy live-version oracle, so // the fingerprint above remains the sole cryptographic anchor. diff --git a/packages/cli/src/ci.ts b/packages/cli/src/ci.ts index b2a9ff0..74d2dd7 100644 --- a/packages/cli/src/ci.ts +++ b/packages/cli/src/ci.ts @@ -208,6 +208,12 @@ export async function runCi(args: readonly string[]): Promise { return 0; } const opts = parseCiArgs(args); + if (opts.discover) { + const warn = + "auto-discovery is ON — targets read from repo config (.mcp.json / .vscode / .cursor) and SKILL.md dirs are pull-request-controllable, and grading a server runs its code. Don't enable on untrusted PRs with secrets; prefer explicit --server / --skill allowlists."; + process.stderr.write(`polygraphso ci: ${warn}\n`); + if (process.env.GITHUB_ACTIONS) process.stdout.write(`::warning::polygraph: ${oneLine(warn)}\n`); + } const results = await evaluate(opts); if (opts.json) { process.stdout.write(JSON.stringify(results) + "\n"); diff --git a/packages/litmus/src/index.ts b/packages/litmus/src/index.ts index 0090d96..eb41881 100644 --- a/packages/litmus/src/index.ts +++ b/packages/litmus/src/index.ts @@ -21,8 +21,8 @@ export * from "@polygraph/onchain"; // Agent-gate decision logic, re-exported explicitly to keep the public surface // narrow (the internal harness helpers aren't part of this package's API). 
-export { gateDecision, liveFingerprint, DEFAULT_PASSING } from "@polygraph/agent"; -export type { AttestationView, GateAction, GateDecision } from "@polygraph/agent"; +export { gateDecision, liveFingerprint, DEFAULT_PASSING, PAYMENT_PASSING } from "@polygraph/agent"; +export type { AttestationView, GateAction, GateDecision, GateOptions } from "@polygraph/agent"; // The run_litmus MCP tool's handler, exposed for embedding in a custom server. export { diff --git a/packages/onchain/src/read.ts b/packages/onchain/src/read.ts index f0a2b42..d819fef 100644 --- a/packages/onchain/src/read.ts +++ b/packages/onchain/src/read.ts @@ -10,9 +10,16 @@ */ import { Contract, JsonRpcProvider, ZeroHash } from "ethers"; +import { CATEGORY_STATUS_UINT8, type CategoryStatus } from "@polygraph/core"; import { decodeLitmusAttestation } from "./eas.js"; import { networkConfig, rpcUrl } from "./networks.js"; +/** Inverse of the on-chain uint8 verdict encoding (eas.ts). Unknown → "skipped" + * (fail-safe: an unrecognized code is treated as "not verified", never "pass"). */ +function uint8ToCategoryStatus(n: number): CategoryStatus { + return (Object.keys(CATEGORY_STATUS_UINT8) as CategoryStatus[]).find((k) => CATEGORY_STATUS_UINT8[k] === n) ?? "skipped"; +} + // EAS `getAttestation(bytes32)` → the on-chain `Attestation` struct (field order // per the deployed EAS contract). Named tuple components give ethers v6 named // accessors (att.uid / att.schema / att.data / att.attester / att.revocationTime @@ -64,6 +71,14 @@ export interface OnchainLitmusAttestation { revoked: boolean; /** Account that signed the attestation (self-mint model: any address). */ attester: string; + /** The litmus methodology version this grade was produced under — signed, + * on-chain data (the gate can require a known/accepted version). */ + methodologyVersion: string; + /** True only when the C-02 egress probe actually ran AND passed. 
False for + * remote or no-sandbox grades, where egress was skipped: such a grade caps + * at B but its network behavior was never observed, so a payment gate should + * not treat it like an egress-clean local A. */ + egressVerified: boolean; /** EAS expiry in unix seconds; 0n = no expiration. */ expirationTime: bigint; } @@ -92,6 +107,8 @@ export async function readAttestation(uid: string): Promise 0n, attester: String(att.attester), + methodologyVersion: String(d.methodologyVersion), + egressVerified: uint8ToCategoryStatus(Number(d.gradeC02)) === "pass", expirationTime: BigInt(att.expirationTime ?? 0n), }; } diff --git a/plugins/polygraph/skills/polygraph/SKILL.md b/plugins/polygraph/skills/polygraph/SKILL.md index fd03ee5..5ccbfdc 100644 --- a/plugins/polygraph/skills/polygraph/SKILL.md +++ b/plugins/polygraph/skills/polygraph/SKILL.md @@ -69,11 +69,13 @@ probe in depth. ## Check a grade -A sub-second lookup against published grades — **one command before your agent installs -anything:** +A sub-second lookup against published grades. This runs the `polygraphso` **lookup** CLI: it +reads a published grade and does **not** install or execute the target server. `npx` does fetch +and run our CLI, though — so it's a lookup, not a "no-install" check; pin the version in any +automated or trust context: ```bash -$ npx polygraphso check npm/@modelcontextprotocol/server-filesystem +$ npx polygraphso@ check npm/@modelcontextprotocol/server-filesystem → polygraph: A · litmus-v10 · 2026-06-26 → details → polygraph.so/#checks ``` @@ -96,11 +98,20 @@ The highest-value use at runtime: **gate an MCP server through its grade before uses it, pays it, or routes a transaction through it.** Polygraph is the *verify* step that runs ahead of whatever your agent does next. Two checks, both required: -1. **Grade meets your bar.** Default: accept A/B, refuse D/F. (A remote server's ceiling is B — - see "Reading a B" above, and don't penalize it for that.) +1. 
**Grade meets your bar.** Default (`DEFAULT_PASSING`): accept A/B, refuse D/F. **For signed + actions or payments, raise the bar to a local A** (`PAYMENT_PASSING`, or `gateDecision(…, { + requireEgressVerified: true })`): a remote server caps at B because its egress was never + observed, so a B is exactly the case where network exfiltration wasn't tested — don't auto-route + value through it; require a local A or a manual review. 2. **Fingerprint still matches.** An attestation is only valid for the exact tool surface it graded. Recompute the server's **live** tool-surface fingerprint and require it to equal the attested one before acting — a built-in rug-pull check against a graded-then-swapped server. +3. **Attestation is trustworthy enough for the action.** `readAttestation` already binds to our + EAS schema (fail-closed) and a fixed Base RPC, and `gateDecision` checks revocation, expiry, and + the server-ref binding. A self-minted grade is still **forgeable**, so before routing value also + require an **attester allowlist** (`gateDecision(…, { allowedAttesters })`) and an accepted + **methodology version** — or, for the strongest assurance, **re-run the open harness** yourself + (`run_litmus`) and compare. Reproducibility, not the signature, is what makes a grade trustworthy. Drop the `verify_attestation` MCP tool in front of execution, or use the `gateDecision` helper. @@ -149,6 +160,9 @@ polygraphso-litmus litmus ./path/to/local-mcp-server --json - **Node ≥ 18.** **Docker is optional** but recommended — without it the egress probe (C-02) is skipped and the grade is **capped at B** (as is any remote/HTTP target, which can't be sandboxed). +- **`--bearer` is sent as an `Authorization` header to the target.** Pass it only to a remote you + explicitly trust and have pinned; use a scoped, short-lived token — never on an + auto-discovered or untrusted target. 
- **Exit codes are CI-friendly:** non-zero on a failing grade (D/F), zero on A/B — drop it into a pipeline to gate dependencies. @@ -160,21 +174,27 @@ Flags, env vars, `--json` output, and the `check` / `list` subcommands are all i ## Gate your CI on grades Turn the grade into a build check: the **polygraph CI gate** fails a build when an MCP server or an -Agent Skill grades D/F. Add the GitHub Action to a repo — +Agent Skill grades D/F. Add the GitHub Action to a repo — pin it to a commit SHA (not the mutable +`@v1` tag) and trigger on `pull_request`, never `pull_request_target`: ```yaml -- uses: polygraphso/litmus@v1 +on: [pull_request] # not pull_request_target +# … +- uses: polygraphso/litmus@<sha> # pin a SHA for a security gate with: - servers: | + servers: | # name targets explicitly (recommended) npm/@modelcontextprotocol/server-filesystem skills: | ./my-skill ``` -— or run it anywhere with `npx @polygraphso/litmus ci`. It auto-discovers MCP servers -(`.mcp.json` / `.vscode` / `.cursor`) and skills (`SKILL.md` dirs), grades each, and fails on D/F; -un-gradeable targets warn unless `strict`. Full setup, inputs, and the run-anywhere command: -[`references/ci-gate.md`](references/ci-gate.md). +— or run it anywhere with `npx @polygraphso/litmus@<version> ci` (pin the version). **Grading a +server runs its code** (egress is Docker-sandboxed, but it still executes), so on a public repo +keep auto-discovery **off** (the default) and name targets explicitly rather than grading +PR-controlled `.mcp.json` / `.vscode` / `.cursor` config. Un-gradeable targets warn unless `strict`. +A **skill** grade is a **static** scan, not behavioral proof — treat a skill that runs install-time +code or carries transaction instructions as needing manual review regardless of its letter. Full +setup, inputs, and the security notes: [`references/ci-gate.md`](references/ci-gate.md). 
--- diff --git a/plugins/polygraph/skills/polygraph/references/ci-gate.md b/plugins/polygraph/skills/polygraph/references/ci-gate.md index 938c20b..ae20f85 100644 --- a/plugins/polygraph/skills/polygraph/references/ci-gate.md +++ b/plugins/polygraph/skills/polygraph/references/ci-gate.md @@ -15,7 +15,7 @@ target that misbehaves under the probes, not one that evades them. ```yaml # .github/workflows/mcp-gate.yml name: mcp-gate -on: [pull_request] +on: [pull_request] # NOT pull_request_target — that runs with secrets in an untrusted fork's context permissions: contents: read jobs: @@ -23,20 +23,41 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: polygraphso/litmus@v1 + - uses: polygraphso/litmus@<sha> # pin a SHA, not the mutable @v1 tag, for a security gate with: - # Auto-discovers MCP servers (.mcp.json / .vscode/mcp.json / .cursor/mcp.json) - # and skills (SKILL.md dirs). Or list them explicitly: + # Name targets explicitly (recommended on public repos). Discovery is OFF by default. servers: | npm/@modelcontextprotocol/server-filesystem skills: | ./my-skill + # discover: "true" # opt in to .mcp.json/.vscode/.cursor + SKILL.md discovery — trusted repos only # min-grade: B # stricter than the default D/F gate # strict: "true" # also fail on targets that can't be graded ``` -That is the whole setup. On each PR the action grades every MCP server **and** every skill, and -fails the job on any **D** or **F**. +That is the whole setup. On each PR the action grades every named (or, with `discover: true`, +discovered) MCP server **and** skill, and fails the job on any **D** or **F**. + +--- + +## Security on CI (read before enabling on a public repo) + +Grading a server **runs its code** — the harness connects and exercises its tools (egress is +contained in a default-deny Docker sandbox, but the server still executes). 
On CI that means: + +- **Trigger on `pull_request`, never `pull_request_target`.** `pull_request_target` runs with the + base repo's secrets in the context of an untrusted fork's code — combined with a gate that + executes targets, that is a credential-exfiltration path. +- **Don't expose secrets to untrusted PRs.** Fork PRs on `pull_request` get no secrets by default; + keep it that way. Don't pass tokens/`bearer` into a job that grades fork-controlled targets. +- **Prefer an explicit allowlist over discovery on public repos.** Discovery is **off by default**; + a PR can add a `.mcp.json` / `.vscode` / `.cursor` entry or a `SKILL.md` dir pointing at an + attacker-chosen package, and `discover: true` would grade (run) it. Name `servers:` / `skills:` + explicitly, or only enable discovery where the config isn't attacker-controllable. +- **Pin the action to a commit SHA** (`polygraphso/litmus@<sha>`), not the mutable `@v1` tag, and + keep the `version` input pinned so the harness is reproducible. +- **`bearer` is sent as an `Authorization` header to the target.** Pass it only for an explicitly + trusted, pinned remote; use a scoped, short-lived token; never with discovery or on untrusted PRs. --- @@ -74,14 +95,15 @@ against the server (see "Reading a B" in [`../SKILL.md`](../SKILL.md)). | Input | Default | Description | |---|---|---| -| `servers` | — | Explicit MCP refs (newline- or comma-separated). Merged with auto-discovery. | -| `skills` | — | Explicit skill directories (newline- or comma-separated). Merged with auto-discovery. | -| `discover` | `true` | Discover MCP servers from config files and skills from `SKILL.md`. | +| `servers` | — | Explicit MCP refs (newline- or comma-separated). Merged with discovery when on. | +| `skills` | — | Explicit skill directories (newline- or comma-separated). Merged with discovery when on. | +| `discover` | `false` | Discover targets from config files and `SKILL.md`. 
Off by default — opt in on trusted repos only (discovered targets are PR-controllable and grading runs their code). | | `min-grade` | — | Minimum acceptable grade (`A`–`D`). Default gates on D/F. | | `strict` | `false` | Treat un-gradeable targets as failures, not warnings. | | `working-directory` | `.` | Directory scanned for MCP config files and `SKILL.md` bundles. | -| `version` | pinned | `@polygraphso/litmus` version to run. | -| `bearer` | — | Token passed through to a gated remote (HTTPS) server. | +| `version` | pinned | `@polygraphso/litmus` version to run (keep it pinned). | +| `api-url` | — | Override the lookup endpoint. HTTPS enforced; point only at the official endpoint or a mirror you trust. | +| `bearer` | — | Sent as an `Authorization` header to a gated remote (HTTPS) target. Trusted pinned remote only; scoped, short-lived; never with discovery or on untrusted PRs. | Outputs: `result` (`pass` / `fail`), `failed` (count), and `report` (a JSON array of per-target results, each with its `kind` of `server` or `skill`) — read them from a later step via @@ -113,15 +135,18 @@ The gate is a plain command in the harness, so it also works in any other CI or check: ```bash -# Gate the MCP servers and skills discovered in this repo: -npx @polygraphso/litmus ci +# Gate the MCP servers and skills discovered in your own repo (pin the version): +npx @polygraphso/litmus@<version> ci -# Or name targets, fail below B, treat un-gradeable as a failure: -npx @polygraphso/litmus ci --server npm/@scope/your-mcp --skill ./your-skill --min-grade B --strict +# In untrusted CI, turn discovery off and name targets explicitly: +npx @polygraphso/litmus@<version> ci --no-discover --server npm/@scope/your-mcp --skill ./your-skill --min-grade B --strict ``` It exits non-zero on a gated target, so any pipeline can use it. `--json` emits the full per-target -report; `--no-discover` and `--no-lookup` narrow what it does. +report; `--no-discover` and `--no-lookup` narrow what it does. 
Note the asymmetry: the **standalone +CLI discovers by default** (convenient for gating your own repo, and it prints a warning when it +does), while the **GitHub Action defaults discovery off** (the CI/PR surface, where config is +attacker-controllable). In untrusted CI, pass `--no-discover` and an explicit allowlist. --- @@ -130,8 +155,11 @@ report; `--no-discover` and `--no-lookup` narrow what it does. - **Reproducibility is the trust anchor.** The harness is open and deterministic, so the gate's verdict is falsifiable — not a black box. - A passing gate means *these targets did not misbehave under these probes* — **not** that they are - safe in every situation. A skill grade is a **static** read of its text and bundle; a server grade - is behavioral. **Evasion** (a server that detects the test context) is the disclosed residual limit. + safe in every situation. A skill grade is a **static** read of its text and bundle — **not + equivalent** to a behavioral server grade: static scanning can't see a command built or fetched at + runtime, or a bundled script that runs on install. Treat a skill that executes install-time code or + carries transaction instructions as needing manual security review regardless of its letter. + **Evasion** (a server that detects the test context) is the disclosed residual limit. - The gate does not replace your own runtime guards (for example, transaction-verification checks before signing or paying — see the "Verify before you trust" section of [`../SKILL.md`](../SKILL.md)). diff --git a/plugins/polygraph/skills/polygraph/references/cli.md b/plugins/polygraph/skills/polygraph/references/cli.md index 0e25634..852a7e9 100644 --- a/plugins/polygraph/skills/polygraph/references/cli.md +++ b/plugins/polygraph/skills/polygraph/references/cli.md @@ -17,8 +17,8 @@ registries. 
The harness also accepts a raw `https://…/mcp` URL or a local path ## `polygraphso` — look up a grade ```bash -npx polygraphso check npm/@modelcontextprotocol/server-filesystem # sub-second lookup -npm i -g polygraphso # or install globally +npx polygraphso@ check npm/@modelcontextprotocol/server-filesystem # sub-second lookup +npm i -g polygraphso # or install globally polygraphso check // # latest published grade polygraphso list [--json] # every graded server + its grade @@ -26,6 +26,10 @@ polygraphso --version polygraphso --help ``` +`check` runs the `polygraphso` **lookup** CLI — it reads a published grade and does **not** install +or execute the target server. `npx` still fetches and runs our CLI, so this is a lookup, not a +"no-install" trust check; **pin the version** (`polygraphso@`) in any automated context. + Grades are live. Example output (the list rows are **illustrative** — a grade is point-in-time evidence, so the live set at `polygraphso list` / polygraph.so is the source of truth): @@ -48,7 +52,11 @@ A tracked-but-ungraded server reports `not available yet` with a `polygraph.so/notify?for=` link; its grade lands as the litmus harness covers more of the ecosystem. -Config: `POLYGRAPH_API_URL` overrides the lookup endpoint (useful for local testing). +Config: `POLYGRAPH_API_URL` overrides the lookup endpoint (useful for local testing). HTTPS is +enforced (plain `http` only for `localhost`), so it isn't a MITM vector — but the residual risk is +endpoint **trust**: for any execution or payment decision, point it only at the official +`polygraph.so` endpoint or a mirror you control. An attacker-supplied endpoint can return fabricated +grades; never accept one from untrusted config. --- @@ -81,7 +89,7 @@ fingerprint. | Flag | Effect | |------|--------| | `--json` | Emit the full canonical `EvidenceBundle` instead of the human summary. | -| `--bearer ` | Bearer auth for an HTTP target (or set `LITMUS_BEARER`). 
| +| `--bearer <token>` | Bearer auth for an HTTP target (or set `LITMUS_BEARER`). Sent as `Authorization: Bearer` to the target — trusted, pinned remote only; scoped and short-lived; never on an auto-discovered or untrusted target. | | `--header "Key: Value"` | Add a custom request header (repeatable). | | `--allow-state-changing` | Permit calls to state-mutating tools during dynamic probes. | @@ -89,8 +97,8 @@ fingerprint. | Var | Effect | |-----|--------| -| `POLYGRAPH_API_URL` | Set to `https://polygraph.so` to pin the evidence bundle and get a publish/mint hand-off URL. Unset = fully offline run. | -| `LITMUS_BEARER` | Bearer token for HTTP auth. | +| `POLYGRAPH_API_URL` | Set to `https://polygraph.so` to pin the evidence bundle and get a publish/mint hand-off URL. Unset = fully offline run. HTTPS enforced; point only at an endpoint you trust for execution decisions. | +| `LITMUS_BEARER` | Bearer token for HTTP auth. Sent to the target — trusted pinned remote only; scoped and short-lived. | | `LITMUS_STDIO_ISOLATION` | Set to `docker` to **require** Docker isolation for stdio targets (fail-closed if Docker is unavailable). | ### Requirements & exit codes @@ -151,11 +159,20 @@ verify-then-execute pattern. ## Programmatic use ```ts -import { runLitmus, gateDecision, liveFingerprint, readAttestation } from "@polygraphso/litmus"; +import { runLitmus, gateDecision, liveFingerprint, readAttestation, PAYMENT_PASSING } from "@polygraphso/litmus"; const bundle = await runLitmus("npm/@scope/server"); // → EvidenceBundle { grade, categories, fingerprint, … } -const attestation = await readAttestation("npm/@scope/server"); +const attestation = await readAttestation("npm/@scope/server"); // binds to our EAS schema + a fixed Base RPC (fail-closed) const live = await liveFingerprint("npm/@scope/server"); -const decision = gateDecision(attestation, live); // → { action: "pay" | "refuse", reason } + +// Read-only / low-value: grade A/B + live fingerprint match is enough. 
+const ok = gateDecision(attestation, live).action === "pay"; + +// Signed actions / payments — raise the bar. A self-minted grade is forgeable, so: +const payDecision = gateDecision(attestation, live, PAYMENT_PASSING, undefined, { + requireEgressVerified: true, // exclude remote/no-sandbox B (egress never observed) + allowedAttesters: new Set(["0x…"]), // trust a known signer… + acceptedMethodologyVersions: new Set(["litmus-v10"]), +}); // …or, strongest: re-run runLitmus() yourself and compare. ``` diff --git a/plugins/polygraph/skills/polygraph/references/methodology.md b/plugins/polygraph/skills/polygraph/references/methodology.md index 122e8f9..c78eb83 100644 --- a/plugins/polygraph/skills/polygraph/references/methodology.md +++ b/plugins/polygraph/skills/polygraph/references/methodology.md @@ -112,7 +112,11 @@ verifiable onchain by any agent. - **Reproducibility is the anchor.** Open + deterministic harness ⇒ a false grade is falsifiable by re-running it. - **A published grade is forgeable by its signer.** Trust comes from reproducibility and the - fingerprint recheck, not from the signature alone. + fingerprint recheck, not from the signature alone. The read path binds to our EAS schema and a + fixed Base RPC (fail-closed), and `gateDecision` checks revocation, expiry, and the server-ref + + fingerprint binding. **Before routing value**, also require an attester allowlist and an accepted + methodology version (`gateDecision` options), demand a grade whose **egress was actually verified** + (a local A, not a remote/no-sandbox B), or — strongest — re-run the harness yourself and compare. - **Evasion is the residual limit:** a server that detects the test context could pass grading and misbehave in production. - Independent/unforgeable upgrades (staked bonds, zkTLS, TEE-backed runs, independent