polygraphso · RubenSousaDinis · Jun 29, 2026 · Jun 29, 2026 · Jun 29, 2026
diff --git a/README.md b/README.md
@@ -26,36 +26,43 @@ behavioral litmus. For servers it is hybrid — a fast lookup of the published g
 when ungraded; for skills it is a fast static scan. Un-gradeable targets warn unless `strict`.
 
 It's on the **[GitHub Marketplace](https://github.com/marketplace/actions/polygraph-mcp-gate)** as
-`polygraphso/litmus@v1` — drop it into a workflow:
+`polygraphso/litmus@v1`. For a security gate, pin to a commit SHA rather than the mutable `@v1` tag:
 
 ```yaml
 # .github/workflows/mcp-gate.yml
 name: mcp-gate
-on: [pull_request]
+on: [pull_request]            # NOT pull_request_target — that exposes secrets to fork PRs
 permissions:
   contents: read
 jobs:
   gate:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v5
-      - uses: polygraphso/litmus@v1
+      - uses: polygraphso/litmus@<commit-sha>   # pin to a SHA; resolve from the v1 release
         with:
-          # Auto-discovers MCP servers (.mcp.json / .vscode/mcp.json / .cursor/mcp.json)
-          # and skills (SKILL.md dirs). Or name them explicitly:
+          # Name the targets explicitly (recommended). Grading runs a server's code,
+          # so on a public repo prefer an allowlist over discovering PR-controlled config:
           servers: |
             npm/@modelcontextprotocol/server-filesystem
           skills: |
             ./my-skill
+          # discover: "true"  # opt in to auto-discovery (.mcp.json, .vscode/mcp.json, .cursor/mcp.json, SKILL.md dirs) — trusted repos only
           # min-grade: B      # stricter than the default D/F gate
           # strict: "true"    # also fail on targets that cannot be graded
 ```
 
-**Inputs:** `servers` · `skills` · `discover` (default `true`) · `min-grade` · `strict` · `working-directory` · `version` · `bearer`. **Outputs:** `result` · `failed` · `report`.
+**Inputs:** `servers` · `skills` · `discover` (default `false`) · `min-grade` · `strict` · `working-directory` · `version` · `bearer`. **Outputs:** `result` · `failed` · `report`.
 
-Not on GitHub? The gate is a plain command — `npx @polygraphso/litmus ci` — so it runs in any CI or
-as a pre-commit hook. A grade is a measurement, not a guarantee: re-run the open harness to reproduce
-any result.
+**Security.** Grading a server **runs its code** (network egress is Docker-sandboxed, but the code still executes).
+Trigger on `pull_request`, never `pull_request_target`. Keep `discover` off on public repos and name
+targets explicitly — auto-discovered config is pull-request-controllable. `bearer` is sent as an
+`Authorization` header to the target, so pass it only for an explicitly trusted, pinned remote — never
+with discovery or on untrusted PRs, and keep it scoped and short-lived.
+
+Not on GitHub? The gate is a plain command — `npx @polygraphso/litmus@0.20.0 ci` (pin the version) —
+so it runs in any CI or as a pre-commit hook. A grade is a measurement, not a guarantee: re-run the
+open harness to reproduce any result.
 
 ## What litmus is
 

diff --git a/action.yml b/action.yml
@@ -1,4 +1,17 @@
 # action.yml — Polygraph MCP gate (composite). Marketplace handle: polygraphso/litmus@v1
+#
+# SECURITY — read before enabling on a public repo:
+#   • Pin this action to a commit SHA, not the mutable @v1 tag:
+#       uses: polygraphso/litmus@<40-char-sha>  # v1.x.x
+#   • Trigger on `pull_request`, NEVER `pull_request_target` — the latter runs with
+#     repo secrets in the context of an untrusted fork PR.
+#   • Grading a server RUNS its code (network egress is Docker-sandboxed, but the
+#     code still executes). Do not run with secrets available on untrusted PRs.
+#   • `discover` is OFF by default: prefer an explicit `servers:` / `skills:`
+#     allowlist over auto-discovering PR-controlled config on public repos.
+#   • `bearer` is sent as an Authorization header to the target host — pass it only
+#     for an explicitly trusted, pinned remote, never with discovery or on fork PRs;
+#     keep it scoped and short-lived.
 name: "Polygraph MCP gate"
 description: "Fail the build if an MCP dependency grades D/F under the open polygraph behavioral litmus."
 author: "polygraph"
@@ -15,9 +28,9 @@ inputs:
     required: false
     default: ""
   discover:
-    description: "Auto-discover targets from MCP config files (.mcp.json, .vscode/mcp.json, .cursor/mcp.json)."
+    description: "Auto-discover targets from MCP config files (.mcp.json, .vscode/mcp.json, .cursor/mcp.json). OFF by default — opt in only on trusted repos, since discovered targets are PR-controllable and grading runs their code."
     required: false
-    default: "true"
+    default: "false"
   min-grade:
     description: "Minimum acceptable grade (A|B|C|D). Default gates on D/F."
     required: false
@@ -35,13 +48,13 @@ inputs:
     # Bump this in lockstep with each release that the v1 tag points at.
     description: "@polygraphso/litmus version to run."
     required: false
-    default: "0.18.2"
+    default: "0.20.0"
   api-url:
-    description: "Override the published-grade lookup API base URL."
+    description: "Override the published-grade lookup API base URL. HTTPS is enforced (http only for localhost). Point only at the official endpoint or a mirror you trust — an attacker-controlled endpoint can return fabricated grades."
     required: false
     default: ""
   bearer:
-    description: "Bearer token passed through to a gated remote (https) target."
+    description: "Bearer token sent as an Authorization header to a gated remote (https) target. Only for an explicitly trusted, pinned remote — never with discovery or on untrusted PRs; keep it scoped and short-lived."
     required: false
     default: ""
 outputs:

diff --git a/packages/agent/src/gate.test.ts b/packages/agent/src/gate.test.ts
@@ -1,5 +1,5 @@
 import { describe, it, expect } from "vitest";
-import { gateDecision, fingerprintLiveSurface, type LiveTarget } from "./gate.js";
+import { gateDecision, fingerprintLiveSurface, DEFAULT_PASSING, PAYMENT_PASSING, type LiveTarget } from "./gate.js";
 
 /** A fake MCP client that serves a fixed list of `tools/list` pages. */
 function pagedClient(pages: Array<{ tools: Array<{ name: string }>; nextCursor?: string }>) {
@@ -81,6 +81,47 @@ describe("gateDecision", () => {
     // a failing grade is still refused regardless of any version
     expect(gateDecision({ ...passing, overallGrade: "F", resolvedVersion: "1.2.3" }, live(FP)).action).toBe("refuse");
   });
+
+  it("no longer accepts C by default (DEFAULT_PASSING is {A,B}; C is reserved)", () => {
+    expect(DEFAULT_PASSING.has("C")).toBe(false);
+    expect(gateDecision({ serverRef: REF, toolDefsFingerprint: FP, overallGrade: "C" }, live(FP)).action).toBe("refuse");
+  });
+
+  it("PAYMENT_PASSING accepts only a local A (excludes a remote B)", () => {
+    const att = (grade: string) => ({ serverRef: REF, toolDefsFingerprint: FP, overallGrade: grade });
+    expect(gateDecision(att("A"), live(FP), PAYMENT_PASSING).action).toBe("pay");
+    expect(gateDecision(att("B"), live(FP), PAYMENT_PASSING).action).toBe("refuse");
+  });
+});
+
+describe("gateDecision — opt-in stricter rules (GateOptions)", () => {
+  const base = { serverRef: REF, toolDefsFingerprint: FP, overallGrade: "A" as const };
+
+  it("attester allowlist: refuses an unlisted signer, pays a listed one (case-insensitive)", () => {
+    const allow = new Set(["0xabc"]);
+    expect(gateDecision({ ...base, attester: "0xDEF" }, live(FP), undefined, undefined, { allowedAttesters: allow }).action).toBe("refuse");
+    expect(gateDecision({ ...base, attester: "0xABC" }, live(FP), undefined, undefined, { allowedAttesters: allow }).action).toBe("pay");
+    // fail closed when no attester is present
+    expect(gateDecision({ ...base }, live(FP), undefined, undefined, { allowedAttesters: allow }).action).toBe("refuse");
+  });
+
+  it("methodology allowlist: refuses an unaccepted version, pays an accepted one", () => {
+    const accept = new Set(["litmus-v10"]);
+    expect(gateDecision({ ...base, methodologyVersion: "litmus-v3" }, live(FP), undefined, undefined, { acceptedMethodologyVersions: accept }).action).toBe("refuse");
+    expect(gateDecision({ ...base, methodologyVersion: "litmus-v10" }, live(FP), undefined, undefined, { acceptedMethodologyVersions: accept }).action).toBe("pay");
+  });
+
+  it("requireEgressVerified: refuses a grade whose egress was never observed", () => {
+    expect(gateDecision({ ...base, overallGrade: "B", egressVerified: false }, live(FP), undefined, undefined, { requireEgressVerified: true }).action).toBe("refuse");
+    // missing flag also fails closed
+    expect(gateDecision({ ...base, overallGrade: "B" }, live(FP), undefined, undefined, { requireEgressVerified: true }).action).toBe("refuse");
+    // a local grade with egress verified passes
+    expect(gateDecision({ ...base, egressVerified: true }, live(FP), undefined, undefined, { requireEgressVerified: true }).action).toBe("pay");
+  });
+
+  it("with no options, the stricter rules are inert (decision unchanged)", () => {
+    expect(gateDecision({ ...base, attester: "0xanything", egressVerified: false }, live(FP)).action).toBe("pay");
+  });
 });
 
 describe("fingerprintLiveSurface — full-surface (paginated) rug-pull check", () => {

diff --git a/packages/agent/src/gate.ts b/packages/agent/src/gate.ts
@@ -10,7 +10,9 @@
  *      attested fingerprint → refuse (rug pull): the surface changed since it
  *      was graded
  *   4. grade check — a failing grade → refuse, 0 spent
- * All pass → pay.
+ * All pass → pay. A value/payment path can opt into stricter rules via
+ * `GateOptions` (attester allowlist, accepted methodology versions, and
+ * `requireEgressVerified` — which rejects remote/no-sandbox B grades).
  *
  * `gateDecision` is pure and unit-tested; `liveFingerprint` reuses the harness
  * and returns the connected server's canonical ref so the binding compares
@@ -34,6 +36,16 @@ export interface AttestationView {
   revoked?: boolean;
   /** EAS expiry in unix seconds; 0n / undefined = no expiration. */
   expirationTime?: bigint;
+  /** Account that signed the attestation. A self-minted grade is forgeable, so a
+   *  caller routing value can pin an `allowedAttesters` set (or re-run the harness). */
+  attester?: string;
+  /** Methodology version the grade was produced under (signed attestation data).
+   *  Unlike `resolvedVersion`, this is not a live-oracle claim, so a caller may
+   *  require it via `acceptedMethodologyVersions`. */
+  methodologyVersion?: string;
+  /** True only when C-02 (egress) actually ran AND passed. False/undefined for
+   *  remote or no-sandbox B grades, where egress was never observed. */
+  egressVerified?: boolean;
 }
 
 export interface LiveTarget {
@@ -53,14 +65,36 @@ export interface GateDecision {
   reason: string;
 }
 
-/** Grades an agent will transact with. F (injection/leak) and D (egress) are out. */
-export const DEFAULT_PASSING = new Set(["A", "B", "C"]);
+/** Grades an agent will transact with by default. F (injection/leak) and D
+ *  (egress) are out; C is reserved/unassigned under the current methodology. */
+export const DEFAULT_PASSING = new Set(["A", "B"]);
+
+/** The bar for signed/value actions — pass it as `gateDecision`'s `passing` argument. Only a LOCAL A
+ *  clears it: remote servers cap at B (egress unverified), so requiring A excludes egress-unverified grades. */
+export const PAYMENT_PASSING = new Set(["A"]);
+
+/**
+ * Optional, stricter trust rules — all default off, so the base decision is
+ * unchanged unless a caller opts in. Use these on a value/payment path.
+ */
+export interface GateOptions {
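+  /** If set, the attestation's signer must be in this set. Entries must be LOWERCASE addresses:
+   *  the gate lowercases the attester before lookup, so a mixed-case entry never matches, and an
+   *  attestation with no attester is refused. Self-minted grades are forgeable; an allowlist trades reproducibility for a known-signer assumption. */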
+  allowedAttesters?: Set<string>;
+  /** If set, the grade's methodology version must be one of these (exact match; a missing version is refused). */
+  acceptedMethodologyVersions?: Set<string>;
+  /** Refuse unless C-02 (egress) actually ran clean (`egressVerified === true`; a missing flag fails
+   *  closed). Rejects remote/no-sandbox B grades whose network behavior was never observed. */
+  requireEgressVerified?: boolean;
+}
 
 export function gateDecision(
   attestation: AttestationView | null,
   live: LiveTarget,
   passing: Set<string> = DEFAULT_PASSING,
   now: bigint = BigInt(Math.floor(Date.now() / 1000)),
+  opts: GateOptions = {},
 ): GateDecision {
   if (!attestation) {
     return { action: "refuse", reason: "no attestation — unevaluated server" };
@@ -82,9 +116,23 @@ export function gateDecision(
   if (attestation.toolDefsFingerprint.toLowerCase() !== live.fingerprint.toLowerCase()) {
     return { action: "refuse", reason: "rug pull — live tool surface differs from the graded one" };
   }
+  // Provenance (opt-in): a self-minted grade is forgeable, so a value path can
+  // require a known signer. Fail closed when no attester is present.
+  if (opts.allowedAttesters && !(attestation.attester && opts.allowedAttesters.has(attestation.attester.toLowerCase()))) {
+    return { action: "refuse", reason: "attester not in allowlist — self-minted grades are forgeable; trust a known attester or re-run the harness" };
+  }
+  // Methodology pinning (opt-in): refuse a grade from an unaccepted methodology.
+  if (opts.acceptedMethodologyVersions && !(attestation.methodologyVersion && opts.acceptedMethodologyVersions.has(attestation.methodologyVersion))) {
+    return { action: "refuse", reason: `methodology version ${attestation.methodologyVersion ?? "unknown"} not accepted` };
+  }
   if (!passing.has(attestation.overallGrade)) {
     return { action: "refuse", reason: `failing grade ${attestation.overallGrade}` };
   }
+  // Egress (opt-in, for signed/value actions): a remote or no-sandbox B never
+  // had its network behavior observed. Fail closed when the flag is missing.
+  if (opts.requireEgressVerified && attestation.egressVerified !== true) {
+    return { action: "refuse", reason: "egress unverified (remote or no-sandbox grade) — not eligible for signed actions" };
+  }
   // The version is appended to the reason only — it is NOT a gate condition (no
   // refuse branch on version): there is no trustworthy live-version oracle, so
   // the fingerprint above remains the sole cryptographic anchor.

diff --git a/packages/cli/src/ci.ts b/packages/cli/src/ci.ts
@@ -208,6 +208,12 @@ export async function runCi(args: readonly string[]): Promise<number> {
     return 0;
   }
   const opts = parseCiArgs(args);
+  if (opts.discover) {
+    const warn =
+      "auto-discovery is ON — targets read from repo config (.mcp.json / .vscode / .cursor) and SKILL.md dirs are pull-request-controllable, and grading a server runs its code. Don't enable on untrusted PRs with secrets; prefer explicit --server / --skill allowlists.";
+    process.stderr.write(`polygraphso ci: ${warn}\n`);
+    if (process.env.GITHUB_ACTIONS) process.stdout.write(`::warning::polygraph: ${oneLine(warn)}\n`);
+  }
   const results = await evaluate(opts);
   if (opts.json) {
     process.stdout.write(JSON.stringify(results) + "\n");

diff --git a/packages/litmus/src/index.ts b/packages/litmus/src/index.ts
@@ -21,8 +21,8 @@ export * from "@polygraph/onchain";
 
 // Agent-gate decision logic, re-exported explicitly to keep the public surface
 // narrow (the internal harness helpers aren't part of this package's API).
-export { gateDecision, liveFingerprint, DEFAULT_PASSING } from "@polygraph/agent";
-export type { AttestationView, GateAction, GateDecision } from "@polygraph/agent";
+export { gateDecision, liveFingerprint, DEFAULT_PASSING, PAYMENT_PASSING } from "@polygraph/agent";
+export type { AttestationView, GateAction, GateDecision, GateOptions } from "@polygraph/agent";
 
 // The run_litmus MCP tool's handler, exposed for embedding in a custom server.
 export {

diff --git a/packages/onchain/src/read.ts b/packages/onchain/src/read.ts
@@ -10,9 +10,16 @@
  */
 
 import { Contract, JsonRpcProvider, ZeroHash } from "ethers";
+import { CATEGORY_STATUS_UINT8, type CategoryStatus } from "@polygraph/core";
 import { decodeLitmusAttestation } from "./eas.js";
 import { networkConfig, rpcUrl } from "./networks.js";
 
+/** Inverse of the on-chain uint8 verdict encoding (eas.ts). Unknown → "skipped"
+ *  (fail-safe: an unrecognized code is treated as "not verified", never "pass"). */
+function uint8ToCategoryStatus(n: number): CategoryStatus {
+  return (Object.keys(CATEGORY_STATUS_UINT8) as CategoryStatus[]).find((k) => CATEGORY_STATUS_UINT8[k] === n) ?? "skipped";
+}
+
 // EAS `getAttestation(bytes32)` → the on-chain `Attestation` struct (field order
 // per the deployed EAS contract). Named tuple components give ethers v6 named
 // accessors (att.uid / att.schema / att.data / att.attester / att.revocationTime
@@ -64,6 +71,14 @@ export interface OnchainLitmusAttestation {
   revoked: boolean;
   /** Account that signed the attestation (self-mint model: any address). */
   attester: string;
+  /** The litmus methodology version this grade was produced under — signed,
+   *  on-chain data (the gate can require a known/accepted version). */
+  methodologyVersion: string;
+  /** True only when the C-02 egress probe actually ran AND passed. False for
+   *  remote or no-sandbox grades, where egress was skipped: such a grade caps
+   *  at B but its network behavior was never observed, so a payment gate should
+   *  not treat it like an egress-clean local A. */
+  egressVerified: boolean;
   /** EAS expiry in unix seconds; 0n = no expiration. */
   expirationTime: bigint;
 }
@@ -92,6 +107,8 @@ export async function readAttestation(uid: string): Promise<OnchainLitmusAttesta
     resolvedVersion: (d.resolvedVersion as string) || null,
     revoked: att.revocationTime > 0n,
     attester: String(att.attester),
+    methodologyVersion: String(d.methodologyVersion),
+    egressVerified: uint8ToCategoryStatus(Number(d.gradeC02)) === "pass",
     expirationTime: BigInt(att.expirationTime ?? 0n),
   };
 }