diff --git a/cli/tests/e2e.test.ts b/cli/tests/e2e.test.ts index 66fa6ae..430077d 100644 --- a/cli/tests/e2e.test.ts +++ b/cli/tests/e2e.test.ts @@ -90,6 +90,17 @@ beforeAll(async () => { // Enforce-mode policy so the destructive-bash test observes a real deny // rather than a monitor-mode allow-with-tag. + // + // Gate ordering matters here: `safety.rm-suggest-trash` is intentionally + // placed BEFORE `rogue.destructive-bash` so that a plain `rm -rf` Bash + // command fires the nudge-bearing rule (the round-trip we're testing in + // T5). The legacy destructive-bash test now exercises the second + // alternation (`git push --force`) which still flows through + // `rogue.destructive-bash` — coverage preserved. + // + // `safety.secret-read-suggest-skill` matches `**/.aws/credentials`, a + // path that does NOT match `rogue.secret-read`'s `**/.env*` or + // `**/.ssh/**` globs, so the new rule fires cleanly without colliding. const policyPath = join(agentlockHome, "policy.yaml"); writeFileSync( policyPath, @@ -99,6 +110,23 @@ mode: enforce defaults: bash: allow gates: + - id: safety.rm-suggest-trash + match: + tool: Bash + any_command_regex: + - 'rm\\s+-rf\\b' + evaluate: + - kind: always + action: deny + nudge: "use 'trash ' (macOS) or move the directory aside — recoverable from Trash." + - id: safety.secret-read-suggest-skill + match: + tool: Read + path_glob: "**/.aws/credentials" + evaluate: + - kind: always + action: deny + nudge: "use the openagentlock/skills secret-fetcher skill if installed; otherwise ask the operator to paste the credentials." - id: rogue.destructive-bash match: tool: Bash @@ -379,6 +407,9 @@ describe.if(!SKIP)("e2e — CLI <-> control-plane", () => { test("fake-hook: destructive Bash command → deny + rule_id=rogue.destructive-bash", async () => { const sessionId = await createSession(); + // Uses `git push --force` (the second alternation in destructive-bash) + // because `rm -rf` now matches safety.rm-suggest-trash first — see the + // gate-ordering comment in the policy fixture above. const proc = spawn({ cmd: [ "bun", @@ -392,7 +423,7 @@ describe.if(!SKIP)("e2e — CLI <-> control-plane", () => { "--tool", "Bash", "--command", - "rm -rf /tmp/demo", + "git push origin main --force", "--json", "--url", `http://127.0.0.1:${port}`, @@ -500,6 +531,114 @@ describe.if(!SKIP)("e2e — CLI <-> control-plane", () => { expect(v.rule_id).toBe("rogue.secret-read"); }); + // T5 nudge round-trip: a policy rule with `nudge:` must surface the + // hint string in the gate JSON response, and rules without a nudge must + // omit the field entirely (omitempty on the wire). + + test("fake-hook: rm -rf → deny with safety.rm-suggest-trash and nudge text", async () => { + const sessionId = await createSession(); + const proc = spawn({ + cmd: [ + "bun", + "run", + CLI_ENTRY, + "fake-hook", + "--session", + sessionId, + "--source", + "claude-code", + "--tool", + "Bash", + "--command", + "rm -rf /tmp/demo", + "--json", + "--url", + `http://127.0.0.1:${port}`, + ], + stdout: "pipe", + stderr: "pipe", + }); + const stdout = await new Response(proc.stdout).text(); + await proc.exited; + expect(proc.exitCode).toBe(3); + const v = JSON.parse(stdout) as { + verdict: string; + rule_id: string; + nudge: string; + }; + expect(v.verdict).toBe("deny"); + expect(v.rule_id).toBe("safety.rm-suggest-trash"); + expect(v.nudge).toContain("trash"); + }); + + test("fake-hook: Read .aws/credentials → deny with secret-read-suggest-skill nudge", async () => { + const sessionId = await createSession(); + const proc = spawn({ + cmd: [ + "bun", + "run", + CLI_ENTRY, + "fake-hook", + "--session", + sessionId, + "--tool", + "Read", + "--file-path", + "/home/alice/.aws/credentials", + "--json", + "--url", + `http://127.0.0.1:${port}`, + ], + stdout: "pipe", + stderr: "pipe", + }); + const stdout = await new Response(proc.stdout).text(); + await proc.exited; + expect(proc.exitCode).toBe(3); + const v = JSON.parse(stdout) as { + verdict: string; + rule_id: string; + nudge: string; + }; + expect(v.verdict).toBe("deny"); + expect(v.rule_id).toBe("safety.secret-read-suggest-skill"); + expect(v.nudge).toContain("secret-fetcher"); + }); + + test("fake-hook: allow path → no nudge field in JSON (omitempty wire check)", async () => { + const sessionId = await createSession(); + const proc = spawn({ + cmd: [ + "bun", + "run", + CLI_ENTRY, + "fake-hook", + "--session", + sessionId, + "--source", + "claude-code", + "--tool", + "Bash", + "--command", + "echo hello", + "--json", + "--url", + `http://127.0.0.1:${port}`, + ], + stdout: "pipe", + stderr: "pipe", + }); + const stdout = await new Response(proc.stdout).text(); + await proc.exited; + expect(proc.exitCode).toBe(0); + const v = JSON.parse(stdout) as Record; + expect(v.verdict).toBe("allow"); + // omitempty must drop the `nudge` key when no rule fired with one — + // not just produce an empty string. Assert wire-level absence. + expect("nudge" in v).toBe(false); + expect(Object.keys(v)).not.toContain("nudge"); + }); + test("fake-hook: pip install numpy → allow (pkg on allowlist)", async () => { const sessionId = await createSession(); const proc = spawn({ diff --git a/cli/tests/hook-claude-code.test.ts b/cli/tests/hook-claude-code.test.ts index 396297c..d3b5c5b 100644 --- a/cli/tests/hook-claude-code.test.ts +++ b/cli/tests/hook-claude-code.test.ts @@ -162,6 +162,47 @@ describe("hook claude-code shim", () => { expect(r.stdout).toContain('"permissionDecision":"deny"'); }); + test("deny with nudge → stderr carries the suggested-line", async () => { + // The daemon concatenates the rule's nudge into the deny reason as + // "\n\n→ Suggested: ". The CLI shim is a transparent + // forwarder: whatever permissionDecisionReason it gets MUST land on + // stderr verbatim so Claude Code surfaces the hint to the model. + const recorded: RecordedRequest[] = []; + const concatenated = + "matched rule safety.rm-suggest-trash (deny)\n\n→ Suggested: use trash instead"; + const m = startMockDaemon( + { + "/v1/hooks/claude-code/pre-tool-use": { + status: 200, + body: { + continue: false, + stopReason: concatenated, + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: concatenated, + }, + }, + }, + }, + recorded, + ); + server = m.server; + + const payload = JSON.stringify({ + session_id: "sess_nudge", + hook_event_name: "PreToolUse", + tool_name: "Bash", + tool_use_id: "t_nudge", + tool_input: { command: "rm -rf /tmp/x" }, + }); + const r = await runShim(["pre-tool-use"], payload, m.url); + expect(r.code).toBe(2); + expect(r.stderr).toContain("→ Suggested: "); + expect(r.stderr).toContain("use trash instead"); + expect(r.stderr).toContain("safety.rm-suggest-trash"); + }); + test("daemon unreachable → silent fail-open on every event", async () => { // Every event must produce empty stdout AND empty stderr with exit 0. // Anything else either pollutes the model's input stream or triggers diff --git a/cli/tests/hook-codex.test.ts b/cli/tests/hook-codex.test.ts index 3390174..cb2b074 100644 --- a/cli/tests/hook-codex.test.ts +++ b/cli/tests/hook-codex.test.ts @@ -157,6 +157,46 @@ describe("hook codex shim", () => { expect(r.stdout).toContain('"permissionDecision":"deny"'); }); + test("deny with nudge → stderr carries the suggested-line", async () => { + // Same forwarding contract as the claude-code shim: the daemon + // builds "\n\n→ Suggested: " and the shim mirrors + // that text onto stderr so Codex shows the hint to the model. + const recorded: RecordedRequest[] = []; + const concatenated = + "matched rule safety.rm-suggest-trash (deny)\n\n→ Suggested: use trash instead"; + const m = startMockDaemon( + { + "/v1/hooks/codex/pre-tool-use": { + status: 200, + body: { + continue: false, + stopReason: concatenated, + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: concatenated, + }, + }, + }, + }, + recorded, + ); + server = m.server; + + const payload = JSON.stringify({ + session_id: "sess_nudge", + hook_event_name: "PreToolUse", + tool_name: "Bash", + tool_use_id: "t_nudge", + tool_input: { command: "rm -rf /tmp/x" }, + }); + const r = await runShim(["pre-tool-use"], payload, m.url); + expect(r.code).toBe(2); + expect(r.stderr).toContain("→ Suggested: "); + expect(r.stderr).toContain("use trash instead"); + expect(r.stderr).toContain("safety.rm-suggest-trash"); + }); + test("daemon unreachable → silent fail-open on every event", async () => { // Codex hides hook stderr on exit-0 and renders any non-zero exit // as a "(failed)" banner that looks like a real error. Neither is diff --git a/cli/tests/hook-cursor.test.ts b/cli/tests/hook-cursor.test.ts index 85f77ae..1304cee 100644 --- a/cli/tests/hook-cursor.test.ts +++ b/cli/tests/hook-cursor.test.ts @@ -157,6 +157,46 @@ describe("hook cursor shim", () => { expect(r.stdout).toContain("rogue.destructive-bash"); }); + test("deny with nudge → stderr and stdout carry the suggested-line", async () => { + // Cursor shim mirrors the daemon's reason into BOTH stderr and the + // stdout JSON's agent_message field, so the hint surfaces no matter + // which channel Cursor renders. + const recorded: RecordedRequest[] = []; + const concatenated = + "matched rule safety.rm-suggest-trash (deny)\n\n→ Suggested: use trash instead"; + const m = startMockDaemon( + { + "/v1/hooks/cursor/pre-tool-use": { + status: 200, + body: { + continue: false, + stopReason: concatenated, + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: concatenated, + }, + }, + }, + }, + recorded, + ); + server = m.server; + + const payload = JSON.stringify({ + conversation_id: "conv_nudge", + hook_event_name: "preToolUse", + tool_name: "Bash", + tool_input: { command: "rm -rf /tmp/x" }, + }); + const r = await runShim(["pre-tool-use"], payload, m.url); + expect(r.code).toBe(2); + expect(r.stderr).toContain("→ Suggested: "); + expect(r.stderr).toContain("use trash instead"); + expect(r.stdout).toContain("→ Suggested: "); + expect(r.stdout).toContain("use trash instead"); + }); + test("daemon unreachable → silent fail-open with plain allow envelope on every event", async () => { // Cursor has no UI surface outside the model's input stream that we // can write to (no statusLine, no safe agent_message). On a transport diff --git a/control-plane/internal/api/gate.go b/control-plane/internal/api/gate.go index 0f16d15..6512d0d 100644 --- a/control-plane/internal/api/gate.go +++ b/control-plane/internal/api/gate.go @@ -27,6 +27,7 @@ type gateCheckResponse struct { Reason string `json:"reason"` LedgerSeq uint64 `json:"ledger_seq"` Monitor bool `json:"monitor,omitempty"` + Nudge string `json:"nudge,omitempty"` } func gateCheckHandler(d Deps) http.HandlerFunc { @@ -118,6 +119,7 @@ func gateCheckHandler(d Deps) http.HandlerFunc { Reason: result.Reason, LedgerSeq: entry.Seq, Monitor: result.MonitorMatch, + Nudge: result.Nudge, }) } } diff --git a/control-plane/internal/api/gate_test.go b/control-plane/internal/api/gate_test.go index dea55ef..4478b2a 100644 --- a/control-plane/internal/api/gate_test.go +++ b/control-plane/internal/api/gate_test.go @@ -215,3 +215,67 @@ func TestGateCheck_MissingRequiredFieldReturns400(t *testing.T) { t.Fatalf("status = %d, want 400", res.StatusCode) } } + +const nudgePolicyYAML = ` +version: 1 +mode: enforce +defaults: + bash: allow +gates: + - id: safety.rm-suggest-trash + match: + tool: Bash + any_command_regex: + - '\brm\s+(-[rRfF]+\s+)+\S+' + evaluate: + - kind: always + action: deny + nudge: "use trash instead" +` + +// A policy rule with `nudge:` must surface that hint in the JSON +// response from /v1/gates/check on a deny verdict, so harness shims and +// the dashboard can show concrete remediation guidance. +func TestGateCheck_DenyResponseCarriesNudge(t *testing.T) { + fx := newGateFixture(t, nudgePolicyYAML) + + body := fmt.Sprintf(`{ + "session_id": %q, + "source": "claude-code", + "tool": "Bash", + "input": {"command": "rm -rf /tmp/demo"} + }`, fx.sessionID) + res, out := postGateCheck(t, fx.srv, body) + if res.StatusCode != http.StatusOK { + t.Fatalf("status = %d", res.StatusCode) + } + if out["verdict"] != "deny" { + t.Fatalf("verdict = %v", out["verdict"]) + } + if out["nudge"] != "use trash instead" { + t.Fatalf("nudge = %v, want %q", out["nudge"], "use trash instead") + } +} + +// Backward compat: rules without a nudge produce a response that omits +// the field entirely (omitempty). Existing API consumers see no change. +func TestGateCheck_AllowResponseOmitsNudgeField(t *testing.T) { + fx := newGateFixture(t, nudgePolicyYAML) + + body := fmt.Sprintf(`{ + "session_id": %q, + "source": "claude-code", + "tool": "Bash", + "input": {"command": "ls -la"} + }`, fx.sessionID) + res, out := postGateCheck(t, fx.srv, body) + if res.StatusCode != http.StatusOK { + t.Fatalf("status = %d", res.StatusCode) + } + if out["verdict"] != "allow" { + t.Fatalf("verdict = %v", out["verdict"]) + } + if _, present := out["nudge"]; present { + t.Fatalf("nudge field present on allow response: %v", out["nudge"]) + } +} diff --git a/control-plane/internal/api/hooks_claude.go b/control-plane/internal/api/hooks_claude.go index 4dddc52..ea6255b 100644 --- a/control-plane/internal/api/hooks_claude.go +++ b/control-plane/internal/api/hooks_claude.go @@ -114,16 +114,20 @@ func claudePreToolUseHandler(d Deps) http.HandlerFunc { return } + // On deny with a nudge attached, append the hint into the same + // channel — the harness's reason text — so the model sees both + // the verdict explanation and the suggested remediation. + reason := denyReasonWithNudge(result) out := claudeHookOutput{ Continue: result.Verdict == "allow", HookSpecificOutput: claudeHookSpecifics{ HookEventName: "PreToolUse", PermissionDecision: result.Verdict, - PermissionDecisionReason: result.Reason, + PermissionDecisionReason: reason, }, } if result.Verdict == "deny" { - out.StopReason = result.Reason + out.StopReason = reason } writeJSON(w, http.StatusOK, out) } diff --git a/control-plane/internal/api/hooks_claude_test.go b/control-plane/internal/api/hooks_claude_test.go index 005de5c..2b1b53f 100644 --- a/control-plane/internal/api/hooks_claude_test.go +++ b/control-plane/internal/api/hooks_claude_test.go @@ -354,6 +354,58 @@ func TestGateCheck_FirewallEscalatesPolicyMonitorMatch(t *testing.T) { } } +// TestClaudePreToolUse_DenyConcatenatesNudge verifies that when a policy +// rule fires with a `nudge:` clause, the daemon's deny response carries +// both the original "matched rule X (deny)" reason AND the literal +// "→ Suggested: " suffix in BOTH permissionDecisionReason AND +// stopReason — these are the only channels the harness uses to surface +// the explanation to the model. The exact "→ Suggested: " prefix is the +// stable contract downstream consumers (CLI shim, dashboard) match on. +func TestClaudePreToolUse_DenyConcatenatesNudge(t *testing.T) { + fx := newGateFixture(t, nudgePolicyYAML) + body := `{ + "session_id": "claude-sess-nudge", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_use_id": "toolu_nudge", + "tool_input": {"command": "rm -rf /tmp/x"} + }` + res, out := postClaudePre(t, fx, body) + if res.StatusCode != http.StatusOK { + t.Fatalf("status = %d", res.StatusCode) + } + spec, _ := out["hookSpecificOutput"].(map[string]any) + if spec["permissionDecision"] != "deny" { + t.Fatalf("decision = %v, want deny", spec) + } + reason, _ := spec["permissionDecisionReason"].(string) + if !strings.Contains(reason, "matched rule safety.rm-suggest-trash (deny)") { + t.Fatalf("permissionDecisionReason missing original reason: %q", reason) + } + if !strings.Contains(reason, "→ Suggested: use trash instead") { + t.Fatalf("permissionDecisionReason missing nudge suffix: %q", reason) + } + stop, _ := out["stopReason"].(string) + if !strings.Contains(stop, "→ Suggested: use trash instead") { + t.Fatalf("stopReason missing nudge suffix: %q", stop) + } + + // Allow path: a benign command must NOT carry the suggested-line. + allowBody := `{ + "session_id": "claude-sess-nudge-allow", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_use_id": "toolu_nudge_allow", + "tool_input": {"command": "ls -la"} + }` + _, allowOut := postClaudePre(t, fx, allowBody) + allowSpec, _ := allowOut["hookSpecificOutput"].(map[string]any) + allowReason, _ := allowSpec["permissionDecisionReason"].(string) + if strings.Contains(allowReason, "→ Suggested:") { + t.Fatalf("allow reason must not carry nudge: %q", allowReason) + } +} + func TestClaudePreToolUse_FirewallEscalatesPolicyMonitorMatch(t *testing.T) { t.Setenv("AGENTLOCK_MODE", "") runtimeMode.Store("firewall") diff --git a/control-plane/internal/api/hooks_codex.go b/control-plane/internal/api/hooks_codex.go index a4af14e..8dbe7b5 100644 --- a/control-plane/internal/api/hooks_codex.go +++ b/control-plane/internal/api/hooks_codex.go @@ -109,16 +109,19 @@ func codexPreToolUseHandler(d Deps) http.HandlerFunc { // Same response envelope as Claude — the shim binary maps it onto // Codex's exit-code semantics (allow → exit 0; deny → exit 2 with // the JSON also written to stdout for harnesses that read it). + // On deny + nudge, the hint is concatenated into the reason so + // the model sees it through Codex's stderr channel. + reason := denyReasonWithNudge(result) out := claudeHookOutput{ Continue: result.Verdict == "allow", HookSpecificOutput: claudeHookSpecifics{ HookEventName: "PreToolUse", PermissionDecision: result.Verdict, - PermissionDecisionReason: result.Reason, + PermissionDecisionReason: reason, }, } if result.Verdict == "deny" { - out.StopReason = result.Reason + out.StopReason = reason } writeJSON(w, http.StatusOK, out) } diff --git a/control-plane/internal/api/hooks_codex_test.go b/control-plane/internal/api/hooks_codex_test.go index ccde747..660ae93 100644 --- a/control-plane/internal/api/hooks_codex_test.go +++ b/control-plane/internal/api/hooks_codex_test.go @@ -267,6 +267,57 @@ func TestCodexSessionStart_CreatesSession(t *testing.T) { } } +// Mirror of TestClaudePreToolUse_DenyConcatenatesNudge for the Codex +// hook path. The daemon must inject the `→ Suggested: ` suffix +// into the Codex reply's reason fields whenever the firing rule carries +// a nudge. +func TestCodexPreToolUse_DenyConcatenatesNudge(t *testing.T) { + fx := newGateFixture(t, nudgePolicyYAML) + body := `{ + "session_id": "codex-sess-nudge", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_use_id": "codex_nudge", + "turn_id": "turn_nudge", + "tool_input": {"command": "rm -rf /tmp/x"} + }` + res, out := postCodexPre(t, fx, body) + if res.StatusCode != http.StatusOK { + t.Fatalf("status = %d", res.StatusCode) + } + spec, _ := out["hookSpecificOutput"].(map[string]any) + if spec["permissionDecision"] != "deny" { + t.Fatalf("decision = %v, want deny", spec) + } + reason, _ := spec["permissionDecisionReason"].(string) + if !strings.Contains(reason, "matched rule safety.rm-suggest-trash (deny)") { + t.Fatalf("permissionDecisionReason missing original reason: %q", reason) + } + if !strings.Contains(reason, "→ Suggested: use trash instead") { + t.Fatalf("permissionDecisionReason missing nudge suffix: %q", reason) + } + stop, _ := out["stopReason"].(string) + if !strings.Contains(stop, "→ Suggested: use trash instead") { + t.Fatalf("stopReason missing nudge suffix: %q", stop) + } + + // Allow path stays untouched — no suggested-line on benign commands. + allowBody := `{ + "session_id": "codex-sess-nudge-allow", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_use_id": "codex_nudge_allow", + "turn_id": "turn_nudge_allow", + "tool_input": {"command": "ls -la"} + }` + _, allowOut := postCodexPre(t, fx, allowBody) + allowSpec, _ := allowOut["hookSpecificOutput"].(map[string]any) + allowReason, _ := allowSpec["permissionDecisionReason"].(string) + if strings.Contains(allowReason, "→ Suggested:") { + t.Fatalf("allow reason must not carry nudge: %q", allowReason) + } +} + // Mirror of TestClaudePreToolUse_FirewallEscalatesPolicyMonitorMatch // (hooks_claude_test.go) — daemon=firewall must re-escalate a // policy-monitor match back to deny on the codex hook path too. diff --git a/control-plane/internal/api/hooks_cursor.go b/control-plane/internal/api/hooks_cursor.go index c62ef03..36ef755 100644 --- a/control-plane/internal/api/hooks_cursor.go +++ b/control-plane/internal/api/hooks_cursor.go @@ -263,16 +263,20 @@ func cursorGateHandler(d Deps, eventName string, kind cursorDedupeKind) http.Han return } + // On deny + nudge, append the hint into the reason. The cached + // response below preserves the concatenated form so the paired + // MCP/preToolUse event short-circuits with the same text. + reason := denyReasonWithNudge(result) out := claudeHookOutput{ Continue: result.Verdict == "allow", HookSpecificOutput: claudeHookSpecifics{ HookEventName: eventName, PermissionDecision: result.Verdict, - PermissionDecisionReason: result.Reason, + PermissionDecisionReason: reason, }, } if result.Verdict == "deny" { - out.StopReason = result.Reason + out.StopReason = reason } // Cache the response keyed on tool_use_id so the paired event @@ -515,16 +519,18 @@ func cursorBeforeShellHandler(d Deps) http.HandlerFunc { // No ledger append — preToolUse owns the audit trail for this // tool call. We're just echoing the verdict in Cursor's expected // envelope so it can fail-closed at the shell-execution stage. + // Same nudge concat as the gate handler for parity. + reason := denyReasonWithNudge(result) out := claudeHookOutput{ Continue: result.Verdict == "allow", HookSpecificOutput: claudeHookSpecifics{ HookEventName: "BeforeShellExecution", PermissionDecision: result.Verdict, - PermissionDecisionReason: result.Reason, + PermissionDecisionReason: reason, }, } if result.Verdict == "deny" { - out.StopReason = result.Reason + out.StopReason = reason } writeJSON(w, http.StatusOK, out) } diff --git a/control-plane/internal/api/hooks_cursor_test.go b/control-plane/internal/api/hooks_cursor_test.go index f5a9cec..7dc9b30 100644 --- a/control-plane/internal/api/hooks_cursor_test.go +++ b/control-plane/internal/api/hooks_cursor_test.go @@ -447,6 +447,118 @@ func TestCursorPreToolUse_FirewallEscalatesPolicyMonitorMatch(t *testing.T) { } } +// shellNudgePolicyYAML mirrors nudgePolicyYAML (gate_test.go) but matches +// on `tool: Shell`, the synthetic name BeforeShellExecution evaluates +// against (no tool_name in that payload). +const shellNudgePolicyYAML = ` +version: 1 +mode: enforce +defaults: + bash: allow +gates: + - id: safety.rm-suggest-trash + match: + tool: Shell + any_command_regex: + - '\brm\s+(-[rRfF]+\s+)+\S+' + evaluate: + - kind: always + action: deny + nudge: "use trash instead" +` + +// Mirror of TestClaudePreToolUse_DenyConcatenatesNudge for the Cursor +// preToolUse handler. The same gate handler also serves +// beforeMCPExecution, so this exercises the shared concat path. +func TestCursorPreToolUse_DenyConcatenatesNudge(t *testing.T) { + cursorResetDedupe() + fx := newGateFixture(t, nudgePolicyYAML) + body := `{ + "conversation_id": "cursor-sess-nudge", + "generation_id": "gen-nudge", + "hook_event_name": "preToolUse", + "cursor_version": "1.7.0", + "tool_name": "Bash", + "tool_use_id": "cursor_nudge", + "tool_input": {"command": "rm -rf /tmp/x"} + }` + res, out := postCursorPre(t, fx, body) + if res.StatusCode != http.StatusOK { + t.Fatalf("status = %d", res.StatusCode) + } + spec, _ := out["hookSpecificOutput"].(map[string]any) + if spec["permissionDecision"] != "deny" { + t.Fatalf("decision = %v, want deny", spec) + } + reason, _ := spec["permissionDecisionReason"].(string) + if !strings.Contains(reason, "matched rule safety.rm-suggest-trash (deny)") { + t.Fatalf("permissionDecisionReason missing original reason: %q", reason) + } + if !strings.Contains(reason, "→ Suggested: use trash instead") { + t.Fatalf("permissionDecisionReason missing nudge suffix: %q", reason) + } + stop, _ := out["stopReason"].(string) + if !strings.Contains(stop, "→ Suggested: use trash instead") { + t.Fatalf("stopReason missing nudge suffix: %q", stop) + } + + // Allow path: a benign command must NOT carry the suggested-line. + cursorResetDedupe() + allowBody := `{ + "conversation_id": "cursor-sess-nudge-allow", + "generation_id": "gen-nudge-allow", + "hook_event_name": "preToolUse", + "cursor_version": "1.7.0", + "tool_name": "Bash", + "tool_use_id": "cursor_nudge_allow", + "tool_input": {"command": "ls -la"} + }` + _, allowOut := postCursorPre(t, fx, allowBody) + allowSpec, _ := allowOut["hookSpecificOutput"].(map[string]any) + allowReason, _ := allowSpec["permissionDecisionReason"].(string) + if strings.Contains(allowReason, "→ Suggested:") { + t.Fatalf("allow reason must not carry nudge: %q", allowReason) + } +} + +// Cursor 2.x's BeforeShellExecution payload routes through a separate +// handler (no tool_use_id, no tool_name); confirm the same nudge concat +// applies there too. +func TestCursorBeforeShellExecution_DenyConcatenatesNudge(t *testing.T) { + cursorResetDedupe() + fx := newGateFixture(t, shellNudgePolicyYAML) + body := `{ + "conversation_id": "cursor-shell-nudge", + "generation_id": "gen-shell-nudge", + "hook_event_name": "beforeShellExecution", + "command": "rm -rf /tmp/x", + "cwd": "/tmp", + "sandbox": false, + "cursor_version": "1.7.0" + }` + res, err := http.Post(fx.srv.URL+"/v1/hooks/cursor/before-shell-execution", "application/json", strings.NewReader(body)) + if err != nil { + t.Fatalf("POST: %v", err) + } + defer res.Body.Close() + if res.StatusCode != http.StatusOK { + t.Fatalf("status = %d", res.StatusCode) + } + var out map[string]any + _ = json.NewDecoder(res.Body).Decode(&out) + spec, _ := out["hookSpecificOutput"].(map[string]any) + if spec["permissionDecision"] != "deny" { + t.Fatalf("decision = %v, want deny", spec) + } + reason, _ := spec["permissionDecisionReason"].(string) + if !strings.Contains(reason, "matched rule safety.rm-suggest-trash (deny)") { + t.Fatalf("permissionDecisionReason missing original reason: %q", reason) + } + if !strings.Contains(reason, "→ Suggested: use trash instead") { + t.Fatalf("permissionDecisionReason missing nudge suffix: %q", reason) + } +} + // BeforeShellExecution doesn't append to the ledger (PreToolUse owns // that), so we only assert on the response envelope. func TestCursorBeforeShellExecution_FirewallEscalatesPolicyMonitorMatch(t *testing.T) { diff --git a/control-plane/internal/api/mode.go b/control-plane/internal/api/mode.go index 0cf6174..32b418d 100644 --- a/control-plane/internal/api/mode.go +++ b/control-plane/internal/api/mode.go @@ -50,16 +50,40 @@ func setRuntimeMode(m string) bool { return false } +// denyReasonWithNudge composes the user-facing deny reason that the +// harness shims forward to the model. When the policy carried a nudge +// hint with the matching rule, we append it after the original reason +// using a stable, parseable shape: +// +// "\n\n→ Suggested: " +// +// The exact arrow + "Suggested" prefix is part of the contract — tests +// across the daemon, CLI, and downstream tooling rely on the literal +// substring "→ Suggested: " to spot the hint. Allow / monitor / +// non-matching paths feed result.Nudge == "" here and pass through +// unchanged. +func denyReasonWithNudge(result policy.EvalResult) string { + if result.Verdict != "deny" || result.Nudge == "" { + return result.Reason + } + return result.Reason + "\n\n→ Suggested: " + result.Nudge +} + // applyDaemonModeOverride composes the daemon-level mode with a policy // EvalResult. The daemon mode is the *outer* switch — the dashboard's // big red button — and must trump the per-policy / per-gate monitor // override the YAML can set. // -// Two directions: -// - daemon=monitor + verdict=deny → suppress to allow (existing -// behavior, just centralised here so every hook handler agrees) +// Three directions: +// - daemon=monitor + verdict=deny → suppress to allow, strip nudge +// (existing behavior, just centralised here so every hook handler +// agrees) +// - daemon=monitor + MonitorMatch passthrough → keep allow, strip +// nudge (the agent is being allowed; a remediation hint would be +// misleading) // - daemon=firewall + MonitorMatch + OriginalVerdict=deny → escalate -// back to deny, ignoring the policy's monitor downgrade +// back to deny, KEEP the nudge so the user sees the hint that the +// policy's monitor downgrade had stashed for this exact moment // // Returns the (possibly mutated) result + the daemon mode that was in // effect, so callers can stamp the ledger consistently. @@ -82,6 +106,16 @@ func applyDaemonModeOverride(r policy.EvalResult) (policy.EvalResult, string, st r.MonitorMatch = true r.Verdict = "allow" r.Reason = "deny suppressed by daemon monitor mode" + // policy layer leaves Nudge populated on real denies (and on + // monitor-downgraded denies, post-fix); clear it here because + // the agent is being allowed to proceed and showing a hint + // would be misleading. + r.Nudge = "" + } else if r.MonitorMatch { + // Policy-level monitor downgrade carried the nudge through + // (so daemon=firewall could re-attach on escalation). Daemon + // is also in monitor, so the agent proceeds — clear the hint. + r.Nudge = "" } case daemonModeFirewall: if r.MonitorMatch && r.OriginalVerdict == "deny" { @@ -93,6 +127,8 @@ func applyDaemonModeOverride(r policy.EvalResult) (policy.EvalResult, string, st r.Reason = reason r.MonitorMatch = false origVerdict = "deny" + // Nudge intentionally preserved: the policy layer carried it + // through the monitor downgrade for exactly this case. } } return r, mode, origVerdict diff --git a/control-plane/internal/api/mode_test.go b/control-plane/internal/api/mode_test.go new file mode 100644 index 0000000..74e80a6 --- /dev/null +++ b/control-plane/internal/api/mode_test.go @@ -0,0 +1,124 @@ +package api + +import ( + "testing" + + "github.com/openagentlock/openagentlock/control-plane/internal/policy" +) + +// applyDaemonModeOverride is the API-layer composition point that owns +// when to surface a nudge. The policy layer carries Nudge through a +// monitor downgrade (so daemon=firewall can re-escalate with the hint +// intact); this layer decides whether the user actually sees it. + +// daemon=firewall must escalate a policy-level monitor match back to +// deny AND surface the nudge the policy stashed for this exact case. +// This is the C1 regression: the nudge was being stripped at the +// policy layer, leaving the deny bare. +func TestApplyDaemonModeOverride_FirewallEscalatesWithNudge(t *testing.T) { + t.Setenv("AGENTLOCK_MODE", "") + runtimeMode.Store("firewall") + t.Cleanup(func() { runtimeMode.Store("") }) + + in := policy.EvalResult{ + Verdict: "allow", + RuleID: "safety.rm-suggest-trash", + Reason: "monitor: matched rule safety.rm-suggest-trash (deny)", + MonitorMatch: true, + OriginalVerdict: "deny", + Nudge: "use trash instead", + } + out, mode, origVerdict := applyDaemonModeOverride(in) + + if mode != "firewall" { + t.Fatalf("mode = %q, want firewall", mode) + } + if out.Verdict != "deny" { + t.Fatalf("Verdict = %q, want deny (firewall escalation)", out.Verdict) + } + if origVerdict != "deny" { + t.Fatalf("origVerdict = %q, want deny (ledger truth)", origVerdict) + } + if out.Nudge != "use trash instead" { + t.Fatalf("Nudge = %q, want %q (firewall escalation must keep the hint)", + out.Nudge, "use trash instead") + } + if out.MonitorMatch { + t.Fatal("MonitorMatch must be cleared after firewall escalation") + } + if out.RuleID != "safety.rm-suggest-trash" { + t.Fatalf("RuleID = %q, want safety.rm-suggest-trash", out.RuleID) + } +} + +// daemon=monitor + a policy-level monitor match (allow + MonitorMatch) +// must strip the nudge: the agent is being allowed to proceed in both +// layers, so surfacing a remediation hint would be misleading. This +// is the relocated cousin of the old policy-layer +// TestEvaluate_MonitorDowngradeSuppressesNudge. +func TestApplyDaemonModeOverride_MonitorMatchStripsNudge(t *testing.T) { + t.Setenv("AGENTLOCK_MODE", "") + runtimeMode.Store("monitor") + t.Cleanup(func() { runtimeMode.Store("") }) + + in := policy.EvalResult{ + Verdict: "allow", + RuleID: "safety.rm-suggest-trash", + Reason: "monitor: matched rule safety.rm-suggest-trash (deny)", + MonitorMatch: true, + OriginalVerdict: "deny", + Nudge: "use trash instead", + } + out, mode, origVerdict := applyDaemonModeOverride(in) + + if mode != "monitor" { + t.Fatalf("mode = %q, want monitor", mode) + } + if out.Verdict != "allow" { + t.Fatalf("Verdict = %q, want allow", out.Verdict) + } + if origVerdict != "allow" { + // The verdict reaching the function was already-allow; the + // ledger should record that as truth. (OriginalVerdict on the + // EvalResult preserves the policy-layer deny separately.) + t.Fatalf("origVerdict = %q, want allow", origVerdict) + } + if out.Nudge != "" { + t.Fatalf("Nudge = %q, want empty (agent is being allowed to proceed)", out.Nudge) + } + if !out.MonitorMatch { + t.Fatal("MonitorMatch must remain true so the ledger records the policy-level match") + } +} + +// daemon=monitor + a real policy-level deny must suppress to allow, +// stamp MonitorMatch, AND strip the nudge. Pre-existing behavior; +// covered here so the strip-on-final-allow contract is testable +// independent of any HTTP fixture. +func TestApplyDaemonModeOverride_MonitorSuppressesDenyAndStripsNudge(t *testing.T) { + t.Setenv("AGENTLOCK_MODE", "") + runtimeMode.Store("monitor") + t.Cleanup(func() { runtimeMode.Store("") }) + + in := policy.EvalResult{ + Verdict: "deny", + RuleID: "safety.rm-suggest-trash", + Reason: "matched rule safety.rm-suggest-trash (deny)", + OriginalVerdict: "deny", + Nudge: "use trash instead", + } + out, _, origVerdict := applyDaemonModeOverride(in) + + if out.Verdict != "allow" { + t.Fatalf("Verdict = %q, want allow (monitor suppression)", out.Verdict) + } + if origVerdict != "deny" { + t.Fatalf("origVerdict = %q, want deny (ledger truth)", origVerdict) + } + if !out.MonitorMatch { + t.Fatal("MonitorMatch must be set after monitor suppression") + } + if out.Nudge != "" { + t.Fatalf("Nudge = %q, want empty after monitor suppression", out.Nudge) + } +} diff --git a/control-plane/internal/api/policy_http.go b/control-plane/internal/api/policy_http.go index 7ccd289..f05a980 100644 --- a/control-plane/internal/api/policy_http.go +++ b/control-plane/internal/api/policy_http.go @@ -39,7 +39,7 @@ func policyViewHandler(d Deps) http.HandlerFunc { Tool: g.Match.Tool, ToolPrefix: g.Match.ToolPrefix, AnyCommandRegex: regexStrings(g.Match.Regexes), - Evaluators: evaluatorNames(g.Evaluators), + Evaluators: evaluatorNames(g.Evaluators()), }) } writeJSON(w, http.StatusOK, map[string]any{ diff --git a/control-plane/internal/api/types.go b/control-plane/internal/api/types.go index fbc6df6..e375b47 100644 --- a/control-plane/internal/api/types.go +++ b/control-plane/internal/api/types.go @@ -46,6 +46,11 @@ type GateCheckResponse struct { LedgerSeq uint64 `json:"ledger_seq"` ApprovalID string `json:"approval_id,omitempty"` // present when verdict=ask RequireRotate bool `json:"require_rotate,omitempty"` + // Nudge is an optional human-readable hint propagated from the firing + // rule's evaluate clause. Only set on deny verdicts where the policy + // author supplied guidance ("use trash instead"). omitempty keeps the + // field absent for existing rules that don't define a nudge. + Nudge string `json:"nudge,omitempty"` } type Approval struct { diff --git a/control-plane/internal/policy/policy.go b/control-plane/internal/policy/policy.go index 51bf0db..d1f2d91 100644 --- a/control-plane/internal/policy/policy.go +++ b/control-plane/internal/policy/policy.go @@ -69,6 +69,10 @@ type rawEval struct { OnUnknown string `yaml:"on_unknown,omitempty"` OnChanged string `yaml:"on_changed,omitempty"` Store string `yaml:"store,omitempty"` + // Nudge is an optional human-readable hint surfaced alongside a deny + // verdict so the agent gets concrete remediation guidance ("use trash + // instead") rather than a bare block. Ignored on allow / skip. + Nudge string `yaml:"nudge,omitempty"` } // Policy is the compiled form of a YAML policy. @@ -83,12 +87,36 @@ type Policy struct { RawYAML []byte } +// evalEntry pairs a compiled evaluator with the optional human-readable +// nudge string from the same evaluate[] YAML entry. Welding the two +// together via a single slice prevents the parallel-slice footgun the +// previous shape had (Evaluators + EvalNudges drifting out of sync). +type evalEntry struct { + eval Evaluator + nudge string +} + type Gate struct { - ID string - Mode string // monitor | enforce — inherits Policy.Mode if empty - Disabled bool // true = skip this gate during evaluation - Match Matcher - Evaluators []Evaluator + ID string + Mode string // monitor | enforce — inherits Policy.Mode if empty + Disabled bool // true = skip this gate during evaluation + Match Matcher + // Evals is the compiled evaluate[] pipeline. Each entry carries the + // evaluator plus its optional `nudge:` hint; the firing entry's nudge + // gets propagated into EvalResult on a deny verdict. + Evals []evalEntry +} + +// Evaluators returns the compiled evaluators from this gate, in order. +// External callers (the read-only policy view in the API package) need +// to introspect the evaluator types without poking at the unexported +// nudge field on each entry. +func (g Gate) Evaluators() []Evaluator { + out := make([]Evaluator, len(g.Evals)) + for i, e := range g.Evals { + out[i] = e.eval + } + return out } // Evaluator is a step in a gate's `evaluate` pipeline. Each returns @@ -381,6 +409,13 @@ type EvalResult struct { // can re-apply the original deny without re-running the gate. // Equal to Verdict when MonitorMatch is false. OriginalVerdict string + // Nudge is the optional human-readable hint propagated from the + // firing evaluate clause. Populated whenever a rule with a nudge + // matched and produced a deny — including monitor-downgraded + // matches, so a daemon-level firewall escalation can re-surface the + // hint. The API layer (applyDaemonModeOverride) is responsible for + // clearing this when the agent is being allowed to proceed. + Nudge string } // Load parses YAML into a compiled Policy. Returns an error for unknown @@ -407,13 +442,13 @@ func Load(r io.Reader) (*Policy, error) { if len(rg.Evaluate) == 0 { return nil, fmt.Errorf("gate %q: missing evaluate", rg.ID) } - evaluators := make([]Evaluator, 0, len(rg.Evaluate)) + evals := make([]evalEntry, 0, len(rg.Evaluate)) for i, e := range rg.Evaluate { ev, err := compileEvaluator(e) if err != nil { return nil, fmt.Errorf("gate %q: evaluate[%d]: %w", rg.ID, i, err) } - evaluators = append(evaluators, ev) + evals = append(evals, evalEntry{eval: ev, nudge: e.Nudge}) } m, err := compileMatch(rg.Match) if err != nil { @@ -423,11 +458,11 @@ func Load(r io.Reader) (*Policy, error) { return nil, fmt.Errorf("gate %q: match has no criteria", rg.ID) } gates = append(gates, Gate{ - ID: rg.ID, - Mode: rg.Mode, - Disabled: rg.Disabled, - Match: m, - Evaluators: evaluators, + ID: rg.ID, + Mode: rg.Mode, + Disabled: rg.Disabled, + Match: m, + Evals: evals, }) } @@ -453,12 +488,14 @@ func (p *Policy) Evaluate(req EvalRequest) EvalResult { continue } verdict := "skip" - for _, ev := range g.Evaluators { - v := ev.Evaluate(req) + firingIdx := -1 + for i, e := range g.Evals { + v := e.eval.Evaluate(req) if v == "skip" { continue } verdict = v + firingIdx = i break } if verdict == "skip" { @@ -471,20 +508,38 @@ func (p *Policy) Evaluate(req EvalRequest) EvalResult { effectiveMode = p.Mode } reason := fmt.Sprintf("matched rule %s (%s)", g.ID, verdict) + // Pull the nudge from the firing evaluator's entry. The slice is + // always built alongside the evaluators (see Load), so the index + // is guaranteed in range — no bounds check needed. + var nudge string + if firingIdx >= 0 { + nudge = g.Evals[firingIdx].nudge + } if effectiveMode == "monitor" { + // Nudge is preserved through monitor downgrade so daemon-level + // firewall escalation can re-attach it; the API layer decides + // whether to surface it. return EvalResult{ Verdict: "allow", RuleID: g.ID, Reason: "monitor: " + reason, MonitorMatch: true, OriginalVerdict: verdict, + Nudge: nudge, } } + // Only carry the nudge through on a deny verdict; an allow + // outcome means the agent is proceeding and doesn't need a hint. + var resultNudge string + if verdict == "deny" { + resultNudge = nudge + } return EvalResult{ Verdict: verdict, RuleID: g.ID, Reason: reason, OriginalVerdict: verdict, + Nudge: resultNudge, } } return EvalResult{ diff --git a/control-plane/internal/policy/policy_test.go b/control-plane/internal/policy/policy_test.go index 0704ec9..833e3a1 100644 --- a/control-plane/internal/policy/policy_test.go +++ b/control-plane/internal/policy/policy_test.go @@ -897,3 +897,172 @@ gates: t.Fatal("expected Load to reject malformed any_url_regex") } } + +const nudgeYAML = ` +version: 1 +mode: enforce +defaults: + bash: allow +gates: + - id: safety.rm-suggest-trash + match: + tool: Bash + any_command_regex: + - '\brm\s+(-[rRfF]+\s+)+\S+' + evaluate: + - kind: always + action: deny + nudge: "use trash instead" +` + +const nudgeMonitorYAML = ` +version: 1 +mode: monitor +defaults: + bash: allow +gates: + - id: safety.rm-suggest-trash + match: + tool: Bash + any_command_regex: + - '\brm\s+(-[rRfF]+\s+)+\S+' + evaluate: + - kind: always + action: deny + nudge: "use trash instead" +` + +const nudgeMixedYAML = ` +version: 1 +mode: enforce +defaults: + bash: allow +gates: + - id: with.nudge + match: + tool: Bash + command_regex: '^do-nudge\b' + evaluate: + - kind: always + action: deny + nudge: "try the safe alternative" + - id: without.nudge + match: + tool: Bash + command_regex: '^no-nudge\b' + evaluate: + - kind: always + action: deny +` + +// Loading a YAML rule with a `nudge:` produces a Gate whose evals slice +// carries the string at the matching index. The slice was previously +// two parallel slices (Evaluators + EvalNudges); welded into a single +// []evalEntry to prevent drift. +func TestLoad_NudgeStoredOnGate(t *testing.T) { + p, err := Load(strings.NewReader(nudgeYAML)) + if err != nil { + t.Fatalf("Load: %v", err) + } + if len(p.Gates) != 1 { + t.Fatalf("len(gates) = %d, want 1", len(p.Gates)) + } + g := p.Gates[0] + if len(g.Evals) != 1 { + t.Fatalf("len(Evals) = %d, want 1", len(g.Evals)) + } + if g.Evals[0].nudge != "use trash instead" { + t.Fatalf("Evals[0].nudge = %q, want %q", g.Evals[0].nudge, "use trash instead") + } + if g.Evals[0].eval == nil { + t.Fatal("Evals[0].eval must be non-nil") + } +} + +// A matching deny rule with a nudge surfaces the nudge in the result. +func TestEvaluate_DenyCarriesNudge(t *testing.T) { + p, _ := Load(strings.NewReader(nudgeYAML)) + v := p.Evaluate(EvalRequest{ + Tool: "Bash", + Input: map[string]any{"command": "rm -rf /tmp/x"}, + }) + if v.Verdict != "deny" { + t.Fatalf("verdict = %q, want deny", v.Verdict) + } + if v.Nudge != "use trash instead" { + t.Fatalf("nudge = %q, want %q", v.Nudge, "use trash instead") + } +} + +// A matching deny rule that omits the nudge field returns an empty string. +// (Backward compat — existing rules pre-nudge keep working unchanged.) +func TestEvaluate_DenyWithoutNudgeIsEmpty(t *testing.T) { + p, _ := Load(strings.NewReader(nudgeMixedYAML)) + v := p.Evaluate(EvalRequest{ + Tool: "Bash", + Input: map[string]any{"command": "no-nudge target"}, + }) + if v.Verdict != "deny" || v.RuleID != "without.nudge" { + t.Fatalf("got %+v", v) + } + if v.Nudge != "" { + t.Fatalf("nudge = %q, want empty (rule has no nudge)", v.Nudge) + } +} + +// Sister rule with a nudge still attaches it; confirms the parallel-slice +// lookup uses the correct gate (not just the first one). +func TestEvaluate_DenyWithNudgeWhenSiblingHasNone(t *testing.T) { + p, _ := Load(strings.NewReader(nudgeMixedYAML)) + v := p.Evaluate(EvalRequest{ + Tool: "Bash", + Input: map[string]any{"command": "do-nudge target"}, + }) + if v.Verdict != "deny" || v.RuleID != "with.nudge" { + t.Fatalf("got %+v", v) + } + if v.Nudge != "try the safe alternative" { + t.Fatalf("nudge = %q", v.Nudge) + } +} + +// A non-matching request returns the default allow verdict and never +// surfaces a nudge from somewhere else in the policy. +func TestEvaluate_NonMatchingRequestHasNoNudge(t *testing.T) { + p, _ := Load(strings.NewReader(nudgeYAML)) + v := p.Evaluate(EvalRequest{ + Tool: "Bash", + Input: map[string]any{"command": "ls -la"}, + }) + if v.Verdict != "allow" || v.RuleID != "default" { + t.Fatalf("got %+v", v) + } + if v.Nudge != "" { + t.Fatalf("nudge leaked: %q", v.Nudge) + } +} + +// Monitor-mode matches downgrade deny → allow but PRESERVE the nudge so +// a downstream daemon-level firewall escalation can re-attach the hint. +// The decision to suppress the nudge on a final-allow verdict lives in +// the API layer (applyDaemonModeOverride) — see +// TestApplyDaemonModeOverride_MonitorMatchStripsNudge. +func TestEvaluate_MonitorDowngradeKeepsNudge(t *testing.T) { + p, _ := Load(strings.NewReader(nudgeMonitorYAML)) + v := p.Evaluate(EvalRequest{ + Tool: "Bash", + Input: map[string]any{"command": "rm -rf /tmp/x"}, + }) + if v.Verdict != "allow" { + t.Fatalf("verdict = %q, want allow (monitor)", v.Verdict) + } + if !v.MonitorMatch { + t.Fatal("MonitorMatch must be true on a monitor downgrade") + } + if v.OriginalVerdict != "deny" { + t.Fatalf("OriginalVerdict = %q, want deny", v.OriginalVerdict) + } + if v.Nudge != "use trash instead" { + t.Fatalf("nudge must survive monitor downgrade for firewall re-escalation, got %q", v.Nudge) + } +} diff --git a/docs/guide/policies.md b/docs/guide/policies.md index 7d8bc3e..9c2e068 100644 --- a/docs/guide/policies.md +++ b/docs/guide/policies.md @@ -151,6 +151,19 @@ gates: The daemon's regex engine is Go RE2 — **no negative lookahead, no backreferences**. If you find yourself reaching for `(?!…)`, invert the match: write a positive regex for the dangerous shape rather than a negative regex around the safe one. +### Nudges + +Each `evaluate[]` clause may carry an optional `nudge: ` hint. When the clause fires a `deny`, the harness shim splices the hint onto the reason it forwards to the model as `"\n\n→ Suggested: "`. Use it to redirect the agent toward a safer command (e.g. `trash` instead of `rm`) or to point at the right [skill](https://github.com/openagentlock/skills) instead of leaving it to retry blindly. + +```yaml +evaluate: + - kind: always + action: deny + nudge: "use `trash ` (macOS) — recoverable from Trash" +``` + +Nudges only surface on `deny` verdicts; `allow`, monitor-suppressed, and non-matching paths drop the field. See the [openagentlock/rules](https://github.com/openagentlock/rules) registry for `safety.rm-suggest-trash` and `safety.secret-read-suggest-skill` as canonical examples. + ## Enforcement vs monitor - **Monitor** — every gate matches but the verdict is downgraded to `allow` for the harness. Use this on day one. diff --git a/docs/reference/api.md b/docs/reference/api.md index 1bac3b8..d1f3eb4 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -22,6 +22,25 @@ Toggles the daemon-level `monitor` ↔ `firewall` switch. This is the outer over Returns the list of currently active gates with their per-gate status (matched count, last-hit timestamp). +`POST /v1/gates/check` + +Synchronous verdict for a single tool call. Request body: + +- `session_id` — required, string. From `POST /v1/sessions/create`. +- `source` — required, string. Harness id (`claude-code`, `codex`, `cursor`, `mcp-proxy`). +- `tool` — required, string. The tool name being checked (`Bash`, `Read`, `Write`, `mcp____`). +- `input` — required, object. The tool's input shape (e.g. `{ "command": "rm -rf foo" }` for Bash). +- `cwd` — optional, string. Working directory if relevant to the rule. +- `meta` — optional, object. Free-form context for evaluators that look beyond `input`. + +Response fields: + +- `verdict` — `allow` or `deny` (the daemon never returns `ask` on this path) +- `rule_id` — id of the gate that matched, empty when no rule fired +- `reason` — short human-readable explanation, surfaced verbatim by the harness shim +- `nudge` — optional, string. Present only when the matched rule defined a `nudge:` hint **and** the final verdict is `deny`. Allow / monitor-suppressed paths drop it. See [Policies → Nudges](../guide/policies.md#nudges). +- `ledger_seq` — sequence number of the corresponding ledger leaf + ## Install `POST /v1/install/plan` — render a diff of what would change in the harness configs diff --git a/docs/reference/hooks.md b/docs/reference/hooks.md index ae674a9..7787fce 100644 --- a/docs/reference/hooks.md +++ b/docs/reference/hooks.md @@ -81,3 +81,7 @@ The control plane normalizes hook payloads into a single shape regardless of har ``` Ledger leaves use the same shape plus `verdict`, `reason`, `policy_rule_id`, and `signer`. + +## Nudges in deny replies + +When a matched rule carries a `nudge:` hint (see [Policies → Nudges](../guide/policies.md#nudges)) and the final verdict is `deny`, every harness shim — Claude Code, Codex, Cursor — appends the hint to the deny reason it forwards to the model. The format is the literal string `"\n\n→ Suggested: "` — arrow `→`, capital `S`, colon-space — and is intentionally stable so external tools can grep for `→ Suggested: ` to spot the hint. Allow, monitor-suppressed, and non-matching paths drop the field; the reason flows through unchanged.