From 8959b0339cc3a69f7c18ec7f33fa214f4e122bec Mon Sep 17 00:00:00 2001 From: knhn1004 <49494541+knhn1004@users.noreply.github.com> Date: Sun, 3 May 2026 14:58:26 -0700 Subject: [PATCH 1/6] feat(daemon): plumb nudge field from policy rule through verdict response Adds optional human-readable hint propagation from policy YAML `evaluate[].nudge` through the policy evaluator into the gate.check HTTP response. Hint is only surfaced on deny verdicts; allow / monitor downgrades / daemon-monitor suppression all clear it so the agent never sees remediation guidance for a call it's allowed to make. Co-Authored-By: Claude Opus 4.7 (1M context) --- control-plane/internal/api/gate.go | 2 + control-plane/internal/api/gate_test.go | 64 ++++++++ control-plane/internal/api/mode.go | 3 + control-plane/internal/api/types.go | 5 + control-plane/internal/policy/policy.go | 38 ++++- control-plane/internal/policy/policy_test.go | 160 +++++++++++++++++++ 6 files changed, 271 insertions(+), 1 deletion(-) diff --git a/control-plane/internal/api/gate.go b/control-plane/internal/api/gate.go index 0f16d15..6512d0d 100644 --- a/control-plane/internal/api/gate.go +++ b/control-plane/internal/api/gate.go @@ -27,6 +27,7 @@ type gateCheckResponse struct { Reason string `json:"reason"` LedgerSeq uint64 `json:"ledger_seq"` Monitor bool `json:"monitor,omitempty"` + Nudge string `json:"nudge,omitempty"` } func gateCheckHandler(d Deps) http.HandlerFunc { @@ -118,6 +119,7 @@ func gateCheckHandler(d Deps) http.HandlerFunc { Reason: result.Reason, LedgerSeq: entry.Seq, Monitor: result.MonitorMatch, + Nudge: result.Nudge, }) } } diff --git a/control-plane/internal/api/gate_test.go b/control-plane/internal/api/gate_test.go index dea55ef..4478b2a 100644 --- a/control-plane/internal/api/gate_test.go +++ b/control-plane/internal/api/gate_test.go @@ -215,3 +215,67 @@ func TestGateCheck_MissingRequiredFieldReturns400(t *testing.T) { t.Fatalf("status = %d, want 400", res.StatusCode) } } + +const nudgePolicyYAML = ` +version: 1 +mode: enforce +defaults: + bash: allow +gates: + - id: safety.rm-suggest-trash + match: + tool: Bash + any_command_regex: + - '\brm\s+(-[rRfF]+\s+)+\S+' + evaluate: + - kind: always + action: deny + nudge: "use trash instead" +` + +// A policy rule with `nudge:` must surface that hint in the JSON +// response from /v1/gates/check on a deny verdict, so harness shims and +// the dashboard can show concrete remediation guidance. +func TestGateCheck_DenyResponseCarriesNudge(t *testing.T) { + fx := newGateFixture(t, nudgePolicyYAML) + + body := fmt.Sprintf(`{ + "session_id": %q, + "source": "claude-code", + "tool": "Bash", + "input": {"command": "rm -rf /tmp/demo"} + }`, fx.sessionID) + res, out := postGateCheck(t, fx.srv, body) + if res.StatusCode != http.StatusOK { + t.Fatalf("status = %d", res.StatusCode) + } + if out["verdict"] != "deny" { + t.Fatalf("verdict = %v", out["verdict"]) + } + if out["nudge"] != "use trash instead" { + t.Fatalf("nudge = %v, want %q", out["nudge"], "use trash instead") + } +} + +// Backward compat: rules without a nudge produce a response that omits +// the field entirely (omitempty). Existing API consumers see no change. +func TestGateCheck_AllowResponseOmitsNudgeField(t *testing.T) { + fx := newGateFixture(t, nudgePolicyYAML) + + body := fmt.Sprintf(`{ + "session_id": %q, + "source": "claude-code", + "tool": "Bash", + "input": {"command": "ls -la"} + }`, fx.sessionID) + res, out := postGateCheck(t, fx.srv, body) + if res.StatusCode != http.StatusOK { + t.Fatalf("status = %d", res.StatusCode) + } + if out["verdict"] != "allow" { + t.Fatalf("verdict = %v", out["verdict"]) + } + if _, present := out["nudge"]; present { + t.Fatalf("nudge field present on allow response: %v", out["nudge"]) + } +} diff --git a/control-plane/internal/api/mode.go b/control-plane/internal/api/mode.go index 0cf6174..f00a9c3 100644 --- a/control-plane/internal/api/mode.go +++ b/control-plane/internal/api/mode.go @@ -82,6 +82,9 @@ func applyDaemonModeOverride(r policy.EvalResult) (policy.EvalResult, string, st r.MonitorMatch = true r.Verdict = "allow" r.Reason = "deny suppressed by daemon monitor mode" + // Suppress the nudge: the agent is being allowed to proceed, + // so a "use trash instead" hint would be misleading. + r.Nudge = "" } case daemonModeFirewall: if r.MonitorMatch && r.OriginalVerdict == "deny" { diff --git a/control-plane/internal/api/types.go b/control-plane/internal/api/types.go index fbc6df6..e375b47 100644 --- a/control-plane/internal/api/types.go +++ b/control-plane/internal/api/types.go @@ -46,6 +46,11 @@ type GateCheckResponse struct { LedgerSeq uint64 `json:"ledger_seq"` ApprovalID string `json:"approval_id,omitempty"` // present when verdict=ask RequireRotate bool `json:"require_rotate,omitempty"` + // Nudge is an optional human-readable hint propagated from the firing + // rule's evaluate clause. Only set on deny verdicts where the policy + // author supplied guidance ("use trash instead"). omitempty keeps the + // field absent for existing rules that don't define a nudge. + Nudge string `json:"nudge,omitempty"` } type Approval struct { diff --git a/control-plane/internal/policy/policy.go b/control-plane/internal/policy/policy.go index 51bf0db..db7c327 100644 --- a/control-plane/internal/policy/policy.go +++ b/control-plane/internal/policy/policy.go @@ -69,6 +69,10 @@ type rawEval struct { OnUnknown string `yaml:"on_unknown,omitempty"` OnChanged string `yaml:"on_changed,omitempty"` Store string `yaml:"store,omitempty"` + // Nudge is an optional human-readable hint surfaced alongside a deny + // verdict so the agent gets concrete remediation guidance ("use trash + // instead") rather than a bare block. Ignored on allow / skip. + Nudge string `yaml:"nudge,omitempty"` } // Policy is the compiled form of a YAML policy. @@ -89,6 +93,12 @@ type Gate struct { Disabled bool // true = skip this gate during evaluation Match Matcher Evaluators []Evaluator + // EvalNudges is a parallel slice to Evaluators carrying the optional + // `nudge:` string from each evaluate[] entry. Same length as + // Evaluators; entries are "" when the corresponding rawEval omitted + // the field. Evaluate uses the firing index to pull the matching + // nudge into the result without changing the Evaluator interface. + EvalNudges []string } // Evaluator is a step in a gate's `evaluate` pipeline. Each returns @@ -381,6 +391,11 @@ type EvalResult struct { // can re-apply the original deny without re-running the gate. // Equal to Verdict when MonitorMatch is false. OriginalVerdict string + // Nudge is the optional human-readable hint propagated from the + // firing evaluate clause. Only populated when the final verdict is + // "deny" — allow verdicts and monitor-downgraded matches leave it + // empty (the agent is proceeding, no need to nudge). + Nudge string } // Load parses YAML into a compiled Policy. Returns an error for unknown @@ -408,12 +423,14 @@ func Load(r io.Reader) (*Policy, error) { return nil, fmt.Errorf("gate %q: missing evaluate", rg.ID) } evaluators := make([]Evaluator, 0, len(rg.Evaluate)) + nudges := make([]string, 0, len(rg.Evaluate)) for i, e := range rg.Evaluate { ev, err := compileEvaluator(e) if err != nil { return nil, fmt.Errorf("gate %q: evaluate[%d]: %w", rg.ID, i, err) } evaluators = append(evaluators, ev) + nudges = append(nudges, e.Nudge) } m, err := compileMatch(rg.Match) if err != nil { @@ -428,6 +445,7 @@ func Load(r io.Reader) (*Policy, error) { Disabled: rg.Disabled, Match: m, Evaluators: evaluators, + EvalNudges: nudges, }) } @@ -453,12 +471,14 @@ func (p *Policy) Evaluate(req EvalRequest) EvalResult { continue } verdict := "skip" - for _, ev := range g.Evaluators { + firingIdx := -1 + for i, ev := range g.Evaluators { v := ev.Evaluate(req) if v == "skip" { continue } verdict = v + firingIdx = i break } if verdict == "skip" { @@ -471,7 +491,16 @@ func (p *Policy) Evaluate(req EvalRequest) EvalResult { effectiveMode = p.Mode } reason := fmt.Sprintf("matched rule %s (%s)", g.ID, verdict) + // Pull the nudge from the firing evaluator's parallel-slice + // entry. Bounds-checked because EvalNudges may be nil/short on + // hand-built Gate values from tests. + var nudge string + if firingIdx >= 0 && firingIdx < len(g.EvalNudges) { + nudge = g.EvalNudges[firingIdx] + } if effectiveMode == "monitor" { + // Monitor-downgraded matches let the agent proceed, so + // suppress the nudge — there is no block to remediate. return EvalResult{ Verdict: "allow", RuleID: g.ID, @@ -480,11 +509,18 @@ func (p *Policy) Evaluate(req EvalRequest) EvalResult { OriginalVerdict: verdict, } } + // Only carry the nudge through on a deny verdict; an allow + // outcome means the agent is proceeding and doesn't need a hint. + var resultNudge string + if verdict == "deny" { + resultNudge = nudge + } return EvalResult{ Verdict: verdict, RuleID: g.ID, Reason: reason, OriginalVerdict: verdict, + Nudge: resultNudge, } } return EvalResult{ diff --git a/control-plane/internal/policy/policy_test.go b/control-plane/internal/policy/policy_test.go index 0704ec9..463a96f 100644 --- a/control-plane/internal/policy/policy_test.go +++ b/control-plane/internal/policy/policy_test.go @@ -897,3 +897,163 @@ gates: t.Fatal("expected Load to reject malformed any_url_regex") } } + +const nudgeYAML = ` +version: 1 +mode: enforce +defaults: + bash: allow +gates: + - id: safety.rm-suggest-trash + match: + tool: Bash + any_command_regex: + - '\brm\s+(-[rRfF]+\s+)+\S+' + evaluate: + - kind: always + action: deny + nudge: "use trash instead" +` + +const nudgeMonitorYAML = ` +version: 1 +mode: monitor +defaults: + bash: allow +gates: + - id: safety.rm-suggest-trash + match: + tool: Bash + any_command_regex: + - '\brm\s+(-[rRfF]+\s+)+\S+' + evaluate: + - kind: always + action: deny + nudge: "use trash instead" +` + +const nudgeMixedYAML = ` +version: 1 +mode: enforce +defaults: + bash: allow +gates: + - id: with.nudge + match: + tool: Bash + command_regex: '^do-nudge\b' + evaluate: + - kind: always + action: deny + nudge: "try the safe alternative" + - id: without.nudge + match: + tool: Bash + command_regex: '^no-nudge\b' + evaluate: + - kind: always + action: deny +` + +// Loading a YAML rule with a `nudge:` produces a Gate whose +// EvalNudges parallel slice carries the string at the matching index. +func TestLoad_NudgeStoredOnGate(t *testing.T) { + p, err := Load(strings.NewReader(nudgeYAML)) + if err != nil { + t.Fatalf("Load: %v", err) + } + if len(p.Gates) != 1 { + t.Fatalf("len(gates) = %d, want 1", len(p.Gates)) + } + g := p.Gates[0] + if len(g.EvalNudges) != len(g.Evaluators) { + t.Fatalf("EvalNudges len = %d, Evaluators len = %d (must be parallel)", + len(g.EvalNudges), len(g.Evaluators)) + } + if g.EvalNudges[0] != "use trash instead" { + t.Fatalf("EvalNudges[0] = %q, want %q", g.EvalNudges[0], "use trash instead") + } +} + +// A matching deny rule with a nudge surfaces the nudge in the result. +func TestEvaluate_DenyCarriesNudge(t *testing.T) { + p, _ := Load(strings.NewReader(nudgeYAML)) + v := p.Evaluate(EvalRequest{ + Tool: "Bash", + Input: map[string]any{"command": "rm -rf /tmp/x"}, + }) + if v.Verdict != "deny" { + t.Fatalf("verdict = %q, want deny", v.Verdict) + } + if v.Nudge != "use trash instead" { + t.Fatalf("nudge = %q, want %q", v.Nudge, "use trash instead") + } +} + +// A matching deny rule that omits the nudge field returns an empty string. +// (Backward compat — existing rules pre-nudge keep working unchanged.) +func TestEvaluate_DenyWithoutNudgeIsEmpty(t *testing.T) { + p, _ := Load(strings.NewReader(nudgeMixedYAML)) + v := p.Evaluate(EvalRequest{ + Tool: "Bash", + Input: map[string]any{"command": "no-nudge target"}, + }) + if v.Verdict != "deny" || v.RuleID != "without.nudge" { + t.Fatalf("got %+v", v) + } + if v.Nudge != "" { + t.Fatalf("nudge = %q, want empty (rule has no nudge)", v.Nudge) + } +} + +// Sister rule with a nudge still attaches it; confirms the parallel-slice +// lookup uses the correct gate (not just the first one). +func TestEvaluate_DenyWithNudgeWhenSiblingHasNone(t *testing.T) { + p, _ := Load(strings.NewReader(nudgeMixedYAML)) + v := p.Evaluate(EvalRequest{ + Tool: "Bash", + Input: map[string]any{"command": "do-nudge target"}, + }) + if v.Verdict != "deny" || v.RuleID != "with.nudge" { + t.Fatalf("got %+v", v) + } + if v.Nudge != "try the safe alternative" { + t.Fatalf("nudge = %q", v.Nudge) + } +} + +// A non-matching request returns the default allow verdict and never +// surfaces a nudge from somewhere else in the policy. +func TestEvaluate_NonMatchingRequestHasNoNudge(t *testing.T) { + p, _ := Load(strings.NewReader(nudgeYAML)) + v := p.Evaluate(EvalRequest{ + Tool: "Bash", + Input: map[string]any{"command": "ls -la"}, + }) + if v.Verdict != "allow" || v.RuleID != "default" { + t.Fatalf("got %+v", v) + } + if v.Nudge != "" { + t.Fatalf("nudge leaked: %q", v.Nudge) + } +} + +// Monitor-mode matches downgrade deny → allow and must NOT carry the +// nudge through. The agent is being allowed to proceed; surfacing +// remediation guidance would be misleading. +func TestEvaluate_MonitorDowngradeSuppressesNudge(t *testing.T) { + p, _ := Load(strings.NewReader(nudgeMonitorYAML)) + v := p.Evaluate(EvalRequest{ + Tool: "Bash", + Input: map[string]any{"command": "rm -rf /tmp/x"}, + }) + if v.Verdict != "allow" { + t.Fatalf("verdict = %q, want allow (monitor)", v.Verdict) + } + if !v.MonitorMatch { + t.Fatal("MonitorMatch must be true on a monitor downgrade") + } + if v.Nudge != "" { + t.Fatalf("nudge must be empty on monitor downgrade, got %q", v.Nudge) + } +} From cb2842a8653ceb322a0462b6de5174e36f3113bc Mon Sep 17 00:00:00 2001 From: knhn1004 <49494541+knhn1004@users.noreply.github.com> Date: Sun, 3 May 2026 15:18:17 -0700 Subject: [PATCH 2/6] fix(daemon): preserve nudge through monitor for firewall escalation; replace parallel slice with struct C1: nudge was stripped at the policy layer during monitor downgrade, so when daemon=firewall escalated a MonitorMatch back to deny, the hint was already gone. Move nudge-stripping up to the API layer (applyDaemonModeOverride): policy.Evaluate now carries Nudge through the monitor branch, and the daemon override decides whether to surface or clear it based on the final user-visible verdict. I1: replace Gate.Evaluators []Evaluator + Gate.EvalNudges []string parallel slices with a single Gate.Evals []evalEntry. The two slices were a footgun (could drift out of sync); welding them together removes the bounds-check at the firing-index lookup. External callers use the new Gate.Evaluators() accessor for type-name introspection. M1: strengthen the daemon-monitor strip-site comment to call out that the policy layer leaves Nudge populated and the API layer clears it because the agent is being allowed to proceed. Tests: add TestApplyDaemonModeOverride_FirewallEscalatesWithNudge covering the C1 win, plus _MonitorMatchStripsNudge (relocated cousin of the old policy-layer test) and _MonitorSuppressesDenyAndStripsNudge. The renamed TestEvaluate_MonitorDowngradeKeepsNudge in the policy package now asserts the new behaviour: nudge survives the downgrade. Co-Authored-By: Claude Opus 4.7 (1M context) --- control-plane/internal/api/mode.go | 26 +++- control-plane/internal/api/mode_test.go | 124 +++++++++++++++++++ control-plane/internal/api/policy_http.go | 2 +- control-plane/internal/policy/policy.go | 85 ++++++++----- control-plane/internal/policy/policy_test.go | 35 ++++-- 5 files changed, 219 insertions(+), 53 deletions(-) create mode 100644 control-plane/internal/api/mode_test.go diff --git a/control-plane/internal/api/mode.go b/control-plane/internal/api/mode.go index f00a9c3..94de75c 100644 --- a/control-plane/internal/api/mode.go +++ b/control-plane/internal/api/mode.go @@ -55,11 +55,16 @@ func setRuntimeMode(m string) bool { // big red button — and must trump the per-policy / per-gate monitor // override the YAML can set. // -// Two directions: -// - daemon=monitor + verdict=deny → suppress to allow (existing -// behavior, just centralised here so every hook handler agrees) +// Three directions: +// - daemon=monitor + verdict=deny → suppress to allow, strip nudge +// (existing behavior, just centralised here so every hook handler +// agrees) +// - daemon=monitor + MonitorMatch passthrough → keep allow, strip +// nudge (the agent is being allowed; a remediation hint would be +// misleading) // - daemon=firewall + MonitorMatch + OriginalVerdict=deny → escalate -// back to deny, ignoring the policy's monitor downgrade +// back to deny, KEEP the nudge so the user sees the hint that the +// policy's monitor downgrade had stashed for this exact moment // // Returns the (possibly mutated) result + the daemon mode that was in // effect, so callers can stamp the ledger consistently. @@ -82,8 +87,15 @@ func applyDaemonModeOverride(r policy.EvalResult) (policy.EvalResult, string, st r.MonitorMatch = true r.Verdict = "allow" r.Reason = "deny suppressed by daemon monitor mode" - // Suppress the nudge: the agent is being allowed to proceed, - // so a "use trash instead" hint would be misleading. + // policy layer leaves Nudge populated on real denies (and on + // monitor-downgraded denies, post-fix); clear it here because + // the agent is being allowed to proceed and showing a hint + // would be misleading. + r.Nudge = "" + } else if r.MonitorMatch { + // Policy-level monitor downgrade carried the nudge through + // (so daemon=firewall could re-attach on escalation). Daemon + // is also in monitor, so the agent proceeds — clear the hint. r.Nudge = "" } case daemonModeFirewall: @@ -96,6 +108,8 @@ func applyDaemonModeOverride(r policy.EvalResult) (policy.EvalResult, string, st r.Reason = reason r.MonitorMatch = false origVerdict = "deny" + // Nudge intentionally preserved: the policy layer carried it + // through the monitor downgrade for exactly this case. } } return r, mode, origVerdict diff --git a/control-plane/internal/api/mode_test.go b/control-plane/internal/api/mode_test.go new file mode 100644 index 0000000..74e80a6 --- /dev/null +++ b/control-plane/internal/api/mode_test.go @@ -0,0 +1,124 @@ +package api + +import ( + "testing" + + "github.com/openagentlock/openagentlock/control-plane/internal/policy" +) + +// applyDaemonModeOverride is the API-layer composition point that owns +// when to surface a nudge. The policy layer carries Nudge through a +// monitor downgrade (so daemon=firewall can re-escalate with the hint +// intact); this layer decides whether the user actually sees it. + +// daemon=firewall must escalate a policy-level monitor match back to +// deny AND surface the nudge the policy stashed for this exact case. +// This is the C1 regression: the nudge was being stripped at the +// policy layer, leaving the deny bare. +func TestApplyDaemonModeOverride_FirewallEscalatesWithNudge(t *testing.T) { + t.Setenv("AGENTLOCK_MODE", "") + runtimeMode.Store("firewall") + t.Cleanup(func() { runtimeMode.Store("") }) + + in := policy.EvalResult{ + Verdict: "allow", + RuleID: "safety.rm-suggest-trash", + Reason: "monitor: matched rule safety.rm-suggest-trash (deny)", + MonitorMatch: true, + OriginalVerdict: "deny", + Nudge: "use trash instead", + } + out, mode, origVerdict := applyDaemonModeOverride(in) + + if mode != "firewall" { + t.Fatalf("mode = %q, want firewall", mode) + } + if out.Verdict != "deny" { + t.Fatalf("Verdict = %q, want deny (firewall escalation)", out.Verdict) + } + if origVerdict != "deny" { + t.Fatalf("origVerdict = %q, want deny (ledger truth)", origVerdict) + } + if out.Nudge != "use trash instead" { + t.Fatalf("Nudge = %q, want %q (firewall escalation must keep the hint)", + out.Nudge, "use trash instead") + } + if out.MonitorMatch { + t.Fatal("MonitorMatch must be cleared after firewall escalation") + } + if out.RuleID != "safety.rm-suggest-trash" { + t.Fatalf("RuleID = %q, want safety.rm-suggest-trash", out.RuleID) + } +} + +// daemon=monitor + a policy-level monitor match (allow + MonitorMatch) +// must strip the nudge: the agent is being allowed to proceed in both +// layers, so surfacing a remediation hint would be misleading. This +// is the relocated cousin of the old policy-layer +// TestEvaluate_MonitorDowngradeSuppressesNudge. +func TestApplyDaemonModeOverride_MonitorMatchStripsNudge(t *testing.T) { + t.Setenv("AGENTLOCK_MODE", "") + runtimeMode.Store("monitor") + t.Cleanup(func() { runtimeMode.Store("") }) + + in := policy.EvalResult{ + Verdict: "allow", + RuleID: "safety.rm-suggest-trash", + Reason: "monitor: matched rule safety.rm-suggest-trash (deny)", + MonitorMatch: true, + OriginalVerdict: "deny", + Nudge: "use trash instead", + } + out, mode, origVerdict := applyDaemonModeOverride(in) + + if mode != "monitor" { + t.Fatalf("mode = %q, want monitor", mode) + } + if out.Verdict != "allow" { + t.Fatalf("Verdict = %q, want allow", out.Verdict) + } + if origVerdict != "allow" { + // The verdict reaching the function was already-allow; the + // ledger should record that as truth. (OriginalVerdict on the + // EvalResult preserves the policy-layer deny separately.) + t.Fatalf("origVerdict = %q, want allow", origVerdict) + } + if out.Nudge != "" { + t.Fatalf("Nudge = %q, want empty (agent is being allowed to proceed)", out.Nudge) + } + if !out.MonitorMatch { + t.Fatal("MonitorMatch must remain true so the ledger records the policy-level match") + } +} + +// daemon=monitor + a real policy-level deny must suppress to allow, +// stamp MonitorMatch, AND strip the nudge. Pre-existing behavior; +// covered here so the strip-on-final-allow contract is testable +// independent of any HTTP fixture. +func TestApplyDaemonModeOverride_MonitorSuppressesDenyAndStripsNudge(t *testing.T) { + t.Setenv("AGENTLOCK_MODE", "") + runtimeMode.Store("monitor") + t.Cleanup(func() { runtimeMode.Store("") }) + + in := policy.EvalResult{ + Verdict: "deny", + RuleID: "safety.rm-suggest-trash", + Reason: "matched rule safety.rm-suggest-trash (deny)", + OriginalVerdict: "deny", + Nudge: "use trash instead", + } + out, _, origVerdict := applyDaemonModeOverride(in) + + if out.Verdict != "allow" { + t.Fatalf("Verdict = %q, want allow (monitor suppression)", out.Verdict) + } + if origVerdict != "deny" { + t.Fatalf("origVerdict = %q, want deny (ledger truth)", origVerdict) + } + if !out.MonitorMatch { + t.Fatal("MonitorMatch must be set after monitor suppression") + } + if out.Nudge != "" { + t.Fatalf("Nudge = %q, want empty after monitor suppression", out.Nudge) + } +} diff --git a/control-plane/internal/api/policy_http.go b/control-plane/internal/api/policy_http.go index 7ccd289..f05a980 100644 --- a/control-plane/internal/api/policy_http.go +++ b/control-plane/internal/api/policy_http.go @@ -39,7 +39,7 @@ func policyViewHandler(d Deps) http.HandlerFunc { Tool: g.Match.Tool, ToolPrefix: g.Match.ToolPrefix, AnyCommandRegex: regexStrings(g.Match.Regexes), - Evaluators: evaluatorNames(g.Evaluators), + Evaluators: evaluatorNames(g.Evaluators()), }) } writeJSON(w, http.StatusOK, map[string]any{ diff --git a/control-plane/internal/policy/policy.go b/control-plane/internal/policy/policy.go index db7c327..d1f2d91 100644 --- a/control-plane/internal/policy/policy.go +++ b/control-plane/internal/policy/policy.go @@ -87,18 +87,36 @@ type Policy struct { RawYAML []byte } +// evalEntry pairs a compiled evaluator with the optional human-readable +// nudge string from the same evaluate[] YAML entry. Welding the two +// together via a single slice prevents the parallel-slice footgun the +// previous shape had (Evaluators + EvalNudges drifting out of sync). +type evalEntry struct { + eval Evaluator + nudge string +} + type Gate struct { - ID string - Mode string // monitor | enforce — inherits Policy.Mode if empty - Disabled bool // true = skip this gate during evaluation - Match Matcher - Evaluators []Evaluator - // EvalNudges is a parallel slice to Evaluators carrying the optional - // `nudge:` string from each evaluate[] entry. Same length as - // Evaluators; entries are "" when the corresponding rawEval omitted - // the field. Evaluate uses the firing index to pull the matching - // nudge into the result without changing the Evaluator interface. - EvalNudges []string + ID string + Mode string // monitor | enforce — inherits Policy.Mode if empty + Disabled bool // true = skip this gate during evaluation + Match Matcher + // Evals is the compiled evaluate[] pipeline. Each entry carries the + // evaluator plus its optional `nudge:` hint; the firing entry's nudge + // gets propagated into EvalResult on a deny verdict. + Evals []evalEntry +} + +// Evaluators returns the compiled evaluators from this gate, in order. +// External callers (the read-only policy view in the API package) need +// to introspect the evaluator types without poking at the unexported +// nudge field on each entry. +func (g Gate) Evaluators() []Evaluator { + out := make([]Evaluator, len(g.Evals)) + for i, e := range g.Evals { + out[i] = e.eval + } + return out } // Evaluator is a step in a gate's `evaluate` pipeline. Each returns @@ -392,9 +410,11 @@ type EvalResult struct { // Equal to Verdict when MonitorMatch is false. OriginalVerdict string // Nudge is the optional human-readable hint propagated from the - // firing evaluate clause. Only populated when the final verdict is - // "deny" — allow verdicts and monitor-downgraded matches leave it - // empty (the agent is proceeding, no need to nudge). + // firing evaluate clause. Populated whenever a rule with a nudge + // matched and produced a deny — including monitor-downgraded + // matches, so a daemon-level firewall escalation can re-surface the + // hint. The API layer (applyDaemonModeOverride) is responsible for + // clearing this when the agent is being allowed to proceed. Nudge string } @@ -422,15 +442,13 @@ func Load(r io.Reader) (*Policy, error) { if len(rg.Evaluate) == 0 { return nil, fmt.Errorf("gate %q: missing evaluate", rg.ID) } - evaluators := make([]Evaluator, 0, len(rg.Evaluate)) - nudges := make([]string, 0, len(rg.Evaluate)) + evals := make([]evalEntry, 0, len(rg.Evaluate)) for i, e := range rg.Evaluate { ev, err := compileEvaluator(e) if err != nil { return nil, fmt.Errorf("gate %q: evaluate[%d]: %w", rg.ID, i, err) } - evaluators = append(evaluators, ev) - nudges = append(nudges, e.Nudge) + evals = append(evals, evalEntry{eval: ev, nudge: e.Nudge}) } m, err := compileMatch(rg.Match) if err != nil { @@ -440,12 +458,11 @@ func Load(r io.Reader) (*Policy, error) { return nil, fmt.Errorf("gate %q: match has no criteria", rg.ID) } gates = append(gates, Gate{ - ID: rg.ID, - Mode: rg.Mode, - Disabled: rg.Disabled, - Match: m, - Evaluators: evaluators, - EvalNudges: nudges, + ID: rg.ID, + Mode: rg.Mode, + Disabled: rg.Disabled, + Match: m, + Evals: evals, }) } @@ -472,8 +489,8 @@ func (p *Policy) Evaluate(req EvalRequest) EvalResult { } verdict := "skip" firingIdx := -1 - for i, ev := range g.Evaluators { - v := ev.Evaluate(req) + for i, e := range g.Evals { + v := e.eval.Evaluate(req) if v == "skip" { continue } @@ -491,22 +508,24 @@ func (p *Policy) Evaluate(req EvalRequest) EvalResult { effectiveMode = p.Mode } reason := fmt.Sprintf("matched rule %s (%s)", g.ID, verdict) - // Pull the nudge from the firing evaluator's parallel-slice - // entry. Bounds-checked because EvalNudges may be nil/short on - // hand-built Gate values from tests. + // Pull the nudge from the firing evaluator's entry. The slice is + // always built alongside the evaluators (see Load), so the index + // is guaranteed in range — no bounds check needed. var nudge string - if firingIdx >= 0 && firingIdx < len(g.EvalNudges) { - nudge = g.EvalNudges[firingIdx] + if firingIdx >= 0 { + nudge = g.Evals[firingIdx].nudge } if effectiveMode == "monitor" { - // Monitor-downgraded matches let the agent proceed, so - // suppress the nudge — there is no block to remediate. + // Nudge is preserved through monitor downgrade so daemon-level + // firewall escalation can re-attach it; the API layer decides + // whether to surface it. return EvalResult{ Verdict: "allow", RuleID: g.ID, Reason: "monitor: " + reason, MonitorMatch: true, OriginalVerdict: verdict, + Nudge: nudge, } } // Only carry the nudge through on a deny verdict; an allow diff --git a/control-plane/internal/policy/policy_test.go b/control-plane/internal/policy/policy_test.go index 463a96f..833e3a1 100644 --- a/control-plane/internal/policy/policy_test.go +++ b/control-plane/internal/policy/policy_test.go @@ -955,8 +955,10 @@ gates: action: deny ` -// Loading a YAML rule with a `nudge:` produces a Gate whose -// EvalNudges parallel slice carries the string at the matching index. +// Loading a YAML rule with a `nudge:` produces a Gate whose evals slice +// carries the string at the matching index. The slice was previously +// two parallel slices (Evaluators + EvalNudges); welded into a single +// []evalEntry to prevent drift. func TestLoad_NudgeStoredOnGate(t *testing.T) { p, err := Load(strings.NewReader(nudgeYAML)) if err != nil { @@ -966,12 +968,14 @@ func TestLoad_NudgeStoredOnGate(t *testing.T) { t.Fatalf("len(gates) = %d, want 1", len(p.Gates)) } g := p.Gates[0] - if len(g.EvalNudges) != len(g.Evaluators) { - t.Fatalf("EvalNudges len = %d, Evaluators len = %d (must be parallel)", - len(g.EvalNudges), len(g.Evaluators)) + if len(g.Evals) != 1 { + t.Fatalf("len(Evals) = %d, want 1", len(g.Evals)) } - if g.EvalNudges[0] != "use trash instead" { - t.Fatalf("EvalNudges[0] = %q, want %q", g.EvalNudges[0], "use trash instead") + if g.Evals[0].nudge != "use trash instead" { + t.Fatalf("Evals[0].nudge = %q, want %q", g.Evals[0].nudge, "use trash instead") + } + if g.Evals[0].eval == nil { + t.Fatal("Evals[0].eval must be non-nil") } } @@ -1038,10 +1042,12 @@ func TestEvaluate_NonMatchingRequestHasNoNudge(t *testing.T) { } } -// Monitor-mode matches downgrade deny → allow and must NOT carry the -// nudge through. The agent is being allowed to proceed; surfacing -// remediation guidance would be misleading. -func TestEvaluate_MonitorDowngradeSuppressesNudge(t *testing.T) { +// Monitor-mode matches downgrade deny → allow but PRESERVE the nudge so +// a downstream daemon-level firewall escalation can re-attach the hint. +// The decision to suppress the nudge on a final-allow verdict lives in +// the API layer (applyDaemonModeOverride) — see +// TestApplyDaemonModeOverride_MonitorMatchStripsNudge. +func TestEvaluate_MonitorDowngradeKeepsNudge(t *testing.T) { p, _ := Load(strings.NewReader(nudgeMonitorYAML)) v := p.Evaluate(EvalRequest{ Tool: "Bash", @@ -1053,7 +1059,10 @@ func TestEvaluate_MonitorDowngradeSuppressesNudge(t *testing.T) { if !v.MonitorMatch { t.Fatal("MonitorMatch must be true on a monitor downgrade") } - if v.Nudge != "" { - t.Fatalf("nudge must be empty on monitor downgrade, got %q", v.Nudge) + if v.OriginalVerdict != "deny" { + t.Fatalf("OriginalVerdict = %q, want deny", v.OriginalVerdict) + } + if v.Nudge != "use trash instead" { + t.Fatalf("nudge must survive monitor downgrade for firewall re-escalation, got %q", v.Nudge) } } From 07c56c26eabfcea5109c221657f2f4b3baaebf79 Mon Sep 17 00:00:00 2001 From: knhn1004 <49494541+knhn1004@users.noreply.github.com> Date: Sun, 3 May 2026 15:26:03 -0700 Subject: [PATCH 3/6] feat(hooks): concatenate nudge into deny reason for claude/codex/cursor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a policy rule with `nudge:` produces a deny verdict, surface the hint to the model through the harness's only inbound channel — the deny-reason text — by appending it as `\n\n→ Suggested: ` to both permissionDecisionReason and stopReason. Implemented as a shared denyReasonWithNudge helper invoked by claudePreToolUseHandler, codexPreToolUseHandler, cursorGateHandler (preToolUse + beforeMCPExecution), and cursorBeforeShellHandler. Allow / monitor / non-matching paths see Nudge == "" so they pass through unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/tests/hook-claude-code.test.ts | 41 +++++++ cli/tests/hook-codex.test.ts | 40 +++++++ cli/tests/hook-cursor.test.ts | 40 +++++++ control-plane/internal/api/hooks_claude.go | 8 +- .../internal/api/hooks_claude_test.go | 52 ++++++++ control-plane/internal/api/hooks_codex.go | 7 +- .../internal/api/hooks_codex_test.go | 51 ++++++++ control-plane/internal/api/hooks_cursor.go | 14 ++- .../internal/api/hooks_cursor_test.go | 112 ++++++++++++++++++ control-plane/internal/api/mode.go | 19 +++ 10 files changed, 376 insertions(+), 8 deletions(-) diff --git a/cli/tests/hook-claude-code.test.ts b/cli/tests/hook-claude-code.test.ts index 396297c..d3b5c5b 100644 --- a/cli/tests/hook-claude-code.test.ts +++ b/cli/tests/hook-claude-code.test.ts @@ -162,6 +162,47 @@ describe("hook claude-code shim", () => { expect(r.stdout).toContain('"permissionDecision":"deny"'); }); + test("deny with nudge → stderr carries the suggested-line", async () => { + // The daemon concatenates the rule's nudge into the deny reason as + // "\n\n→ Suggested: ". The CLI shim is a transparent + // forwarder: whatever permissionDecisionReason it gets MUST land on + // stderr verbatim so Claude Code surfaces the hint to the model. + const recorded: RecordedRequest[] = []; + const concatenated = + "matched rule safety.rm-suggest-trash (deny)\n\n→ Suggested: use trash instead"; + const m = startMockDaemon( + { + "/v1/hooks/claude-code/pre-tool-use": { + status: 200, + body: { + continue: false, + stopReason: concatenated, + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: concatenated, + }, + }, + }, + }, + recorded, + ); + server = m.server; + + const payload = JSON.stringify({ + session_id: "sess_nudge", + hook_event_name: "PreToolUse", + tool_name: "Bash", + tool_use_id: "t_nudge", + tool_input: { command: "rm -rf /tmp/x" }, + }); + const r = await runShim(["pre-tool-use"], payload, m.url); + expect(r.code).toBe(2); + expect(r.stderr).toContain("→ Suggested: "); + expect(r.stderr).toContain("use trash instead"); + expect(r.stderr).toContain("safety.rm-suggest-trash"); + }); + test("daemon unreachable → silent fail-open on every event", async () => { // Every event must produce empty stdout AND empty stderr with exit 0. // Anything else either pollutes the model's input stream or triggers diff --git a/cli/tests/hook-codex.test.ts b/cli/tests/hook-codex.test.ts index 3390174..cb2b074 100644 --- a/cli/tests/hook-codex.test.ts +++ b/cli/tests/hook-codex.test.ts @@ -157,6 +157,46 @@ describe("hook codex shim", () => { expect(r.stdout).toContain('"permissionDecision":"deny"'); }); + test("deny with nudge → stderr carries the suggested-line", async () => { + // Same forwarding contract as the claude-code shim: the daemon + // builds "\n\n→ Suggested: " and the shim mirrors + // that text onto stderr so Codex shows the hint to the model. + const recorded: RecordedRequest[] = []; + const concatenated = + "matched rule safety.rm-suggest-trash (deny)\n\n→ Suggested: use trash instead"; + const m = startMockDaemon( + { + "/v1/hooks/codex/pre-tool-use": { + status: 200, + body: { + continue: false, + stopReason: concatenated, + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: concatenated, + }, + }, + }, + }, + recorded, + ); + server = m.server; + + const payload = JSON.stringify({ + session_id: "sess_nudge", + hook_event_name: "PreToolUse", + tool_name: "Bash", + tool_use_id: "t_nudge", + tool_input: { command: "rm -rf /tmp/x" }, + }); + const r = await runShim(["pre-tool-use"], payload, m.url); + expect(r.code).toBe(2); + expect(r.stderr).toContain("→ Suggested: "); + expect(r.stderr).toContain("use trash instead"); + expect(r.stderr).toContain("safety.rm-suggest-trash"); + }); + test("daemon unreachable → silent fail-open on every event", async () => { // Codex hides hook stderr on exit-0 and renders any non-zero exit // as a "(failed)" banner that looks like a real error. Neither is diff --git a/cli/tests/hook-cursor.test.ts b/cli/tests/hook-cursor.test.ts index 85f77ae..1304cee 100644 --- a/cli/tests/hook-cursor.test.ts +++ b/cli/tests/hook-cursor.test.ts @@ -157,6 +157,46 @@ describe("hook cursor shim", () => { expect(r.stdout).toContain("rogue.destructive-bash"); }); + test("deny with nudge → stderr and stdout carry the suggested-line", async () => { + // Cursor shim mirrors the daemon's reason into BOTH stderr and the + // stdout JSON's agent_message field, so the hint surfaces no matter + // which channel Cursor renders. + const recorded: RecordedRequest[] = []; + const concatenated = + "matched rule safety.rm-suggest-trash (deny)\n\n→ Suggested: use trash instead"; + const m = startMockDaemon( + { + "/v1/hooks/cursor/pre-tool-use": { + status: 200, + body: { + continue: false, + stopReason: concatenated, + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: concatenated, + }, + }, + }, + }, + recorded, + ); + server = m.server; + + const payload = JSON.stringify({ + conversation_id: "conv_nudge", + hook_event_name: "preToolUse", + tool_name: "Bash", + tool_input: { command: "rm -rf /tmp/x" }, + }); + const r = await runShim(["pre-tool-use"], payload, m.url); + expect(r.code).toBe(2); + expect(r.stderr).toContain("→ Suggested: "); + expect(r.stderr).toContain("use trash instead"); + expect(r.stdout).toContain("→ Suggested: "); + expect(r.stdout).toContain("use trash instead"); + }); + test("daemon unreachable → silent fail-open with plain allow envelope on every event", async () => { // Cursor has no UI surface outside the model's input stream that we // can write to (no statusLine, no safe agent_message). On a transport diff --git a/control-plane/internal/api/hooks_claude.go b/control-plane/internal/api/hooks_claude.go index 4dddc52..ea6255b 100644 --- a/control-plane/internal/api/hooks_claude.go +++ b/control-plane/internal/api/hooks_claude.go @@ -114,16 +114,20 @@ func claudePreToolUseHandler(d Deps) http.HandlerFunc { return } + // On deny with a nudge attached, append the hint into the same + // channel — the harness's reason text — so the model sees both + // the verdict explanation and the suggested remediation. + reason := denyReasonWithNudge(result) out := claudeHookOutput{ Continue: result.Verdict == "allow", HookSpecificOutput: claudeHookSpecifics{ HookEventName: "PreToolUse", PermissionDecision: result.Verdict, - PermissionDecisionReason: result.Reason, + PermissionDecisionReason: reason, }, } if result.Verdict == "deny" { - out.StopReason = result.Reason + out.StopReason = reason } writeJSON(w, http.StatusOK, out) } diff --git a/control-plane/internal/api/hooks_claude_test.go b/control-plane/internal/api/hooks_claude_test.go index 005de5c..2b1b53f 100644 --- a/control-plane/internal/api/hooks_claude_test.go +++ b/control-plane/internal/api/hooks_claude_test.go @@ -354,6 +354,58 @@ func TestGateCheck_FirewallEscalatesPolicyMonitorMatch(t *testing.T) { } } +// TestClaudePreToolUse_DenyConcatenatesNudge verifies that when a policy +// rule fires with a `nudge:` clause, the daemon's deny response carries +// both the original "matched rule X (deny)" reason AND the literal +// "→ Suggested: " suffix in BOTH permissionDecisionReason AND +// stopReason — these are the only channels the harness uses to surface +// the explanation to the model. The exact "→ Suggested: " prefix is the +// stable contract downstream consumers (CLI shim, dashboard) match on. +func TestClaudePreToolUse_DenyConcatenatesNudge(t *testing.T) { + fx := newGateFixture(t, nudgePolicyYAML) + body := `{ + "session_id": "claude-sess-nudge", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_use_id": "toolu_nudge", + "tool_input": {"command": "rm -rf /tmp/x"} + }` + res, out := postClaudePre(t, fx, body) + if res.StatusCode != http.StatusOK { + t.Fatalf("status = %d", res.StatusCode) + } + spec, _ := out["hookSpecificOutput"].(map[string]any) + if spec["permissionDecision"] != "deny" { + t.Fatalf("decision = %v, want deny", spec) + } + reason, _ := spec["permissionDecisionReason"].(string) + if !strings.Contains(reason, "matched rule safety.rm-suggest-trash (deny)") { + t.Fatalf("permissionDecisionReason missing original reason: %q", reason) + } + if !strings.Contains(reason, "→ Suggested: use trash instead") { + t.Fatalf("permissionDecisionReason missing nudge suffix: %q", reason) + } + stop, _ := out["stopReason"].(string) + if !strings.Contains(stop, "→ Suggested: use trash instead") { + t.Fatalf("stopReason missing nudge suffix: %q", stop) + } + + // Allow path: a benign command must NOT carry the suggested-line. + allowBody := `{ + "session_id": "claude-sess-nudge-allow", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_use_id": "toolu_nudge_allow", + "tool_input": {"command": "ls -la"} + }` + _, allowOut := postClaudePre(t, fx, allowBody) + allowSpec, _ := allowOut["hookSpecificOutput"].(map[string]any) + allowReason, _ := allowSpec["permissionDecisionReason"].(string) + if strings.Contains(allowReason, "→ Suggested:") { + t.Fatalf("allow reason must not carry nudge: %q", allowReason) + } +} + func TestClaudePreToolUse_FirewallEscalatesPolicyMonitorMatch(t *testing.T) { t.Setenv("AGENTLOCK_MODE", "") runtimeMode.Store("firewall") diff --git a/control-plane/internal/api/hooks_codex.go b/control-plane/internal/api/hooks_codex.go index a4af14e..8dbe7b5 100644 --- a/control-plane/internal/api/hooks_codex.go +++ b/control-plane/internal/api/hooks_codex.go @@ -109,16 +109,19 @@ func codexPreToolUseHandler(d Deps) http.HandlerFunc { // Same response envelope as Claude — the shim binary maps it onto // Codex's exit-code semantics (allow → exit 0; deny → exit 2 with // the JSON also written to stdout for harnesses that read it). + // On deny + nudge, the hint is concatenated into the reason so + // the model sees it through Codex's stderr channel. + reason := denyReasonWithNudge(result) out := claudeHookOutput{ Continue: result.Verdict == "allow", HookSpecificOutput: claudeHookSpecifics{ HookEventName: "PreToolUse", PermissionDecision: result.Verdict, - PermissionDecisionReason: result.Reason, + PermissionDecisionReason: reason, }, } if result.Verdict == "deny" { - out.StopReason = result.Reason + out.StopReason = reason } writeJSON(w, http.StatusOK, out) } diff --git a/control-plane/internal/api/hooks_codex_test.go b/control-plane/internal/api/hooks_codex_test.go index ccde747..660ae93 100644 --- a/control-plane/internal/api/hooks_codex_test.go +++ b/control-plane/internal/api/hooks_codex_test.go @@ -267,6 +267,57 @@ func TestCodexSessionStart_CreatesSession(t *testing.T) { } } +// Mirror of TestClaudePreToolUse_DenyConcatenatesNudge for the Codex +// hook path. The daemon must inject the `→ Suggested: ` suffix +// into the Codex reply's reason fields whenever the firing rule carries +// a nudge. +func TestCodexPreToolUse_DenyConcatenatesNudge(t *testing.T) { + fx := newGateFixture(t, nudgePolicyYAML) + body := `{ + "session_id": "codex-sess-nudge", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_use_id": "codex_nudge", + "turn_id": "turn_nudge", + "tool_input": {"command": "rm -rf /tmp/x"} + }` + res, out := postCodexPre(t, fx, body) + if res.StatusCode != http.StatusOK { + t.Fatalf("status = %d", res.StatusCode) + } + spec, _ := out["hookSpecificOutput"].(map[string]any) + if spec["permissionDecision"] != "deny" { + t.Fatalf("decision = %v, want deny", spec) + } + reason, _ := spec["permissionDecisionReason"].(string) + if !strings.Contains(reason, "matched rule safety.rm-suggest-trash (deny)") { + t.Fatalf("permissionDecisionReason missing original reason: %q", reason) + } + if !strings.Contains(reason, "→ Suggested: use trash instead") { + t.Fatalf("permissionDecisionReason missing nudge suffix: %q", reason) + } + stop, _ := out["stopReason"].(string) + if !strings.Contains(stop, "→ Suggested: use trash instead") { + t.Fatalf("stopReason missing nudge suffix: %q", stop) + } + + // Allow path stays untouched — no suggested-line on benign commands. + allowBody := `{ + "session_id": "codex-sess-nudge-allow", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_use_id": "codex_nudge_allow", + "turn_id": "turn_nudge_allow", + "tool_input": {"command": "ls -la"} + }` + _, allowOut := postCodexPre(t, fx, allowBody) + allowSpec, _ := allowOut["hookSpecificOutput"].(map[string]any) + allowReason, _ := allowSpec["permissionDecisionReason"].(string) + if strings.Contains(allowReason, "→ Suggested:") { + t.Fatalf("allow reason must not carry nudge: %q", allowReason) + } +} + // Mirror of TestClaudePreToolUse_FirewallEscalatesPolicyMonitorMatch // (hooks_claude_test.go) — daemon=firewall must re-escalate a // policy-monitor match back to deny on the codex hook path too. diff --git a/control-plane/internal/api/hooks_cursor.go b/control-plane/internal/api/hooks_cursor.go index c62ef03..36ef755 100644 --- a/control-plane/internal/api/hooks_cursor.go +++ b/control-plane/internal/api/hooks_cursor.go @@ -263,16 +263,20 @@ func cursorGateHandler(d Deps, eventName string, kind cursorDedupeKind) http.Han return } + // On deny + nudge, append the hint into the reason. The cached + // response below preserves the concatenated form so the paired + // MCP/preToolUse event short-circuits with the same text. + reason := denyReasonWithNudge(result) out := claudeHookOutput{ Continue: result.Verdict == "allow", HookSpecificOutput: claudeHookSpecifics{ HookEventName: eventName, PermissionDecision: result.Verdict, - PermissionDecisionReason: result.Reason, + PermissionDecisionReason: reason, }, } if result.Verdict == "deny" { - out.StopReason = result.Reason + out.StopReason = reason } // Cache the response keyed on tool_use_id so the paired event @@ -515,16 +519,18 @@ func cursorBeforeShellHandler(d Deps) http.HandlerFunc { // No ledger append — preToolUse owns the audit trail for this // tool call. We're just echoing the verdict in Cursor's expected // envelope so it can fail-closed at the shell-execution stage. + // Same nudge concat as the gate handler for parity. + reason := denyReasonWithNudge(result) out := claudeHookOutput{ Continue: result.Verdict == "allow", HookSpecificOutput: claudeHookSpecifics{ HookEventName: "BeforeShellExecution", PermissionDecision: result.Verdict, - PermissionDecisionReason: result.Reason, + PermissionDecisionReason: reason, }, } if result.Verdict == "deny" { - out.StopReason = result.Reason + out.StopReason = reason } writeJSON(w, http.StatusOK, out) } diff --git a/control-plane/internal/api/hooks_cursor_test.go b/control-plane/internal/api/hooks_cursor_test.go index f5a9cec..7dc9b30 100644 --- a/control-plane/internal/api/hooks_cursor_test.go +++ b/control-plane/internal/api/hooks_cursor_test.go @@ -447,6 +447,118 @@ func TestCursorPreToolUse_FirewallEscalatesPolicyMonitorMatch(t *testing.T) { } } +// shellNudgePolicyYAML mirrors nudgePolicyYAML (gate_test.go) but matches +// on `tool: Shell`, the synthetic name BeforeShellExecution evaluates +// against (no tool_name in that payload). +const shellNudgePolicyYAML = ` +version: 1 +mode: enforce +defaults: + bash: allow +gates: + - id: safety.rm-suggest-trash + match: + tool: Shell + any_command_regex: + - '\brm\s+(-[rRfF]+\s+)+\S+' + evaluate: + - kind: always + action: deny + nudge: "use trash instead" +` + +// Mirror of TestClaudePreToolUse_DenyConcatenatesNudge for the Cursor +// preToolUse handler. The same gate handler also serves +// beforeMCPExecution, so this exercises the shared concat path. +func TestCursorPreToolUse_DenyConcatenatesNudge(t *testing.T) { + cursorResetDedupe() + fx := newGateFixture(t, nudgePolicyYAML) + body := `{ + "conversation_id": "cursor-sess-nudge", + "generation_id": "gen-nudge", + "hook_event_name": "preToolUse", + "cursor_version": "1.7.0", + "tool_name": "Bash", + "tool_use_id": "cursor_nudge", + "tool_input": {"command": "rm -rf /tmp/x"} + }` + res, out := postCursorPre(t, fx, body) + if res.StatusCode != http.StatusOK { + t.Fatalf("status = %d", res.StatusCode) + } + spec, _ := out["hookSpecificOutput"].(map[string]any) + if spec["permissionDecision"] != "deny" { + t.Fatalf("decision = %v, want deny", spec) + } + reason, _ := spec["permissionDecisionReason"].(string) + if !strings.Contains(reason, "matched rule safety.rm-suggest-trash (deny)") { + t.Fatalf("permissionDecisionReason missing original reason: %q", reason) + } + if !strings.Contains(reason, "→ Suggested: use trash instead") { + t.Fatalf("permissionDecisionReason missing nudge suffix: %q", reason) + } + stop, _ := out["stopReason"].(string) + if !strings.Contains(stop, "→ Suggested: use trash instead") { + t.Fatalf("stopReason missing nudge suffix: %q", stop) + } + + // Allow path: a benign command must NOT carry the suggested-line. + cursorResetDedupe() + allowBody := `{ + "conversation_id": "cursor-sess-nudge-allow", + "generation_id": "gen-nudge-allow", + "hook_event_name": "preToolUse", + "cursor_version": "1.7.0", + "tool_name": "Bash", + "tool_use_id": "cursor_nudge_allow", + "tool_input": {"command": "ls -la"} + }` + _, allowOut := postCursorPre(t, fx, allowBody) + allowSpec, _ := allowOut["hookSpecificOutput"].(map[string]any) + allowReason, _ := allowSpec["permissionDecisionReason"].(string) + if strings.Contains(allowReason, "→ Suggested:") { + t.Fatalf("allow reason must not carry nudge: %q", allowReason) + } +} + +// Cursor 2.x's BeforeShellExecution payload routes through a separate +// handler (no tool_use_id, no tool_name); confirm the same nudge concat +// applies there too. +func TestCursorBeforeShellExecution_DenyConcatenatesNudge(t *testing.T) { + cursorResetDedupe() + fx := newGateFixture(t, shellNudgePolicyYAML) + body := `{ + "conversation_id": "cursor-shell-nudge", + "generation_id": "gen-shell-nudge", + "hook_event_name": "beforeShellExecution", + "command": "rm -rf /tmp/x", + "cwd": "/tmp", + "sandbox": false, + "cursor_version": "1.7.0" + }` + res, err := http.Post(fx.srv.URL+"/v1/hooks/cursor/before-shell-execution", "application/json", strings.NewReader(body)) + if err != nil { + t.Fatalf("POST: %v", err) + } + defer res.Body.Close() + if res.StatusCode != http.StatusOK { + t.Fatalf("status = %d", res.StatusCode) + } + var out map[string]any + _ = json.NewDecoder(res.Body).Decode(&out) + spec, _ := out["hookSpecificOutput"].(map[string]any) + if spec["permissionDecision"] != "deny" { + t.Fatalf("decision = %v, want deny", spec) + } + reason, _ := spec["permissionDecisionReason"].(string) + if !strings.Contains(reason, "matched rule safety.rm-suggest-trash (deny)") { + t.Fatalf("permissionDecisionReason missing original reason: %q", reason) + } + if !strings.Contains(reason, "→ Suggested: use trash instead") { + t.Fatalf("permissionDecisionReason missing nudge suffix: %q", reason) + } +} + // BeforeShellExecution doesn't append to the ledger (PreToolUse owns // that), so we only assert on the response envelope. func TestCursorBeforeShellExecution_FirewallEscalatesPolicyMonitorMatch(t *testing.T) { diff --git a/control-plane/internal/api/mode.go b/control-plane/internal/api/mode.go index 94de75c..32b418d 100644 --- a/control-plane/internal/api/mode.go +++ b/control-plane/internal/api/mode.go @@ -50,6 +50,25 @@ func setRuntimeMode(m string) bool { return false } +// denyReasonWithNudge composes the user-facing deny reason that the +// harness shims forward to the model. When the policy carried a nudge +// hint with the matching rule, we append it after the original reason +// using a stable, parseable shape: +// +// "\n\n→ Suggested: " +// +// The exact arrow + "Suggested" prefix is part of the contract — tests +// across the daemon, CLI, and downstream tooling rely on the literal +// substring "→ Suggested: " to spot the hint. Allow / monitor / +// non-matching paths feed result.Nudge == "" here and pass through +// unchanged. +func denyReasonWithNudge(result policy.EvalResult) string { + if result.Verdict != "deny" || result.Nudge == "" { + return result.Reason + } + return result.Reason + "\n\n→ Suggested: " + result.Nudge +} + // applyDaemonModeOverride composes the daemon-level mode with a policy // EvalResult. The daemon mode is the *outer* switch — the dashboard's // big red button — and must trump the per-policy / per-gate monitor From cdc85e0cbc45e6249547358a1a46b0fe30f33dbf Mon Sep 17 00:00:00 2001 From: knhn1004 <49494541+knhn1004@users.noreply.github.com> Date: Sun, 3 May 2026 15:38:35 -0700 Subject: [PATCH 4/6] docs: document the rule nudge field across policies, api, and hooks references Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/guide/policies.md | 13 +++++++++++++ docs/reference/api.md | 10 ++++++++++ docs/reference/hooks.md | 4 ++++ 3 files changed, 27 insertions(+) diff --git a/docs/guide/policies.md b/docs/guide/policies.md index 7d8bc3e..9c2e068 100644 --- a/docs/guide/policies.md +++ b/docs/guide/policies.md @@ -151,6 +151,19 @@ gates: The daemon's regex engine is Go RE2 — **no negative lookahead, no backreferences**. If you find yourself reaching for `(?!…)`, invert the match: write a positive regex for the dangerous shape rather than a negative regex around the safe one. +### Nudges + +Each `evaluate[]` clause may carry an optional `nudge: ` hint. When the clause fires a `deny`, the harness shim splices the hint onto the reason it forwards to the model as `"\n\n→ Suggested: "`. Use it to redirect the agent toward a safer command (e.g. `trash` instead of `rm`) or to point at the right [skill](https://github.com/openagentlock/skills) instead of leaving it to retry blindly. + +```yaml +evaluate: + - kind: always + action: deny + nudge: "use `trash ` (macOS) — recoverable from Trash" +``` + +Nudges only surface on `deny` verdicts; `allow`, monitor-suppressed, and non-matching paths drop the field. See the [openagentlock/rules](https://github.com/openagentlock/rules) registry for `safety.rm-suggest-trash` and `safety.secret-read-suggest-skill` as canonical examples. + ## Enforcement vs monitor - **Monitor** — every gate matches but the verdict is downgraded to `allow` for the harness. Use this on day one. diff --git a/docs/reference/api.md b/docs/reference/api.md index 1bac3b8..b326218 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -22,6 +22,16 @@ Toggles the daemon-level `monitor` ↔ `firewall` switch. This is the outer over Returns the list of currently active gates with their per-gate status (matched count, last-hit timestamp). +`POST /v1/gates/check` + +Synchronous verdict for a single tool call. Response fields: + +- `verdict` — `allow` or `deny` (the daemon never returns `ask` on this path) +- `rule_id` — id of the gate that matched, empty when no rule fired +- `reason` — short human-readable explanation, surfaced verbatim by the harness shim +- `nudge` — optional, string. Present only when the matched rule defined a `nudge:` hint **and** the final verdict is `deny`. Allow / monitor-suppressed paths drop it. See [Policies → Nudges](../guide/policies.md#nudges). +- `ledger_seq` — sequence number of the corresponding ledger leaf + ## Install `POST /v1/install/plan` — render a diff of what would change in the harness configs diff --git a/docs/reference/hooks.md b/docs/reference/hooks.md index ae674a9..7787fce 100644 --- a/docs/reference/hooks.md +++ b/docs/reference/hooks.md @@ -81,3 +81,7 @@ The control plane normalizes hook payloads into a single shape regardless of har ``` Ledger leaves use the same shape plus `verdict`, `reason`, `policy_rule_id`, and `signer`. + +## Nudges in deny replies + +When a matched rule carries a `nudge:` hint (see [Policies → Nudges](../guide/policies.md#nudges)) and the final verdict is `deny`, every harness shim — Claude Code, Codex, Cursor — appends the hint to the deny reason it forwards to the model. The format is the literal string `"\n\n→ Suggested: "` — arrow `→`, capital `S`, colon-space — and is intentionally stable so external tools can grep for `→ Suggested: ` to spot the hint. Allow, monitor-suppressed, and non-matching paths drop the field; the reason flows through unchanged. From 3f436611cdb56608f9ab809efd28d47710641620 Mon Sep 17 00:00:00 2001 From: knhn1004 <49494541+knhn1004@users.noreply.github.com> Date: Sun, 3 May 2026 15:43:26 -0700 Subject: [PATCH 5/6] test(e2e): nudge round-trip via fake-hook for Bash and Read tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add three end-to-end tests that exercise the full nudge wire path — real daemon, real CLI subprocess, real policy file — covering both deny-with-nudge and allow-without-nudge cases. The omitempty case asserts wire-level absence (Object.keys / `in`) rather than empty string, so a regression that emits `nudge: ""` on allow paths is caught. The e2e fixture policy now includes two nudge-bearing gates: `safety.rm-suggest-trash` (Bash, `rm -rf`) and `safety.secret-read-suggest-skill` (Read, `**/.aws/credentials`). The Read path is chosen so it does not collide with the existing `rogue.secret-read` globs (`**/.env*`, `**/.ssh/**`). `safety.rm-suggest-trash` is intentionally placed BEFORE `rogue.destructive-bash` so a plain `rm -rf` fires the nudge rule; the legacy destructive-bash test was retargeted to `git push --force` (the second alternation in destructive-bash) so that rule's coverage is preserved. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/tests/e2e.test.ts | 141 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 140 insertions(+), 1 deletion(-) diff --git a/cli/tests/e2e.test.ts b/cli/tests/e2e.test.ts index 66fa6ae..430077d 100644 --- a/cli/tests/e2e.test.ts +++ b/cli/tests/e2e.test.ts @@ -90,6 +90,17 @@ beforeAll(async () => { // Enforce-mode policy so the destructive-bash test observes a real deny // rather than a monitor-mode allow-with-tag. + // + // Gate ordering matters here: `safety.rm-suggest-trash` is intentionally + // placed BEFORE `rogue.destructive-bash` so that a plain `rm -rf` Bash + // command fires the nudge-bearing rule (the round-trip we're testing in + // T5). The legacy destructive-bash test now exercises the second + // alternation (`git push --force`) which still flows through + // `rogue.destructive-bash` — coverage preserved. + // + // `safety.secret-read-suggest-skill` matches `**/.aws/credentials`, a + // path that does NOT match `rogue.secret-read`'s `**/.env*` or + // `**/.ssh/**` globs, so the new rule fires cleanly without colliding. const policyPath = join(agentlockHome, "policy.yaml"); writeFileSync( policyPath, @@ -99,6 +110,23 @@ mode: enforce defaults: bash: allow gates: + - id: safety.rm-suggest-trash + match: + tool: Bash + any_command_regex: + - 'rm\\s+-rf\\b' + evaluate: + - kind: always + action: deny + nudge: "use 'trash ' (macOS) or move the directory aside — recoverable from Trash." + - id: safety.secret-read-suggest-skill + match: + tool: Read + path_glob: "**/.aws/credentials" + evaluate: + - kind: always + action: deny + nudge: "use the openagentlock/skills secret-fetcher skill if installed; otherwise ask the operator to paste the credentials." - id: rogue.destructive-bash match: tool: Bash @@ -379,6 +407,9 @@ describe.if(!SKIP)("e2e — CLI <-> control-plane", () => { test("fake-hook: destructive Bash command → deny + rule_id=rogue.destructive-bash", async () => { const sessionId = await createSession(); + // Uses `git push --force` (the second alternation in destructive-bash) + // because `rm -rf` now matches safety.rm-suggest-trash first — see the + // gate-ordering comment in the policy fixture above. const proc = spawn({ cmd: [ "bun", @@ -392,7 +423,7 @@ describe.if(!SKIP)("e2e — CLI <-> control-plane", () => { "--tool", "Bash", "--command", - "rm -rf /tmp/demo", + "git push origin main --force", "--json", "--url", `http://127.0.0.1:${port}`, @@ -500,6 +531,114 @@ describe.if(!SKIP)("e2e — CLI <-> control-plane", () => { expect(v.rule_id).toBe("rogue.secret-read"); }); + // T5 nudge round-trip: a policy rule with `nudge:` must surface the + // hint string in the gate JSON response, and rules without a nudge must + // omit the field entirely (omitempty on the wire). + + test("fake-hook: rm -rf → deny with safety.rm-suggest-trash and nudge text", async () => { + const sessionId = await createSession(); + const proc = spawn({ + cmd: [ + "bun", + "run", + CLI_ENTRY, + "fake-hook", + "--session", + sessionId, + "--source", + "claude-code", + "--tool", + "Bash", + "--command", + "rm -rf /tmp/demo", + "--json", + "--url", + `http://127.0.0.1:${port}`, + ], + stdout: "pipe", + stderr: "pipe", + }); + const stdout = await new Response(proc.stdout).text(); + await proc.exited; + expect(proc.exitCode).toBe(3); + const v = JSON.parse(stdout) as { + verdict: string; + rule_id: string; + nudge: string; + }; + expect(v.verdict).toBe("deny"); + expect(v.rule_id).toBe("safety.rm-suggest-trash"); + expect(v.nudge).toContain("trash"); + }); + + test("fake-hook: Read .aws/credentials → deny with secret-read-suggest-skill nudge", async () => { + const sessionId = await createSession(); + const proc = spawn({ + cmd: [ + "bun", + "run", + CLI_ENTRY, + "fake-hook", + "--session", + sessionId, + "--tool", + "Read", + "--file-path", + "/home/alice/.aws/credentials", + "--json", + "--url", + `http://127.0.0.1:${port}`, + ], + stdout: "pipe", + stderr: "pipe", + }); + const stdout = await new Response(proc.stdout).text(); + await proc.exited; + expect(proc.exitCode).toBe(3); + const v = JSON.parse(stdout) as { + verdict: string; + rule_id: string; + nudge: string; + }; + expect(v.verdict).toBe("deny"); + expect(v.rule_id).toBe("safety.secret-read-suggest-skill"); + expect(v.nudge).toContain("secret-fetcher"); + }); + + test("fake-hook: allow path → no nudge field in JSON (omitempty wire check)", async () => { + const sessionId = await createSession(); + const proc = spawn({ + cmd: [ + "bun", + "run", + CLI_ENTRY, + "fake-hook", + "--session", + sessionId, + "--source", + "claude-code", + "--tool", + "Bash", + "--command", + "echo hello", + "--json", + "--url", + `http://127.0.0.1:${port}`, + ], + stdout: "pipe", + stderr: "pipe", + }); + const stdout = await new Response(proc.stdout).text(); + await proc.exited; + expect(proc.exitCode).toBe(0); + const v = JSON.parse(stdout) as Record; + expect(v.verdict).toBe("allow"); + // omitempty must drop the `nudge` key when no rule fired with one — + // not just produce an empty string. Assert wire-level absence. + expect("nudge" in v).toBe(false); + expect(Object.keys(v)).not.toContain("nudge"); + }); + test("fake-hook: pip install numpy → allow (pkg on allowlist)", async () => { const sessionId = await createSession(); const proc = spawn({ From d4ceb02869658e76557502f7eb4732774d7a58a1 Mon Sep 17 00:00:00 2001 From: knhn1004 <49494541+knhn1004@users.noreply.github.com> Date: Sun, 3 May 2026 15:48:07 -0700 Subject: [PATCH 6/6] docs(api): add request body schema for POST /v1/gates/check Adds a brief request-fields list mirroring the new response-fields list so the gates/check section is balanced rather than docs-half-an-endpoint. Per CodeRabbit feedback on PR #53. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/reference/api.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/reference/api.md b/docs/reference/api.md index b326218..d1f3eb4 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -24,7 +24,16 @@ Returns the list of currently active gates with their per-gate status (matched c `POST /v1/gates/check` -Synchronous verdict for a single tool call. Response fields: +Synchronous verdict for a single tool call. Request body: + +- `session_id` — required, string. From `POST /v1/sessions/create`. +- `source` — required, string. Harness id (`claude-code`, `codex`, `cursor`, `mcp-proxy`). +- `tool` — required, string. The tool name being checked (`Bash`, `Read`, `Write`, `mcp____`). +- `input` — required, object. The tool's input shape (e.g. `{ "command": "rm -rf foo" }` for Bash). +- `cwd` — optional, string. Working directory if relevant to the rule. +- `meta` — optional, object. Free-form context for evaluators that look beyond `input`. + +Response fields: - `verdict` — `allow` or `deny` (the daemon never returns `ask` on this path) - `rule_id` — id of the gate that matched, empty when no rule fired