Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 140 additions & 1 deletion cli/tests/e2e.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,17 @@ beforeAll(async () => {

// Enforce-mode policy so the destructive-bash test observes a real deny
// rather than a monitor-mode allow-with-tag.
//
// Gate ordering matters here: `safety.rm-suggest-trash` is intentionally
// placed BEFORE `rogue.destructive-bash` so that a plain `rm -rf` Bash
// command fires the nudge-bearing rule (the round-trip we're testing in
// T5). The legacy destructive-bash test now exercises the second
// alternation (`git push --force`) which still flows through
// `rogue.destructive-bash` — coverage preserved.
//
// `safety.secret-read-suggest-skill` matches `**/.aws/credentials`, a
// path that does NOT match `rogue.secret-read`'s `**/.env*` or
// `**/.ssh/**` globs, so the new rule fires cleanly without colliding.
const policyPath = join(agentlockHome, "policy.yaml");
writeFileSync(
policyPath,
Expand All @@ -99,6 +110,23 @@ mode: enforce
defaults:
bash: allow
gates:
- id: safety.rm-suggest-trash
match:
tool: Bash
any_command_regex:
- 'rm\\s+-rf\\b'
evaluate:
- kind: always
action: deny
nudge: "use 'trash <path>' (macOS) or move the directory aside — recoverable from Trash."
- id: safety.secret-read-suggest-skill
match:
tool: Read
path_glob: "**/.aws/credentials"
evaluate:
- kind: always
action: deny
nudge: "use the openagentlock/skills secret-fetcher skill if installed; otherwise ask the operator to paste the credentials."
- id: rogue.destructive-bash
match:
tool: Bash
Expand Down Expand Up @@ -379,6 +407,9 @@ describe.if(!SKIP)("e2e — CLI <-> control-plane", () => {

test("fake-hook: destructive Bash command → deny + rule_id=rogue.destructive-bash", async () => {
const sessionId = await createSession();
// Uses `git push --force` (the second alternation in destructive-bash)
// because `rm -rf` now matches safety.rm-suggest-trash first — see the
// gate-ordering comment in the policy fixture above.
const proc = spawn({
cmd: [
"bun",
Expand All @@ -392,7 +423,7 @@ describe.if(!SKIP)("e2e — CLI <-> control-plane", () => {
"--tool",
"Bash",
"--command",
"rm -rf /tmp/demo",
"git push origin main --force",
"--json",
"--url",
`http://127.0.0.1:${port}`,
Expand Down Expand Up @@ -500,6 +531,114 @@ describe.if(!SKIP)("e2e — CLI <-> control-plane", () => {
expect(v.rule_id).toBe("rogue.secret-read");
});

// T5 nudge round-trip: a policy rule with `nudge:` must surface the
// hint string in the gate JSON response, and rules without a nudge must
// omit the field entirely (omitempty on the wire).

test("fake-hook: rm -rf → deny with safety.rm-suggest-trash and nudge text", async () => {
  // A plain `rm -rf` Bash command is expected to be denied by the
  // nudge-bearing rule, and the JSON the CLI prints must carry both the
  // rule id and the rule's hint text.
  const session = await createSession();
  const child = spawn({
    cmd: [
      "bun",
      "run",
      CLI_ENTRY,
      "fake-hook",
      "--session",
      session,
      "--source",
      "claude-code",
      "--tool",
      "Bash",
      "--command",
      "rm -rf /tmp/demo",
      "--json",
      "--url",
      `http://127.0.0.1:${port}`,
    ],
    stdout: "pipe",
    stderr: "pipe",
  });

  // Collect stdout first, then wait for the process to finish.
  const rawJson = await new Response(child.stdout).text();
  await child.exited;

  // This test pairs the deny verdict with exit status 3.
  expect(child.exitCode).toBe(3);

  const gate: { verdict: string; rule_id: string; nudge: string } =
    JSON.parse(rawJson);
  expect(gate.verdict).toBe("deny");
  expect(gate.rule_id).toBe("safety.rm-suggest-trash");
  expect(gate.nudge).toContain("trash");
});

test("fake-hook: Read .aws/credentials → deny with secret-read-suggest-skill nudge", async () => {
  // Reading an AWS credentials file is expected to be denied by the
  // skill-suggesting rule, with that rule's hint present in the JSON output.
  const session = await createSession();
  const child = spawn({
    cmd: [
      "bun",
      "run",
      CLI_ENTRY,
      "fake-hook",
      "--session",
      session,
      "--tool",
      "Read",
      "--file-path",
      "/home/alice/.aws/credentials",
      "--json",
      "--url",
      `http://127.0.0.1:${port}`,
    ],
    stdout: "pipe",
    stderr: "pipe",
  });

  // Collect stdout first, then wait for the process to finish.
  const rawJson = await new Response(child.stdout).text();
  await child.exited;

  // This test pairs the deny verdict with exit status 3.
  expect(child.exitCode).toBe(3);

  const gate: { verdict: string; rule_id: string; nudge: string } =
    JSON.parse(rawJson);
  expect(gate.verdict).toBe("deny");
  expect(gate.rule_id).toBe("safety.secret-read-suggest-skill");
  expect(gate.nudge).toContain("secret-fetcher");
});

test("fake-hook: allow path → no nudge field in JSON (omitempty wire check)", async () => {
  // A harmless command is expected to be allowed, and the JSON output must
  // not contain a `nudge` key at all.
  const session = await createSession();
  const child = spawn({
    cmd: [
      "bun",
      "run",
      CLI_ENTRY,
      "fake-hook",
      "--session",
      session,
      "--source",
      "claude-code",
      "--tool",
      "Bash",
      "--command",
      "echo hello",
      "--json",
      "--url",
      `http://127.0.0.1:${port}`,
    ],
    stdout: "pipe",
    stderr: "pipe",
  });

  // Collect stdout first, then wait for the process to finish.
  const rawJson = await new Response(child.stdout).text();
  await child.exited;
  expect(child.exitCode).toBe(0);

  const gate: Record<string, unknown> = JSON.parse(rawJson);
  expect(gate.verdict).toBe("allow");

  // The key itself has to be missing from the wire payload; an empty
  // string value would not satisfy either of these checks.
  expect("nudge" in gate).toBe(false);
  expect(Object.keys(gate)).not.toContain("nudge");
});

test("fake-hook: pip install numpy → allow (pkg on allowlist)", async () => {
const sessionId = await createSession();
const proc = spawn({
Expand Down
41 changes: 41 additions & 0 deletions cli/tests/hook-claude-code.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,47 @@ describe("hook claude-code shim", () => {
expect(r.stdout).toContain('"permissionDecision":"deny"');
});

test("deny with nudge → stderr carries the suggested-line", async () => {
  // The daemon appends a rule's nudge to the deny reason in the form
  // "<reason>\n\n→ Suggested: <hint>". The shim is expected to act as a
  // transparent forwarder, so the permissionDecisionReason it receives
  // must reach stderr unchanged for Claude Code to show the hint.
  const recorded: RecordedRequest[] = [];
  const denyReason =
    "matched rule safety.rm-suggest-trash (deny)\n\n→ Suggested: use trash instead";

  // Mock daemon that answers the pre-tool-use hook with a deny envelope.
  const daemon = startMockDaemon(
    {
      "/v1/hooks/claude-code/pre-tool-use": {
        status: 200,
        body: {
          continue: false,
          stopReason: denyReason,
          hookSpecificOutput: {
            hookEventName: "PreToolUse",
            permissionDecision: "deny",
            permissionDecisionReason: denyReason,
          },
        },
      },
    },
    recorded,
  );
  server = daemon.server;

  const hookInput = {
    session_id: "sess_nudge",
    hook_event_name: "PreToolUse",
    tool_name: "Bash",
    tool_use_id: "t_nudge",
    tool_input: { command: "rm -rf /tmp/x" },
  };
  const result = await runShim(
    ["pre-tool-use"],
    JSON.stringify(hookInput),
    daemon.url,
  );

  expect(result.code).toBe(2);
  // Marker, hint text and rule id must all appear on stderr.
  for (const fragment of [
    "→ Suggested: ",
    "use trash instead",
    "safety.rm-suggest-trash",
  ]) {
    expect(result.stderr).toContain(fragment);
  }
});

test("daemon unreachable → silent fail-open on every event", async () => {
// Every event must produce empty stdout AND empty stderr with exit 0.
// Anything else either pollutes the model's input stream or triggers
Expand Down
40 changes: 40 additions & 0 deletions cli/tests/hook-codex.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,46 @@ describe("hook codex shim", () => {
expect(r.stdout).toContain('"permissionDecision":"deny"');
});

test("deny with nudge → stderr carries the suggested-line", async () => {
  // Forwarding contract shared with the claude-code shim: the daemon
  // produces "<reason>\n\n→ Suggested: <hint>" and the shim copies that
  // text to stderr so Codex can show the hint to the model.
  const recorded: RecordedRequest[] = [];
  const denyReason =
    "matched rule safety.rm-suggest-trash (deny)\n\n→ Suggested: use trash instead";

  // Mock daemon that answers the pre-tool-use hook with a deny envelope.
  const daemon = startMockDaemon(
    {
      "/v1/hooks/codex/pre-tool-use": {
        status: 200,
        body: {
          continue: false,
          stopReason: denyReason,
          hookSpecificOutput: {
            hookEventName: "PreToolUse",
            permissionDecision: "deny",
            permissionDecisionReason: denyReason,
          },
        },
      },
    },
    recorded,
  );
  server = daemon.server;

  const hookInput = {
    session_id: "sess_nudge",
    hook_event_name: "PreToolUse",
    tool_name: "Bash",
    tool_use_id: "t_nudge",
    tool_input: { command: "rm -rf /tmp/x" },
  };
  const result = await runShim(
    ["pre-tool-use"],
    JSON.stringify(hookInput),
    daemon.url,
  );

  expect(result.code).toBe(2);
  // Marker, hint text and rule id must all appear on stderr.
  for (const fragment of [
    "→ Suggested: ",
    "use trash instead",
    "safety.rm-suggest-trash",
  ]) {
    expect(result.stderr).toContain(fragment);
  }
});

test("daemon unreachable → silent fail-open on every event", async () => {
// Codex hides hook stderr on exit-0 and renders any non-zero exit
// as a "(failed)" banner that looks like a real error. Neither is
Expand Down
40 changes: 40 additions & 0 deletions cli/tests/hook-cursor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,46 @@ describe("hook cursor shim", () => {
expect(r.stdout).toContain("rogue.destructive-bash");
});

test("deny with nudge → stderr and stdout carry the suggested-line", async () => {
  // The Cursor shim is expected to copy the daemon's reason to stderr AND
  // into the agent_message field of its stdout JSON, so the hint is
  // visible on whichever channel Cursor renders.
  const recorded: RecordedRequest[] = [];
  const denyReason =
    "matched rule safety.rm-suggest-trash (deny)\n\n→ Suggested: use trash instead";

  // Mock daemon that answers the pre-tool-use hook with a deny envelope.
  const daemon = startMockDaemon(
    {
      "/v1/hooks/cursor/pre-tool-use": {
        status: 200,
        body: {
          continue: false,
          stopReason: denyReason,
          hookSpecificOutput: {
            hookEventName: "PreToolUse",
            permissionDecision: "deny",
            permissionDecisionReason: denyReason,
          },
        },
      },
    },
    recorded,
  );
  server = daemon.server;

  const hookInput = {
    conversation_id: "conv_nudge",
    hook_event_name: "preToolUse",
    tool_name: "Bash",
    tool_input: { command: "rm -rf /tmp/x" },
  };
  const result = await runShim(
    ["pre-tool-use"],
    JSON.stringify(hookInput),
    daemon.url,
  );

  expect(result.code).toBe(2);
  // Check stderr first, then stdout; each must carry marker and hint.
  for (const channel of [result.stderr, result.stdout]) {
    expect(channel).toContain("→ Suggested: ");
    expect(channel).toContain("use trash instead");
  }
});

test("daemon unreachable → silent fail-open with plain allow envelope on every event", async () => {
// Cursor has no UI surface outside the model's input stream that we
// can write to (no statusLine, no safe agent_message). On a transport
Expand Down
2 changes: 2 additions & 0 deletions control-plane/internal/api/gate.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ type gateCheckResponse struct {
Reason string `json:"reason"`
LedgerSeq uint64 `json:"ledger_seq"`
Monitor bool `json:"monitor,omitempty"`
Nudge string `json:"nudge,omitempty"`
}

func gateCheckHandler(d Deps) http.HandlerFunc {
Expand Down Expand Up @@ -118,6 +119,7 @@ func gateCheckHandler(d Deps) http.HandlerFunc {
Reason: result.Reason,
LedgerSeq: entry.Seq,
Monitor: result.MonitorMatch,
Nudge: result.Nudge,
})
}
}
Loading
Loading